xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision cedd07cade5cbdfdad435c8172b7ec8972d9cd8d)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
10 {
11   Mat B;
12 
13   PetscFunctionBegin;
14   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
15   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
16   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
17   PetscCall(MatDestroy(&B));
18   PetscFunctionReturn(0);
19 }
20 
21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
22 {
23   Mat B;
24 
25   PetscFunctionBegin;
26   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
27   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
28   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
29   PetscFunctionReturn(0);
30 }
31 
32 /*MC
33    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
34 
35    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
36    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
37   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
38   for communicators controlling multiple processes.  It is recommended that you call both of
39   the above preallocation routines for simplicity.
40 
41    Options Database Keys:
42 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
43 
44   Developer Note:
45     Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically switches over to use inodes when
46    enough exist.
47 
48   Level: beginner
49 
50 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
51 M*/
52 
53 /*MC
54    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
55 
56    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
57    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
58    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
59   for communicators controlling multiple processes.  It is recommended that you call both of
60   the above preallocation routines for simplicity.
61 
62    Options Database Keys:
63 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
64 
65   Level: beginner
66 
67 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
68 M*/
69 
70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
71 {
72   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
73 
74   PetscFunctionBegin;
75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
76   A->boundtocpu = flg;
77 #endif
78   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
79   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
80 
81   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
82    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
83    * to differ from the parent matrix. */
84   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
85   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
86 
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
91 {
92   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
93 
94   PetscFunctionBegin;
95   if (mat->A) {
96     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
97     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
98   }
99   PetscFunctionReturn(0);
100 }
101 
/*
  Builds an index set (in global row numbering) of the locally owned rows that
  contain at least one stored, numerically nonzero entry in either the diagonal
  block A or the off-diagonal block B.  If no process has a zero row, *keptrows
  is left NULL, signalling "keep everything".
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  /* CSR row-start arrays of the diagonal (A) and off-diagonal (B) blocks */
  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* Pass 1: count the local rows that are entirely zero (cnt). */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      /* no stored entries at all: a zero row */
      cnt++;
      goto ok1;
    }
    /* any numerically nonzero stored entry means the row is kept */
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* n0rows = total number of zero rows across all processes (collective) */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* no process found a zero row; *keptrows stays NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  /* Pass 2: collect the global indices of the rows that are NOT zero. */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}
170 
171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
172 {
173   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
174   PetscBool   cong;
175 
176   PetscFunctionBegin;
177   PetscCall(MatHasCongruentLayouts(Y, &cong));
178   if (Y->assembled && cong) {
179     PetscCall(MatDiagonalSet(aij->A, D, is));
180   } else {
181     PetscCall(MatDiagonalSet_Default(Y, D, is));
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
187 {
188   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
189   PetscInt    i, rstart, nrows, *rows;
190 
191   PetscFunctionBegin;
192   *zrows = NULL;
193   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
194   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
195   for (i = 0; i < nrows; i++) rows[i] += rstart;
196   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
197   PetscFunctionReturn(0);
198 }
199 
/*
  Computes a per-column reduction (norm, sum, or mean of real/imaginary parts)
  over all rows of the parallel matrix.  reductions[] must have length equal to
  the global number of columns; the result is identical on every process.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray maps off-diagonal local col ids -> global col ids */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  /* work accumulates local contributions for every global column (zeroed) */
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these get/restore pairs with an unused result presumably force the
     values to be synchronized to the host before a_aij->a/b_aij->a are read directly
     below — confirm against MatSeqAIJGetArrayRead() semantics. */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* Each branch walks every stored entry of the diagonal block (columns offset by
     cmap->rstart) and of the off-diagonal block (columns mapped through garray). */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* Combine the per-process partial results: max for the infinity norm, sum otherwise. */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* Post-processing: square root for the 2-norm, divide by the global row count for means. */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
245 
246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
247 {
248   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
249   IS              sis, gis;
250   const PetscInt *isis, *igis;
251   PetscInt        n, *iis, nsis, ngis, rstart, i;
252 
253   PetscFunctionBegin;
254   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
255   PetscCall(MatFindNonzeroRows(a->B, &gis));
256   PetscCall(ISGetSize(gis, &ngis));
257   PetscCall(ISGetSize(sis, &nsis));
258   PetscCall(ISGetIndices(sis, &isis));
259   PetscCall(ISGetIndices(gis, &igis));
260 
261   PetscCall(PetscMalloc1(ngis + nsis, &iis));
262   PetscCall(PetscArraycpy(iis, igis, ngis));
263   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
264   n = ngis + nsis;
265   PetscCall(PetscSortRemoveDupsInt(&n, iis));
266   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
267   for (i = 0; i < n; i++) iis[i] += rstart;
268   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
269 
270   PetscCall(ISRestoreIndices(sis, &isis));
271   PetscCall(ISRestoreIndices(gis, &igis));
272   PetscCall(ISDestroy(&sis));
273   PetscCall(ISDestroy(&gis));
274   PetscFunctionReturn(0);
275 }
276 
277 /*
278   Local utility routine that creates a mapping from the global column
279 number to the local number in the off-diagonal part of the local
280 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
281 a slightly higher hash table cost; without it it is not scalable (each processor
282 has an order N integer array but is fast to access).
283 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of off-diagonal (ghost) columns */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* Hash-table variant: store (global col + 1) -> (local col + 1); the +1 shift
     keeps 0 free to mean "column not present in B". */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* Dense variant: one (zeroed) slot per global column; entry value is
     (local col + 1), so 0 again means "not present". */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}
300 
/* Insert or add one value at (row, col) of the diagonal block A of an MPIAIJ matrix.
   Relies on surrounding-scope variables: rp1/ap1 (column/value pointers of the row),
   nrow1 (entries in use), low1/high1 (search window), lastcol1 (previous column,
   exploits typically-ascending insertion order), rmax1/aimax/ailen, nonew,
   ignorezeroentries and the CSR arrays aa/ai/aj.  orow/ocol are the global indices,
   used only in error messages. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* binary search narrows the window to at most 5 entries, then scan linearly */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* optionally drop zero insertions at off-diagonal locations */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* nonew == 1: silently ignore new nonzero locations */ \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
346 
/* Insert or add one value at (row, col) of the off-diagonal block B of an MPIAIJ matrix.
   Mirror of MatSetValues_SeqAIJ_A_Private() using the *2-suffixed surrounding-scope
   variables (rp2/ap2/nrow2/low2/high2/lastcol2/rmax2/bimax/bilen) and the CSR arrays
   ba/bi/bj.  Note: unlike the A variant, the zero-value drop does not test row != col,
   since positions in B are never on the matrix diagonal. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* binary search narrows the window to at most 5 entries, then scan linearly */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    /* optionally drop zero insertions */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    /* nonew == 1: silently ignore new nonzero locations */ \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
391 
/*
  Replaces an entire locally owned row with the values in v, which must be
  ordered by ascending global column: entries left of the diagonal block,
  then the diagonal-block entries, then entries right of the diagonal block.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* global -> local row index */
  /* l = number of B entries whose global column is left of the owned block;
     NOTE(review): comparing columns against the row-ownership start `diag` is
     what restricts this to square matrices — the column range must coincide. */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    /* copy the first l values into the leading part of B's row */
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}
429 
/*
  Inserts or adds a logically dense block of values (m rows im[] by n columns in[])
  into the parallel matrix.  Locally owned rows are routed to the diagonal block A
  (owned column range) or the off-diagonal block B (other columns) via the
  MatSetValues_SeqAIJ_{A,B}_Private macros; off-process rows are stashed for
  communication during assembly.  Negative row/column indices are ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state the macros use */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        /* v may be NULL (pattern-only insertion); layout depends on roworiented */
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* owned column range -> diagonal block A, local column numbering */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are silently skipped */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after a first assembly B uses compacted local column ids; translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* unknown column and B's structure is frozen: warn (nonew==1) or error */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column ids directly */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for the assembly communication phase */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}
538 
539 /*
540     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
541     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
542     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
543 */
544 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
545 {
546   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
547   Mat         A      = aij->A; /* diagonal part of the matrix */
548   Mat         B      = aij->B; /* offdiagonal part of the matrix */
549   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
550   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
551   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
552   PetscInt   *ailen = a->ilen, *aj = a->j;
553   PetscInt   *bilen = b->ilen, *bj = b->j;
554   PetscInt    am          = aij->A->rmap->n, j;
555   PetscInt    diag_so_far = 0, dnz;
556   PetscInt    offd_so_far = 0, onz;
557 
558   PetscFunctionBegin;
559   /* Iterate over all rows of the matrix */
560   for (j = 0; j < am; j++) {
561     dnz = onz = 0;
562     /*  Iterate over all non-zero columns of the current row */
563     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
564       /* If column is in the diagonal */
565       if (mat_j[col] >= cstart && mat_j[col] < cend) {
566         aj[diag_so_far++] = mat_j[col] - cstart;
567         dnz++;
568       } else { /* off-diagonal entries */
569         bj[offd_so_far++] = mat_j[col];
570         onz++;
571       }
572     }
573     ailen[j] = dnz;
574     bilen[j] = onz;
575   }
576   PetscFunctionReturn(0);
577 }
578 
579 /*
580     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
581     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
582     No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
583     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
584     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
585 */
586 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
587 {
588   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
589   Mat          A    = aij->A; /* diagonal part of the matrix */
590   Mat          B    = aij->B; /* offdiagonal part of the matrix */
591   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
592   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
593   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
594   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
595   PetscInt    *ailen = a->ilen, *aj = a->j;
596   PetscInt    *bilen = b->ilen, *bj = b->j;
597   PetscInt     am          = aij->A->rmap->n, j;
598   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
599   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
600   PetscScalar *aa = a->a, *ba = b->a;
601 
602   PetscFunctionBegin;
603   /* Iterate over all rows of the matrix */
604   for (j = 0; j < am; j++) {
605     dnz_row = onz_row = 0;
606     rowstart_offd     = full_offd_i[j];
607     rowstart_diag     = full_diag_i[j];
608     /*  Iterate over all non-zero columns of the current row */
609     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
610       /* If column is in the diagonal */
611       if (mat_j[col] >= cstart && mat_j[col] < cend) {
612         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
613         aa[rowstart_diag + dnz_row] = mat_a[col];
614         dnz_row++;
615       } else { /* off-diagonal entries */
616         bj[rowstart_offd + onz_row] = mat_j[col];
617         ba[rowstart_offd + onz_row] = mat_a[col];
618         onz_row++;
619       }
620     }
621     ailen[j] = dnz_row;
622     bilen[j] = onz_row;
623   }
624   PetscFunctionReturn(0);
625 }
626 
/*
  Retrieves values at the given (row, column) pairs.  Only locally owned rows
  may be queried; entries not stored in the matrix come back as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row number */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* owned column range: read from the diagonal block with local numbering */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal column: translate global -> local via colmap (value is id+1, 0 = absent) */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column absent from B (or stale colmap slot): the entry is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
661 
662 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
663 {
664   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
665   PetscInt    nstash, reallocs;
666 
667   PetscFunctionBegin;
668   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
669 
670   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
671   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
672   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
673   PetscFunctionReturn(0);
674 }
675 
/*
  Completes assembly: drains the stash of off-process entries set elsewhere,
  assembles the diagonal and off-diagonal blocks, handles the (collective)
  decision to disassemble B when another process changed its structure, and
  sets up the scatter needed for matrix-vector products on final assembly.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Receive and insert the entries other processes stashed for our rows. */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more incoming messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* collective AND of was_assembled: false on any rank forces everyone to disassemble */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* invalidate cached row data and the cached diagonal vector */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
754 
755 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
756 {
757   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
758 
759   PetscFunctionBegin;
760   PetscCall(MatZeroEntries(l->A));
761   PetscCall(MatZeroEntries(l->B));
762   PetscFunctionReturn(0);
763 }
764 
/*
  MatZeroRows_MPIAIJ - zeros the locally owned rows among the global indices
  rows[], optionally placing diag on the diagonal, and (if x and b are given)
  sets b at those rows so that the solution there equals x.

  Collective: ends with a final assembly and an all-reduce deciding whether
  the nonzero state changed anywhere.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i so a solve reproduces x at the zeroed rows */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember nonzero states so pattern changes can be detected below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: each zeroed row's diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the nonew flags; restored after the diagonal insertions below */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart; /* global row index */
      if (row >= A->cmap->N) continue; /* rows past the column range have no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: simply zero the rows of both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
838 
/*
  MatZeroRowsColumns_MPIAIJ - zeros both the rows and the columns given by
  the global indices rows[], optionally placing diag on the diagonal and, if
  x and b are given, moving the known values x of the eliminated unknowns
  into the right hand side b.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not zeroed"; overwritten by the reduction below */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed: OR-ing any (nonnegative) row index onto a root leaves it >= 0 */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers (lrows is reused: flags in, local row indices out) */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* build a 0/1 mask of zeroed rows and scatter it to the ghost layout so each
     rank knows which of its off-diagonal columns must be eliminated */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring the known values x into the ghost vector so b can be corrected below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row index back to the local row */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj]; /* move the known contribution to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
956 
957 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
958 {
959   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
960   PetscInt    nt;
961   VecScatter  Mvctx = a->Mvctx;
962 
963   PetscFunctionBegin;
964   PetscCall(VecGetLocalSize(xx, &nt));
965   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
966   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
967   PetscUseTypeMethod(a->A, mult, xx, yy);
968   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
969   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
974 {
975   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
976 
977   PetscFunctionBegin;
978   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
983 {
984   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
985   VecScatter  Mvctx = a->Mvctx;
986 
987   PetscFunctionBegin;
988   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
989   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
990   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
991   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
992   PetscFunctionReturn(0);
993 }
994 
995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
996 {
997   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
998 
999   PetscFunctionBegin;
1000   /* do nondiagonal part */
1001   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1002   /* do local part */
1003   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1004   /* add partial results together */
1005   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1006   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1007   PetscFunctionReturn(0);
1008 }
1009 
/*
  MatIsTranspose_MPIAIJ - sets *f to PETSC_TRUE when Bmat equals the
  transpose of Amat to within tol.

  Strategy: first a cheap collective test comparing the diagonal blocks; only
  if that passes everywhere (and there is more than one rank) are the
  off-diagonal parts gathered with MatCreateSubMatrices() and compared.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* all ranks must agree before attempting the expensive test */
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(0); /* uniprocessor: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global indices outside this rank's row ownership range.
     NOTE(review): the allocation is sized with N but the second loop fills
     M - last + first entries; this appears to assume M == N (square) -- confirm */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* Aoff = A(me, notme), Boff = B(notme, me); A is B's transpose iff these are transposes */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1051 
/* MatIsSymmetric_MPIAIJ - tests symmetry (within tolerance tol) by checking
   whether the matrix equals its own transpose. */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(0);
}
1058 
1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1060 {
1061   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1062 
1063   PetscFunctionBegin;
1064   /* do nondiagonal part */
1065   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1066   /* do local part */
1067   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1068   /* add partial results together */
1069   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1070   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1071   PetscFunctionReturn(0);
1072 }
1073 
/*
  MatGetDiagonal_MPIAIJ - extracts the diagonal of the matrix into v.

  This only works correctly for square matrices where the subblock A->A is
  the diagonal block, i.e. the row and column ownership ranges coincide,
  since the work is delegated entirely to the diagonal block.
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* both checks enforce the "square with matching row/col layout" requirement */
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}
1088 
1089 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1090 {
1091   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1092 
1093   PetscFunctionBegin;
1094   PetscCall(MatScale(a->A, aa));
1095   PetscCall(MatScale(a->B, aa));
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1100 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1101 {
1102   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1103 
1104   PetscFunctionBegin;
1105   PetscCall(PetscSFDestroy(&aij->coo_sf));
1106   PetscCall(PetscFree(aij->Aperm1));
1107   PetscCall(PetscFree(aij->Bperm1));
1108   PetscCall(PetscFree(aij->Ajmap1));
1109   PetscCall(PetscFree(aij->Bjmap1));
1110 
1111   PetscCall(PetscFree(aij->Aimap2));
1112   PetscCall(PetscFree(aij->Bimap2));
1113   PetscCall(PetscFree(aij->Aperm2));
1114   PetscCall(PetscFree(aij->Bperm2));
1115   PetscCall(PetscFree(aij->Ajmap2));
1116   PetscCall(PetscFree(aij->Bjmap2));
1117 
1118   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
1119   PetscCall(PetscFree(aij->Cperm1));
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1124 {
1125   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1126 
1127   PetscFunctionBegin;
1128 #if defined(PETSC_USE_LOG)
1129   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
1130 #endif
1131   PetscCall(MatStashDestroy_Private(&mat->stash));
1132   PetscCall(VecDestroy(&aij->diag));
1133   PetscCall(MatDestroy(&aij->A));
1134   PetscCall(MatDestroy(&aij->B));
1135 #if defined(PETSC_USE_CTABLE)
1136   PetscCall(PetscHMapIDestroy(&aij->colmap));
1137 #else
1138   PetscCall(PetscFree(aij->colmap));
1139 #endif
1140   PetscCall(PetscFree(aij->garray));
1141   PetscCall(VecDestroy(&aij->lvec));
1142   PetscCall(VecScatterDestroy(&aij->Mvctx));
1143   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
1144   PetscCall(PetscFree(aij->ld));
1145 
1146   /* Free COO */
1147   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1148 
1149   PetscCall(PetscFree(mat->data));
1150 
1151   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1152   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
1153 
1154   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1155   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
1157   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
1158   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
1159   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
1160   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
1161   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
1164 #if defined(PETSC_HAVE_CUDA)
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
1166 #endif
1167 #if defined(PETSC_HAVE_HIP)
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
1169 #endif
1170 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1171   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
1172 #endif
1173   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
1174 #if defined(PETSC_HAVE_ELEMENTAL)
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
1176 #endif
1177 #if defined(PETSC_HAVE_SCALAPACK)
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
1179 #endif
1180 #if defined(PETSC_HAVE_HYPRE)
1181   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
1182   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
1183 #endif
1184   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1185   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
1187   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
1188   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
1189   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
1190 #if defined(PETSC_HAVE_MKL_SPARSE)
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
1192 #endif
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1195   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
1196   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
1197   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1202 {
1203   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1204   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1205   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1206   const PetscInt    *garray = aij->garray;
1207   const PetscScalar *aa, *ba;
1208   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1209   PetscInt          *rowlens;
1210   PetscInt          *colidxs;
1211   PetscScalar       *matvals;
1212 
1213   PetscFunctionBegin;
1214   PetscCall(PetscViewerSetUp(viewer));
1215 
1216   M  = mat->rmap->N;
1217   N  = mat->cmap->N;
1218   m  = mat->rmap->n;
1219   rs = mat->rmap->rstart;
1220   cs = mat->cmap->rstart;
1221   nz = A->nz + B->nz;
1222 
1223   /* write matrix header */
1224   header[0] = MAT_FILE_CLASSID;
1225   header[1] = M;
1226   header[2] = N;
1227   header[3] = nz;
1228   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1229   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1230 
1231   /* fill in and store row lengths  */
1232   PetscCall(PetscMalloc1(m, &rowlens));
1233   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1235   PetscCall(PetscFree(rowlens));
1236 
1237   /* fill in and store column indices */
1238   PetscCall(PetscMalloc1(nz, &colidxs));
1239   for (cnt = 0, i = 0; i < m; i++) {
1240     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1241       if (garray[B->j[jb]] > cs) break;
1242       colidxs[cnt++] = garray[B->j[jb]];
1243     }
1244     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1245     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1246   }
1247   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1248   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1249   PetscCall(PetscFree(colidxs));
1250 
1251   /* fill in and store nonzero values */
1252   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1253   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1254   PetscCall(PetscMalloc1(nz, &matvals));
1255   for (cnt = 0, i = 0; i < m; i++) {
1256     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1257       if (garray[B->j[jb]] > cs) break;
1258       matvals[cnt++] = ba[jb];
1259     }
1260     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1261     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1262   }
1263   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1264   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1265   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1266   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1267   PetscCall(PetscFree(matvals));
1268 
1269   /* write block size option to the viewer's .info file */
1270   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 #include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - viewer backend shared by the ASCII,
  draw, socket, and binary paths of MatView_MPIAIJ().

  Special ASCII formats (load balance, info, detailed info, factor info) and
  the binary case are handled and return early; all remaining cases fall
  through to gathering the entire matrix on rank 0 and viewing it there
  sequentially.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* print min/avg/max number of locally stored nonzeros across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory/inode statistics plus the VecScatter layout */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* single rank: the diagonal block is the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch looks unreachable -- iascii is already handled
       by the first branch of this else-if chain; confirm */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1400 
1401 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1402 {
1403   PetscBool iascii, isdraw, issocket, isbinary;
1404 
1405   PetscFunctionBegin;
1406   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1407   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1408   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1409   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1410   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1411   PetscFunctionReturn(0);
1412 }
1413 
/*
   MatSOR_MPIAIJ - SOR/SSOR relaxation for MPIAIJ matrices.

   Only the "local" sweep variants (and Eisenstat) are supported: each sweep
   relaxes the on-process diagonal block mat->A after the off-process coupling
   B*x has been folded into the right-hand side (bb1 = bb - B*x).  A true
   global-ordering parallel SOR is not implemented and raises PETSC_ERR_SUP.

   bb     - right-hand side
   omega  - relaxation parameter
   flag   - MatSORType bit flags selecting the sweep variant
   fshift - diagonal shift passed to the local SOR
   its    - outer (parallel) iterations; lits - local iterations per sweep
   xx     - solution; used as initial guess unless SOR_ZERO_INITIAL_GUESS is set
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER is applied entirely by the local diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* The work vector bb1 is needed whenever bb1 = bb - B*x must be formed, i.e.
     when iterating more than once, when the initial guess is nonzero, or for
     Eisenstat; note ~flag & SOR_ZERO_INITIAL_GUESS tests that the bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* With a zero initial guess B*x = 0, so the first sweep needs no ghost update */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather the off-process entries of x into mat->lvec */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x (negate lvec so multadd computes the subtraction) */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward local sweep from a zero guess gives the "upper" half-solve */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily build and cache the global diagonal used by the Eisenstat update */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*x */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    /* fold in the off-process coupling */
    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/error state from the local diagonal-block solve */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1510 
/*
   MatPermute_MPIAIJ - Forms *B = the permutation of A by row permutation rowp
   and column permutation colp.

   Strategy: use PetscSF reductions to invert the permutations so each process
   learns the destination (permuted) index of every row/column it owns, count
   the diagonal/off-diagonal nonzeros each destination row will receive to
   preallocate exactly, then insert the entries with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols (global columns of the off-diagonal block) should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal (dnnz) and off-diagonal (onnz) entries per source row, then
     scatter those counts to the rows' destination owners (tdnnz/tonnz) */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in this routine, so this destroy is
     currently dead code — possibly a remnant of handling a sequential colp */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1616 
1617 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1618 {
1619   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1620 
1621   PetscFunctionBegin;
1622   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1623   if (ghosts) *ghosts = aij->garray;
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1628 {
1629   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1630   Mat            A = mat->A, B = mat->B;
1631   PetscLogDouble isend[5], irecv[5];
1632 
1633   PetscFunctionBegin;
1634   info->block_size = 1.0;
1635   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1636 
1637   isend[0] = info->nz_used;
1638   isend[1] = info->nz_allocated;
1639   isend[2] = info->nz_unneeded;
1640   isend[3] = info->memory;
1641   isend[4] = info->mallocs;
1642 
1643   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1644 
1645   isend[0] += info->nz_used;
1646   isend[1] += info->nz_allocated;
1647   isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;
1649   isend[4] += info->mallocs;
1650   if (flag == MAT_LOCAL) {
1651     info->nz_used      = isend[0];
1652     info->nz_allocated = isend[1];
1653     info->nz_unneeded  = isend[2];
1654     info->memory       = isend[3];
1655     info->mallocs      = isend[4];
1656   } else if (flag == MAT_GLOBAL_MAX) {
1657     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1658 
1659     info->nz_used      = irecv[0];
1660     info->nz_allocated = irecv[1];
1661     info->nz_unneeded  = irecv[2];
1662     info->memory       = irecv[3];
1663     info->mallocs      = irecv[4];
1664   } else if (flag == MAT_GLOBAL_SUM) {
1665     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1666 
1667     info->nz_used      = irecv[0];
1668     info->nz_allocated = irecv[1];
1669     info->nz_unneeded  = irecv[2];
1670     info->memory       = irecv[3];
1671     info->mallocs      = irecv[4];
1672   }
1673   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1674   info->fill_ratio_needed = 0;
1675   info->factor_mallocs    = 0;
1676   PetscFunctionReturn(0);
1677 }
1678 
/*
   MatSetOption_MPIAIJ - Dispatches a MatOption to the two sequential blocks
   (a->A diagonal, a->B off-diagonal) or records it in the MPIAIJ header,
   depending on the option.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* Options forwarded verbatim to both sequential blocks (need preallocation first) */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  /* Recorded in the parallel header AND forwarded to both blocks */
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  /* Affects only the parallel stashing of off-process entries */
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1732 
/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix,
   with global column indices in increasing order.

   The row is assembled by merging the diagonal block's entries (local columns,
   offset by cstart) with the off-diagonal block's entries (mapped to global
   columns through garray).  Both sequential blocks are assumed to return their
   columns already sorted; `imark` is the number of off-diagonal entries whose
   global column lies before the diagonal range.  Results point into scratch
   arrays (rowvalues/rowindices) valid until MatRestoreRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time since the scratch buffers are shared */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* NULL out the request pointers the caller did not ask for; note the column
     work array of B is still needed for sorting when values are requested */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy B entries whose global column precedes the diagonal range */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A's entries, then B's remaining (post-diagonal) entries */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the value pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* diagonal-block columns are local; shift them to global numbering */
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1816 
1817 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1818 {
1819   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1820 
1821   PetscFunctionBegin;
1822   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1823   aij->getrowactive = PETSC_FALSE;
1824   PetscFunctionReturn(0);
1825 }
1826 
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum), or
   infinity- (max row sum) norm of a parallel AIJ matrix.

   On one process the computation is delegated to the sequential block;
   otherwise each process accumulates contributions from its diagonal (A)
   and off-diagonal (B) blocks and the result is combined with an Allreduce.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column |a_ij| sums in a dense array of length N,
         reduce across processes, then take the maximum entry */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* diagonal-block columns are local: shift by cstart to global */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* off-diagonal columns are compressed: map through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local row-sum max followed
         by a global max reduction suffices */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}
1904 
/*
   MatTranspose_MPIAIJ - Forms the explicit transpose of an MPIAIJ matrix.

   For MAT_INITIAL_MATRIX (or in-place, *matout == A) the result is
   preallocated exactly: d_nnz comes from column counts of the diagonal block,
   o_nnz from the off-diagonal column counts reduced onto their owning
   processes via a PetscSF.  The diagonal block is transposed locally without
   MatSetValues(); the off-diagonal entries are inserted one source row at a
   time (each becoming a column of the result).  MAT_INPLACE_MATRIX is
   finished with MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose has swapped sizes and block sizes */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part: row `row` of a->B becomes column `row` of the result,
     so each entry (row, garray[bj[k]]) is inserted as (garray[bj[k]], row) */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's data with B's and destroy the shell */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
1998 
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr).

   Either vector may be NULL to skip that side.  The right-scaling of the
   off-diagonal block needs ghost values of rr, so that scatter is started
   first and completed only after the purely local work is done, overlapping
   communication with computation.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scaling of the off-diagonal block is purely local (rows are local) */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}
2028 
2029 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2030 {
2031   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2032 
2033   PetscFunctionBegin;
2034   PetscCall(MatSetUnfactored(a->A));
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2039 {
2040   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2041   Mat         a, b, c, d;
2042   PetscBool   flg;
2043 
2044   PetscFunctionBegin;
2045   a = matA->A;
2046   b = matA->B;
2047   c = matB->A;
2048   d = matB->B;
2049 
2050   PetscCall(MatEqual(a, c, &flg));
2051   if (flg) PetscCall(MatEqual(b, d, &flg));
2052   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2057 {
2058   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2059   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2060 
2061   PetscFunctionBegin;
2062   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2063   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2064     /* because of the column compression in the off-processor part of the matrix a->B,
2065        the number of columns in a->B and b->B may be different, hence we cannot call
2066        the MatCopy() directly on the two parts. If need be, we can provide a more
2067        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2068        then copying the submatrices */
2069     PetscCall(MatCopy_Basic(A, B, str));
2070   } else {
2071     PetscCall(MatCopy(a->A, b->A, str));
2072     PetscCall(MatCopy(a->B, b->B, str));
2073   }
2074   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2075   PetscFunctionReturn(0);
2076 }
2077 
/*
   MatSetUp_MPIAIJ - Default setup: preallocates with PETSC_DEFAULT row
   lengths when the user assembles without having called a preallocation
   routine explicitly.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
2084 
2085 /*
2086    Computes the number of nonzeros per row needed for preallocation when X and Y
2087    have different nonzero structure.
2088 */
2089 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2090 {
2091   PetscInt i, j, k, nzx, nzy;
2092 
2093   PetscFunctionBegin;
2094   /* Set the number of nonzeros in the new matrix */
2095   for (i = 0; i < m; i++) {
2096     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2097     nzx    = xi[i + 1] - xi[i];
2098     nzy    = yi[i + 1] - yi[i];
2099     nnz[i] = 0;
2100     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2101       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2102       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2103       nnz[i]++;
2104     }
2105     for (; k < nzy; k++) nnz[i]++;
2106   }
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2112 {
2113   PetscInt    m = Y->rmap->N;
2114   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2115   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2116 
2117   PetscFunctionBegin;
2118   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2119   PetscFunctionReturn(0);
2120 }
2121 
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN is applied block-wise; SUBSET_NONZERO_PATTERN falls
   back to the generic implementation; otherwise a new matrix with the union
   pattern is preallocated exactly, filled, and merged back into Y's header.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* count the union nonzero pattern per row: diagonal blocks share local
       column numbering, off-diagonal blocks need the garray translation */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's innards with B's while keeping Y's header/identity */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
2152 
2153 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2154 
2155 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2156 {
2157   PetscFunctionBegin;
2158   if (PetscDefined(USE_COMPLEX)) {
2159     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2160 
2161     PetscCall(MatConjugate_SeqAIJ(aij->A));
2162     PetscCall(MatConjugate_SeqAIJ(aij->B));
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2168 {
2169   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2170 
2171   PetscFunctionBegin;
2172   PetscCall(MatRealPart(a->A));
2173   PetscCall(MatRealPart(a->B));
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2178 {
2179   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2180 
2181   PetscFunctionBegin;
2182   PetscCall(MatImaginaryPart(a->A));
2183   PetscCall(MatImaginaryPart(a->B));
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2188 {
2189   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2190   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2191   PetscScalar       *va, *vv;
2192   Vec                vB, vA;
2193   const PetscScalar *vb;
2194 
2195   PetscFunctionBegin;
2196   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2197   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2198 
2199   PetscCall(VecGetArrayWrite(vA, &va));
2200   if (idx) {
2201     for (i = 0; i < m; i++) {
2202       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2203     }
2204   }
2205 
2206   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2207   PetscCall(PetscMalloc1(m, &idxb));
2208   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2209 
2210   PetscCall(VecGetArrayWrite(v, &vv));
2211   PetscCall(VecGetArrayRead(vB, &vb));
2212   for (i = 0; i < m; i++) {
2213     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2214       vv[i] = vb[i];
2215       if (idx) idx[i] = a->garray[idxb[i]];
2216     } else {
2217       vv[i] = va[i];
2218       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2219     }
2220   }
2221   PetscCall(VecRestoreArrayWrite(vA, &vv));
2222   PetscCall(VecRestoreArrayWrite(vA, &va));
2223   PetscCall(VecRestoreArrayRead(vB, &vb));
2224   PetscCall(PetscFree(idxb));
2225   PetscCall(VecDestroy(&vA));
2226   PetscCall(VecDestroy(&vB));
2227   PetscFunctionReturn(0);
2228 }
2229 
/* v[r] = min_j |A[r][j]| over the FULL row, counting columns with no stored
   entry as implicit 0.0.  idx[r] (optional) receives the global column of the
   minimizing entry; on ties the smaller global column wins. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything is in the diagonal block; delegate directly, writing into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* No local columns: report 0.0 with no column index */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the minimum in absolute value is 0.0 (some implicit zero) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this search compares global column numbers against the loop
         index j, which finds the first implicit zero only when the row's entries
         occupy a leading contiguous column range -- verify intent upstream */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan this row's stored entries; an entry only replaces the current
       candidate (possibly the implicit 0.0) when its |value| is smaller */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal results; ties go to the smaller
     global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2345 
/* v[r] = min_j A[r][j] (by real part) over the FULL row, counting columns with
   no stored entry as implicit 0.0.  idx[r] (optional) receives the global
   column of the minimizing entry; on ties the smaller global column wins. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything is in the diagonal block; delegate directly, writing into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* No local columns: report the identity of min (PETSC_MAX_REAL), no index */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so 0.0 (an implicit zero) is the starting candidate */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this search compares global column numbers against the loop
         index j, which finds the first implicit zero only when the row's entries
         occupy a leading contiguous column range -- verify intent upstream */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan this row's stored entries; an entry only replaces the current
       candidate (possibly the implicit 0.0) when its real part is smaller */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal results; ties go to the smaller
     global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2461 
/* v[r] = max_j A[r][j] (by real part) over the FULL row, counting columns with
   no stored entry as implicit 0.0.  idx[r] (optional) receives the global
   column of the maximizing entry; on ties the smaller global column wins. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Everything is in the diagonal block; delegate directly, writing into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* No local columns: report the identity of max (PETSC_MIN_REAL), no index */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros; seed with the first entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): this search compares global column numbers against the loop
         index j, which finds the first implicit zero only when the row's entries
         occupy a leading contiguous column range -- verify intent upstream */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan this row's stored entries; an entry only replaces the current
       candidate (possibly the implicit 0.0) when its real part is larger */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal results; ties go to the smaller
     global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2577 
2578 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2579 {
2580   Mat *dummy;
2581 
2582   PetscFunctionBegin;
2583   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2584   *newmat = *dummy;
2585   PetscCall(PetscFree(dummy));
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2590 {
2591   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2592 
2593   PetscFunctionBegin;
2594   PetscCall(MatInvertBlockDiagonal(a->A, values));
2595   A->factorerrortype = a->A->factorerrortype;
2596   PetscFunctionReturn(0);
2597 }
2598 
/* Fill the matrix's preallocated nonzero locations with random values and
   reassemble it. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    /* Off-diagonal block already uses its final (compressed) column space */
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    /* Before first assembly aij->B still works with global column indices, so
       fill it while skipping the column range owned by the diagonal block */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
2615 
2616 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2617 {
2618   PetscFunctionBegin;
2619   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2620   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2621   PetscFunctionReturn(0);
2622 }
2623 
/*@
   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros stored in the matrix on this MPI rank

   Not collective

   Input Parameter:
.    A - the matrix

   Output Parameter:
.    nz - the number of stored nonzeros on this rank (diagonal plus off-diagonal block)

 Level: advanced

.seealso: `MATMPIAIJ`, `Mat`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

  PetscFunctionBegin;
  /* For a SeqAIJ block, i[nrows] is the total number of stored entries */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}
2648 
/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`

   Collective

   Input Parameters:
+    A - the matrix
-    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

 Level: advanced

.seealso: `MATMPIAIJ`, `MatIncreaseOverlap()`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* PetscTryMethod: silently a no-op for matrix types without this method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}
2667 
2668 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2669 {
2670   PetscBool sc = PETSC_FALSE, flg;
2671 
2672   PetscFunctionBegin;
2673   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2674   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2675   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2676   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2677   PetscOptionsHeadEnd();
2678   PetscFunctionReturn(0);
2679 }
2680 
/* Y = Y + a*I.  Ensures there is room for diagonal entries before delegating to
   the generic implementation. */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* Reserve one entry per row in the diagonal block, none off-diagonal */
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    /* Re-preallocating resets the block's nonew flag; save it and restore after */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}
2697 
2698 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2704   PetscCall(MatMissingDiagonal(a->A, missing, d));
2705   if (d) {
2706     PetscInt rstart;
2707     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2708     *d += rstart;
2709   }
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2714 {
2715   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2716 
2717   PetscFunctionBegin;
2718   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2723 {
2724   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2725 
2726   PetscFunctionBegin;
2727   PetscCall(MatEliminateZeros(a->A));
2728   PetscCall(MatEliminateZeros(a->B));
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 /* -------------------------------------------------------------------*/
/* Operation table for MATMPIAIJ.  The slot order is fixed by struct _MatOps
   (see petsc/private/matimpl.h); the numbered comments give the slot index of
   the following entry, and NULL means the generic Mat-level fallback (or no
   support) is used for that operation. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2885 
2886 /* ----------------------------------------------------------------------------------------*/
2887 
2888 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2889 {
2890   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2891 
2892   PetscFunctionBegin;
2893   PetscCall(MatStoreValues(aij->A));
2894   PetscCall(MatStoreValues(aij->B));
2895   PetscFunctionReturn(0);
2896 }
2897 
2898 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2899 {
2900   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2901 
2902   PetscFunctionBegin;
2903   PetscCall(MatRetrieveValues(aij->A));
2904   PetscCall(MatRetrieveValues(aij->B));
2905   PetscFunctionReturn(0);
2906 }
2907 
/* Implementation of MatMPIAIJSetPreallocation(): (re)creates the sequential
   diagonal block (b->A) and off-diagonal block (b->B) and preallocates them
   with d_nz/d_nnz and o_nz/o_nnz respectively.  Leaves the matrix in the
   preallocated-but-unassembled state. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* Throw away the old column map and communication machinery; both are
     rebuilt during assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* On a single process there is no off-diagonal part, so b->B gets zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* The diagonal block keeps the same sizes across calls, so it is created only once */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2949 
2950 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2951 {
2952   Mat_MPIAIJ *b;
2953 
2954   PetscFunctionBegin;
2955   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2956   PetscCall(PetscLayoutSetUp(B->rmap));
2957   PetscCall(PetscLayoutSetUp(B->cmap));
2958   b = (Mat_MPIAIJ *)B->data;
2959 
2960 #if defined(PETSC_USE_CTABLE)
2961   PetscCall(PetscHMapIDestroy(&b->colmap));
2962 #else
2963   PetscCall(PetscFree(b->colmap));
2964 #endif
2965   PetscCall(PetscFree(b->garray));
2966   PetscCall(VecDestroy(&b->lvec));
2967   PetscCall(VecScatterDestroy(&b->Mvctx));
2968 
2969   PetscCall(MatResetPreallocation(b->A));
2970   PetscCall(MatResetPreallocation(b->B));
2971   B->preallocated  = PETSC_TRUE;
2972   B->was_assembled = PETSC_FALSE;
2973   B->assembled     = PETSC_FALSE;
2974   PetscFunctionReturn(0);
2975 }
2976 
/* Duplicates an MPIAIJ matrix: creates a new matrix with the same layout,
   copies the parallel bookkeeping (colmap, garray, lvec, Mvctx) and
   duplicates the A and B blocks; whether values are copied is controlled by
   cpvalues, as for MatDuplicate(). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  /* inherit assembly/factor state; insertion mode always starts unset */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch buffers are per-matrix and not inherited */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share (reference) the row/column layouts rather than copying them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* duplicate the global-to-local column map used for the off-diagonal block */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* duplicate garray: the global column indices of B's local columns */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3032 
3033 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3034 {
3035   PetscBool isbinary, ishdf5;
3036 
3037   PetscFunctionBegin;
3038   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3039   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3040   /* force binary viewer to load .info file if it has not yet done so */
3041   PetscCall(PetscViewerSetUp(viewer));
3042   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3043   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3044   if (isbinary) {
3045     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3046   } else if (ishdf5) {
3047 #if defined(PETSC_HAVE_HDF5)
3048     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3049 #else
3050     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3051 #endif
3052   } else {
3053     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3054   }
3055   PetscFunctionReturn(0);
3056 }
3057 
/* Reads an MPIAIJ matrix from a PETSc binary viewer.

   Binary layout: a 4-entry header (classid, M, N, nz), then all row lengths,
   then all column indices, then all values.  Each process reads its own
   share of rows collectively via PetscViewerBinaryReadAll(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths into local CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: total nonzeros over all processes must match the header */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}
3107 
3108 /* Not scalable because of ISAllGather() unless getting all columns. */
3109 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3110 {
3111   IS          iscol_local;
3112   PetscBool   isstride;
3113   PetscMPIInt lisstride = 0, gisstride;
3114 
3115   PetscFunctionBegin;
3116   /* check if we are grabbing all columns*/
3117   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3118 
3119   if (isstride) {
3120     PetscInt start, len, mstart, mlen;
3121     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3122     PetscCall(ISGetLocalSize(iscol, &len));
3123     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3124     if (mstart == start && mlen - mstart == len) lisstride = 1;
3125   }
3126 
3127   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3128   if (gisstride) {
3129     PetscInt N;
3130     PetscCall(MatGetSize(mat, NULL, &N));
3131     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3132     PetscCall(ISSetIdentity(iscol_local));
3133     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3134   } else {
3135     PetscInt cbs;
3136     PetscCall(ISGetBlockSize(iscol, &cbs));
3137     PetscCall(ISAllGather(iscol, &iscol_local));
3138     PetscCall(ISSetBlockSize(iscol_local, cbs));
3139   }
3140 
3141   *isseq = iscol_local;
3142   PetscFunctionReturn(0);
3143 }
3144 
3145 /*
3146  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3147  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3148 
3149  Input Parameters:
3150    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3153    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3154            i.e., mat->cstart <= iscol[i] < mat->cend
3155  Output Parameter:
3156    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3157    iscol_o - sequential column index set for retrieving mat->B
3158    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3159  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* entries left at -1 mark columns NOT selected by iscol */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* exclusive prefix sum of local iscol sizes = this process's offset into the global iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  /* local column indices for retrieving the diagonal block mat->A; keeps iscol's block size */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  /* shift parallel row indices into the local numbering of the diagonal block */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  /* entries still at -1 were not selected by iscol; keep only selected columns of B */
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* ownership of cmap1 transfers to the caller as garray; caller must PetscFree() it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3256 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    /* these index sets were composed onto *submat by the MAT_INITIAL_MATRIX branch below */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    /* skip the off-diagonal update when no off-process columns were selected */
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    /* note: MatCreateMPIAIJWithSeqAIJ() takes ownership of Asub and destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk both sorted global-column lists, mapping each retained column of
         asub->B back to the matching entry of the original iscol_o */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3349 
/* Extracts the parallel submatrix mat[isrow, iscol].  Dispatches to a
   specialized routine when isrow (and possibly iscol) have the same processor
   distribution as mat, falling back to the general nonscalable path that
   gathers iscol onto every process. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* recover which path created *newmat from the objects composed on it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    /* i.e. all of this process's requested rows fall in its owned row range */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the fast paths require the condition to hold on every process */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    /* iscol_local may already exist from the unsorted fall-through above */
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep iscol_local alive on the submatrix for later MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3449 
3450 /*@C
3451      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3452          and "off-diagonal" part of the matrix in CSR format.
3453 
3454    Collective
3455 
3456    Input Parameters:
3457 +  comm - MPI communicator
3458 .  A - "diagonal" portion of matrix
3459 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3460 -  garray - global index of B columns
3461 
3462    Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3465 
3466    Notes:
3467    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3468 
3469    A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3470 
3471 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3472 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's local column indices to global ones, in place, using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat */
  /* Bnew shares B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* transfer array ownership from B to Bnew so destroying B does not free them */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3543 
3544 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3545 
3546 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3547 {
3548   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3549   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3550   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3551   Mat             M, Msub, B = a->B;
3552   MatScalar      *aa;
3553   Mat_SeqAIJ     *aij;
3554   PetscInt       *garray = a->garray, *colsub, Ncols;
3555   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3556   IS              iscol_sub, iscmap;
3557   const PetscInt *is_idx, *cmap;
3558   PetscBool       allcolumns = PETSC_FALSE;
3559   MPI_Comm        comm;
3560 
3561   PetscFunctionBegin;
3562   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3563   if (call == MAT_REUSE_MATRIX) {
3564     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3565     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3566     PetscCall(ISGetLocalSize(iscol_sub, &count));
3567 
3568     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3569     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3570 
3571     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3572     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3573 
3574     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3575 
3576   } else { /* call == MAT_INITIAL_MATRIX) */
3577     PetscBool flg;
3578 
3579     PetscCall(ISGetLocalSize(iscol, &n));
3580     PetscCall(ISGetSize(iscol, &Ncols));
3581 
3582     /* (1) iscol -> nonscalable iscol_local */
3583     /* Check for special case: each processor gets entire matrix columns */
3584     PetscCall(ISIdentity(iscol_local, &flg));
3585     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3586     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3587     if (allcolumns) {
3588       iscol_sub = iscol_local;
3589       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3590       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3591 
3592     } else {
3593       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3594       PetscInt *idx, *cmap1, k;
3595       PetscCall(PetscMalloc1(Ncols, &idx));
3596       PetscCall(PetscMalloc1(Ncols, &cmap1));
3597       PetscCall(ISGetIndices(iscol_local, &is_idx));
3598       count = 0;
3599       k     = 0;
3600       for (i = 0; i < Ncols; i++) {
3601         j = is_idx[i];
3602         if (j >= cstart && j < cend) {
3603           /* diagonal part of mat */
3604           idx[count]     = j;
3605           cmap1[count++] = i; /* column index in submat */
3606         } else if (Bn) {
3607           /* off-diagonal part of mat */
3608           if (j == garray[k]) {
3609             idx[count]     = j;
3610             cmap1[count++] = i; /* column index in submat */
3611           } else if (j > garray[k]) {
3612             while (j > garray[k] && k < Bn - 1) k++;
3613             if (j == garray[k]) {
3614               idx[count]     = j;
3615               cmap1[count++] = i; /* column index in submat */
3616             }
3617           }
3618         }
3619       }
3620       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3621 
3622       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3623       PetscCall(ISGetBlockSize(iscol, &cbs));
3624       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3625 
3626       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3627     }
3628 
3629     /* (3) Create sequential Msub */
3630     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3631   }
3632 
3633   PetscCall(ISGetLocalSize(iscol_sub, &count));
3634   aij = (Mat_SeqAIJ *)(Msub)->data;
3635   ii  = aij->i;
3636   PetscCall(ISGetIndices(iscmap, &cmap));
3637 
3638   /*
3639       m - number of local rows
3640       Ncols - number of columns (same on all processors)
3641       rstart - first row in new global matrix generated
3642   */
3643   PetscCall(MatGetSize(Msub, &m, NULL));
3644 
3645   if (call == MAT_INITIAL_MATRIX) {
3646     /* (4) Create parallel newmat */
3647     PetscMPIInt rank, size;
3648     PetscInt    csize;
3649 
3650     PetscCallMPI(MPI_Comm_size(comm, &size));
3651     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3652 
3653     /*
3654         Determine the number of non-zeros in the diagonal and off-diagonal
3655         portions of the matrix in order to do correct preallocation
3656     */
3657 
3658     /* first get start and end of "diagonal" columns */
3659     PetscCall(ISGetLocalSize(iscol, &csize));
3660     if (csize == PETSC_DECIDE) {
3661       PetscCall(ISGetSize(isrow, &mglobal));
3662       if (mglobal == Ncols) { /* square matrix */
3663         nlocal = m;
3664       } else {
3665         nlocal = Ncols / size + ((Ncols % size) > rank);
3666       }
3667     } else {
3668       nlocal = csize;
3669     }
3670     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3671     rstart = rend - nlocal;
3672     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3673 
3674     /* next, compute all the lengths */
3675     jj = aij->j;
3676     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3677     olens = dlens + m;
3678     for (i = 0; i < m; i++) {
3679       jend = ii[i + 1] - ii[i];
3680       olen = 0;
3681       dlen = 0;
3682       for (j = 0; j < jend; j++) {
3683         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3684         else dlen++;
3685         jj++;
3686       }
3687       olens[i] = olen;
3688       dlens[i] = dlen;
3689     }
3690 
3691     PetscCall(ISGetBlockSize(isrow, &bs));
3692     PetscCall(ISGetBlockSize(iscol, &cbs));
3693 
3694     PetscCall(MatCreate(comm, &M));
3695     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3696     PetscCall(MatSetBlockSizes(M, bs, cbs));
3697     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3698     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3699     PetscCall(PetscFree(dlens));
3700 
3701   } else { /* call == MAT_REUSE_MATRIX */
3702     M = *newmat;
3703     PetscCall(MatGetLocalSize(M, &i, NULL));
3704     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3705     PetscCall(MatZeroEntries(M));
3706     /*
3707          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3708        rather than the slower MatSetValues().
3709     */
3710     M->was_assembled = PETSC_TRUE;
3711     M->assembled     = PETSC_FALSE;
3712   }
3713 
3714   /* (5) Set values of Msub to *newmat */
3715   PetscCall(PetscMalloc1(count, &colsub));
3716   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3717 
3718   jj = aij->j;
3719   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3720   for (i = 0; i < m; i++) {
3721     row = rstart + i;
3722     nz  = ii[i + 1] - ii[i];
3723     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3724     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3725     jj += nz;
3726     aa += nz;
3727   }
3728   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3729   PetscCall(ISRestoreIndices(iscmap, &cmap));
3730 
3731   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3732   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3733 
3734   PetscCall(PetscFree(colsub));
3735 
3736   /* save Msub, iscol_sub and iscmap used in processor for next request */
3737   if (call == MAT_INITIAL_MATRIX) {
3738     *newmat = M;
3739     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3740     PetscCall(MatDestroy(&Msub));
3741 
3742     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3743     PetscCall(ISDestroy(&iscol_sub));
3744 
3745     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3746     PetscCall(ISDestroy(&iscmap));
3747 
3748     if (iscol_local) {
3749       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3750       PetscCall(ISDestroy(&iscol_local));
3751     }
3752   }
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 /*
3757     Not great since it makes two copies of the submatrix, first an SeqAIJ
3758   in local and then by concatenating the local matrices the end result.
3759   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3760 
3761   This requires a sequential iscol with all indices.
3762 */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse; /* M: parallel result; Mreuse: per-process sequential submatrix */
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the optimization may only be used if it applies on every rank, hence the logical AND */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible; the first n%size ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum gives this rank's end of the "diagonal" column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation; olens aliases its upper half */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* columns outside [rstart,rend) land in the off-diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert the rows of the sequential submatrix into the parallel matrix */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  /* NOTE(review): aa has been advanced past the data it read; assumes Restore ignores the
     pointer value and only releases the access — confirm against MatSeqAIJRestoreArrayRead() */
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3889 
3890 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3891 {
3892   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3893   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3894   const PetscInt *JJ;
3895   PetscBool       nooffprocentries;
3896   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3897 
3898   PetscFunctionBegin;
3899   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3900 
3901   PetscCall(PetscLayoutSetUp(B->rmap));
3902   PetscCall(PetscLayoutSetUp(B->cmap));
3903   m      = B->rmap->n;
3904   cstart = B->cmap->rstart;
3905   cend   = B->cmap->rend;
3906   rstart = B->rmap->rstart;
3907 
3908   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3909 
3910   if (PetscDefined(USE_DEBUG)) {
3911     for (i = 0; i < m; i++) {
3912       nnz = Ii[i + 1] - Ii[i];
3913       JJ  = J + Ii[i];
3914       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3915       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3916       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3917     }
3918   }
3919 
3920   for (i = 0; i < m; i++) {
3921     nnz     = Ii[i + 1] - Ii[i];
3922     JJ      = J + Ii[i];
3923     nnz_max = PetscMax(nnz_max, nnz);
3924     d       = 0;
3925     for (j = 0; j < nnz; j++) {
3926       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3927     }
3928     d_nnz[i] = d;
3929     o_nnz[i] = nnz - d;
3930   }
3931   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3932   PetscCall(PetscFree2(d_nnz, o_nnz));
3933 
3934   for (i = 0; i < m; i++) {
3935     ii = i + rstart;
3936     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3937   }
3938   nooffprocentries    = B->nooffprocentries;
3939   B->nooffprocentries = PETSC_TRUE;
3940   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3941   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3942   B->nooffprocentries = nooffprocentries;
3943 
3944   /* count number of entries below block diagonal */
3945   PetscCall(PetscFree(Aij->ld));
3946   PetscCall(PetscCalloc1(m, &ld));
3947   Aij->ld = ld;
3948   for (i = 0; i < m; i++) {
3949     nnz = Ii[i + 1] - Ii[i];
3950     j   = 0;
3951     while (j < nnz && J[j] < cstart) j++;
3952     ld[i] = j;
3953     J += nnz;
3954   }
3955 
3956   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3957   PetscFunctionReturn(0);
3958 }
3959 
3960 /*@
3961    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3962    (the default parallel PETSc format).
3963 
3964    Collective
3965 
3966    Input Parameters:
3967 +  B - the matrix
3968 .  i - the indices into j for the start of each local row (starts with zero)
3969 .  j - the column indices for each local row (starts with zero)
3970 -  v - optional values in the matrix
3971 
3972    Level: developer
3973 
3974    Notes:
3975        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3976      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3977      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3978 
3979        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3980 
3981        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3983     as shown
3984 
3985 $        1 0 0
3986 $        2 0 3     P0
3987 $       -------
3988 $        4 5 6     P1
3989 $
3990 $     Process0 [P0]: rows_owned=[0,1]
3991 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3992 $        j =  {0,0,2}  [size = 3]
3993 $        v =  {1,2,3}  [size = 3]
3994 $
3995 $     Process1 [P1]: rows_owned=[2]
3996 $        i =  {0,3}    [size = nrow+1  = 1+1]
3997 $        j =  {0,1,2}  [size = 3]
3998 $        v =  {4,5,6}  [size = 3]
3999 
4000 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
4001           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
4002 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod is a no-op when the matrix type does not register "MatMPIAIJSetPreallocationCSR_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}
4009 
4010 /*@C
4011    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4012    (the default parallel PETSc format).  For good matrix assembly performance
4013    the user should preallocate the matrix storage by setting the parameters
4014    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4015    performance can be increased by more than a factor of 50.
4016 
4017    Collective
4018 
4019    Input Parameters:
4020 +  B - the matrix
4021 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4022            (same value is used for all local rows)
4023 .  d_nnz - array containing the number of nonzeros in the various rows of the
4024            DIAGONAL portion of the local submatrix (possibly different for each row)
4025            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
4026            The size of this array is equal to the number of local rows, i.e 'm'.
4027            For matrices that will be factored, you must leave room for (and set)
4028            the diagonal entry even if it is zero.
4029 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4030            submatrix (same value is used for all local rows).
4031 -  o_nnz - array containing the number of nonzeros in the various rows of the
4032            OFF-DIAGONAL portion of the local submatrix (possibly different for
4033            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
4034            structure. The size of this array is equal to the number
4035            of local rows, i.e 'm'.
4036 
4037    If the *_nnz parameter is given then the *_nz parameter is ignored
4038 
   The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
4040    storage.  The stored row and column indices begin with zero.
4041    See [Sparse Matrices](sec_matsparse) for details.
4042 
4043    The parallel matrix is partitioned such that the first m0 rows belong to
4044    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4045    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4046 
4047    The DIAGONAL portion of the local submatrix of a processor can be defined
4048    as the submatrix which is obtained by extraction the part corresponding to
4049    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4050    first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
4052    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4053    common case of a square matrix, the row and column ranges are the same and
4054    the DIAGONAL part is also square. The remaining portion of the local
4055    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4056 
4057    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4058 
4059    You can call MatGetInfo() to get information on how effective the preallocation was;
4060    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4061    You can also run with the option -info and look for messages with the string
4062    malloc in them to see if additional memory allocation was needed.
4063 
4064    Example usage:
4065 
4066    Consider the following 8x8 matrix with 34 non-zero values, that is
4067    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4068    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4069    as follows:
4070 
4071 .vb
4072             1  2  0  |  0  3  0  |  0  4
4073     Proc0   0  5  6  |  7  0  0  |  8  0
4074             9  0 10  | 11  0  0  | 12  0
4075     -------------------------------------
4076            13  0 14  | 15 16 17  |  0  0
4077     Proc1   0 18  0  | 19 20 21  |  0  0
4078             0  0  0  | 22 23  0  | 24  0
4079     -------------------------------------
4080     Proc2  25 26 27  |  0  0 28  | 29  0
4081            30  0  0  | 31 32 33  |  0 34
4082 .ve
4083 
4084    This can be represented as a collection of submatrices as:
4085 
4086 .vb
4087       A B C
4088       D E F
4089       G H I
4090 .ve
4091 
4092    Where the submatrices A,B,C are owned by proc0, D,E,F are
4093    owned by proc1, G,H,I are owned by proc2.
4094 
4095    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'M','N' parameters are 8,8, and have the same values on all procs.
4098 
4099    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4100    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4101    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4102    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4103    part as `MATSEQAIJ` matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another `MATSEQAIJ` matrix.
4105 
4106    When d_nz, o_nz parameters are specified, d_nz storage elements are
4107    allocated for every row of the local diagonal submatrix, and o_nz
4108    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4110    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4111    In this case, the values of d_nz,o_nz are:
4112 .vb
4113      proc0 : dnz = 2, o_nz = 2
4114      proc1 : dnz = 3, o_nz = 2
4115      proc2 : dnz = 1, o_nz = 4
4116 .ve
4117    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4118    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4120    34 values.
4121 
4122    When d_nnz, o_nnz parameters are specified, the storage is specified
4123    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4124    In the above case the values for d_nnz,o_nnz are:
4125 .vb
4126      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4127      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4128      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4129 .ve
4130    Here the space allocated is sum of all the above values i.e 34, and
4131    hence pre-allocation is perfect.
4132 
4133    Level: intermediate
4134 
4135 .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4136           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4137 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation; a no-op when the matrix type
     does not register "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
4146 
4147 /*@
     MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows
         in standard CSR format.
4150 
4151    Collective
4152 
4153    Input Parameters:
4154 +  comm - MPI communicator
4155 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4156 .  n - This value should be the same as the local size used in creating the
4157        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4158        calculated if N is given) For square matrices n is almost always m.
4159 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4160 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4161 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4162 .   j - column indices
4163 -   a - optional matrix values
4164 
4165    Output Parameter:
4166 .   mat - the matrix
4167 
4168    Level: intermediate
4169 
4170    Notes:
4171        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4172      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4173      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4174 
4175        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4176 
4177        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
4179     as shown
4180 
4181        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4182 
4183 $        1 0 0
4184 $        2 0 3     P0
4185 $       -------
4186 $        4 5 6     P1
4187 $
4188 $     Process0 [P0]: rows_owned=[0,1]
4189 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4190 $        j =  {0,0,2}  [size = 3]
4191 $        v =  {1,2,3}  [size = 3]
4192 $
4193 $     Process1 [P1]: rows_owned=[2]
4194 $        i =  {0,3}    [size = nrow+1  = 1+1]
4195 $        j =  {0,1,2}  [size = 3]
4196 $        v =  {4,5,6}  [size = 3]
4197 
.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4199           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4200 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i == NULL is tolerated; otherwise the first row offset must be 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies the CSR data into the matrix and assembles it */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(0);
}
4213 
4214 /*@
     MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows
         in standard CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`
4217 
4218      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4219 
4220    Collective
4221 
4222    Input Parameters:
4223 +  mat - the matrix
4224 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4225 .  n - This value should be the same as the local size used in creating the
4226        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4227        calculated if N is given) For square matrices n is almost always m.
4228 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4229 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4230 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4231 .  J - column indices
4232 -  v - matrix values
4233 
4234    Level: intermediate
4235 
4236 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4237           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4238 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;                 /* value arrays of the diagonal (A) and off-diagonal (B) blocks */
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;             /* row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld;           /* per-row count of off-diagonal entries left of the diagonal block */

  PetscFunctionBegin;
  /* NOTE(review): J, M and N are unused — the nonzero pattern is taken as unchanged since creation;
     ld is assumed to have been set by MatMPIAIJSetPreallocationCSR_MPIAIJ() (see the docstring) */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];   /* total entries of local row i in v */
    Iii = Ii[i];               /* offset of row i in v */
    ldi = ld[i];               /* off-diagonal entries preceding the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries belonging to the diagonal block */
    /* each row of v is laid out [off-diag below | diag block | off-diag above] */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE; /* values went straight into local storage; skip communication */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4282 
4283 /*@
4284      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4285 
4286    Collective
4287 
4288    Input Parameters:
4289 +  mat - the matrix
4290 -  v - matrix values, stored by row
4291 
4292    Level: intermediate
4293 
4294    Note:
4295    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4296 
4297 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4299 @*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;                     /* value arrays of the diagonal (A) and off-diagonal (B) blocks */
  const PetscInt *Adi = Ad->i, *Adj = Ao->i;   /* row offsets; note Adj is the OFF-diagonal block's i array, despite the name */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld;                /* per-row count of off-diagonal entries left of the diagonal block */

  PetscFunctionBegin;
  /* NOTE(review): ld is assumed to have been set by MatMPIAIJSetPreallocationCSR_MPIAIJ()
     (see the docstring: matrix must come from MatCreateMPIAIJWithArrays()/MatMPIAIJSetPreallocationCSR()) */
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of the current row in v */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; /* total stored entries of local row i */
    ldi = ld[i];               /* off-diagonal entries preceding the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries belonging to the diagonal block */
    /* each row of v is laid out [off-diag below | diag block | off-diag above] */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
    Iii += nnz;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE; /* values went straight into local storage; skip communication */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4341 
4342 /*@C
4343    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4344    (the default parallel PETSc format).  For good matrix assembly performance
4345    the user should preallocate the matrix storage by setting the parameters
4346    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4347    performance can be increased by more than a factor of 50.
4348 
4349    Collective
4350 
4351    Input Parameters:
4352 +  comm - MPI communicator
4353 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4354            This value should be the same as the local size used in creating the
4355            y vector for the matrix-vector product y = Ax.
4356 .  n - This value should be the same as the local size used in creating the
4357        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4358        calculated if N is given) For square matrices n is almost always m.
4359 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4360 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4361 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4362            (same value is used for all local rows)
4363 .  d_nnz - array containing the number of nonzeros in the various rows of the
4364            DIAGONAL portion of the local submatrix (possibly different for each row)
4365            or NULL, if d_nz is used to specify the nonzero structure.
4366            The size of this array is equal to the number of local rows, i.e 'm'.
4367 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4368            submatrix (same value is used for all local rows).
4369 -  o_nnz - array containing the number of nonzeros in the various rows of the
4370            OFF-DIAGONAL portion of the local submatrix (possibly different for
4371            each row) or NULL, if o_nz is used to specify the nonzero
4372            structure. The size of this array is equal to the number
4373            of local rows, i.e 'm'.
4374 
4375    Output Parameter:
4376 .  A - the matrix
4377 
4378    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4379    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4380    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4381 
4382    Notes:
4383    If the *_nnz parameter is given then the *_nz parameter is ignored
4384 
4385    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4386    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4387    storage requirements for this matrix.
4388 
   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4392 
4393    The user MUST specify either the local or global matrix dimensions
4394    (possibly both).
4395 
4396    The parallel matrix is partitioned across processors such that the
4397    first m0 rows belong to process 0, the next m1 rows belong to
4398    process 1, the next m2 rows belong to process 2 etc.. where
4399    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4400    values corresponding to [m x N] submatrix.
4401 
4402    The columns are logically partitioned with the n0 columns belonging
4403    to 0th partition, the next n1 columns belonging to the next
4404    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4405 
4406    The DIAGONAL portion of the local submatrix on any given processor
4407    is the submatrix corresponding to the rows and columns m,n
4408    corresponding to the given processor. i.e diagonal matrix on
4409    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4410    etc. The remaining portion of the local submatrix [m x (N-n)]
4411    constitute the OFF-DIAGONAL portion. The example below better
4412    illustrates this concept.
4413 
4414    For a square global matrix we define each processor's diagonal portion
4415    to be its local rows and the corresponding columns (a square submatrix);
4416    each processor's off-diagonal portion encompasses the remainder of the
4417    local matrix (a rectangular submatrix).
4418 
4419    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4420 
4421    When calling this routine with a single process communicator, a matrix of
4422    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4423    type of communicator, use the construction mechanism
4424 .vb
4425      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4426 .ve
4427 
4428 $     MatCreate(...,&A);
4429 $     MatSetType(A,MATMPIAIJ);
4430 $     MatSetSizes(A, m,n,M,N);
4431 $     MatMPIAIJSetPreallocation(A,...);
4432 
4433    By default, this format uses inodes (identical nodes) when possible.
4434    We search for consecutive rows with the same nonzero structure, thereby
4435    reusing matrix information to achieve increased efficiency.
4436 
4437    Options Database Keys:
4438 +  -mat_no_inode  - Do not use inodes
4439 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4440 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4441         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4442         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4443 
4444    Example usage:
4445 
4446    Consider the following 8x8 matrix with 34 non-zero values, that is
4447    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4448    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4449    as follows
4450 
4451 .vb
4452             1  2  0  |  0  3  0  |  0  4
4453     Proc0   0  5  6  |  7  0  0  |  8  0
4454             9  0 10  | 11  0  0  | 12  0
4455     -------------------------------------
4456            13  0 14  | 15 16 17  |  0  0
4457     Proc1   0 18  0  | 19 20 21  |  0  0
4458             0  0  0  | 22 23  0  | 24  0
4459     -------------------------------------
4460     Proc2  25 26 27  |  0  0 28  | 29  0
4461            30  0  0  | 31 32 33  |  0 34
4462 .ve
4463 
4464    This can be represented as a collection of submatrices as
4465 
4466 .vb
4467       A B C
4468       D E F
4469       G H I
4470 .ve
4471 
4472    Where the submatrices A,B,C are owned by proc0, D,E,F are
4473    owned by proc1, G,H,I are owned by proc2.
4474 
4475    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4476    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4477    The 'M','N' parameters are 8,8, and have the same values on all procs.
4478 
4479    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4480    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4481    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4485 
4486    When d_nz, o_nz parameters are specified, d_nz storage elements are
4487    allocated for every row of the local diagonal submatrix, and o_nz
4488    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4491    In this case, the values of d_nz,o_nz are
4492 .vb
4493      proc0 : dnz = 2, o_nz = 2
4494      proc1 : dnz = 3, o_nz = 2
4495      proc2 : dnz = 1, o_nz = 4
4496 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.
4501 
4502    When d_nnz, o_nnz parameters are specified, the storage is specified
4503    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4504    In the above case the values for d_nnz,o_nnz are
4505 .vb
4506      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4507      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4508      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4509 .ve
4510    Here the space allocated is sum of all the above values i.e 34, and
4511    hence pre-allocation is perfect.
4512 
4513    Level: intermediate
4514 
4515 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4516           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4517 @*/
4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4519 {
4520   PetscMPIInt size;
4521 
4522   PetscFunctionBegin;
4523   PetscCall(MatCreate(comm, A));
4524   PetscCall(MatSetSizes(*A, m, n, M, N));
4525   PetscCallMPI(MPI_Comm_size(comm, &size));
4526   if (size > 1) {
4527     PetscCall(MatSetType(*A, MATMPIAIJ));
4528     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4529   } else {
4530     PetscCall(MatSetType(*A, MATSEQAIJ));
4531     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4532   }
4533   PetscFunctionReturn(0);
4534 }
4535 
4536 /*@C
4537   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4538 
4539   Not collective
4540 
4541   Input Parameter:
4542 . A - The `MATMPIAIJ` matrix
4543 
4544   Output Parameters:
4545 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4546 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4547 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4548 
4549   Note:
  The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
4554 
4555   Level: intermediate
4556 
4557 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4558 @*/
4559 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4560 {
4561   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4562   PetscBool   flg;
4563 
4564   PetscFunctionBegin;
4565   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4566   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4567   if (Ad) *Ad = a->A;
4568   if (Ao) *Ao = a->B;
4569   if (colmap) *colmap = a->garray;
4570   PetscFunctionReturn(0);
4571 }
4572 
/* Concatenate the per-process sequential matrices `inmat` (stacked by rows) into one
   parallel AIJ matrix `*outmat` on `comm`. `n` is this process's local column count
   (or PETSC_DECIDE). With MAT_INITIAL_MATRIX the layout and preallocation are built
   first; with reuse only the numeric values are re-inserted into the existing matrix. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* prefix-sum of local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for exact preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocation calls are issued; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* each rank inserts only its own rows, so skip off-process stashing */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart; /* global row index of local row i */
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4624 
4625 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4626 {
4627   PetscMPIInt        rank;
4628   PetscInt           m, N, i, rstart, nnz;
4629   size_t             len;
4630   const PetscInt    *indx;
4631   PetscViewer        out;
4632   char              *name;
4633   Mat                B;
4634   const PetscScalar *values;
4635 
4636   PetscFunctionBegin;
4637   PetscCall(MatGetLocalSize(A, &m, NULL));
4638   PetscCall(MatGetSize(A, NULL, &N));
4639   /* Should this be the type of the diagonal block of A? */
4640   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4641   PetscCall(MatSetSizes(B, m, N, m, N));
4642   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4643   PetscCall(MatSetType(B, MATSEQAIJ));
4644   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4645   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4646   for (i = 0; i < m; i++) {
4647     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4648     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4649     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4650   }
4651   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4652   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4653 
4654   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4655   PetscCall(PetscStrlen(outfile, &len));
4656   PetscCall(PetscMalloc1(len + 6, &name));
4657   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4658   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4659   PetscCall(PetscFree(name));
4660   PetscCall(MatView(B, out));
4661   PetscCall(PetscViewerDestroy(&out));
4662   PetscCall(MatDestroy(&B));
4663   PetscFunctionReturn(0);
4664 }
4665 
/* Container destructor for the Mat_Merge_SeqsToMPI support structure attached to
   matrices produced by MatCreateMPIAIJSumSeqAIJSymbolic(). Frees all merge state. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0); /* nothing attached */
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri[0]/buf_rj[0] hold the contiguous storage backing all received messages;
     the outer arrays are freed separately */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap)); /* must precede freeing merge itself */
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}
4688 
4689 #include <../src/mat/utils/freespace.h>
4690 #include <petscbt.h>
4691 
/* Numeric phase of merging per-rank sequential AIJ matrices into the parallel matrix
   `mpimat` previously created by MatCreateMPIAIJSumSeqAIJSymbolic(). Each rank sends
   the values of the rows it holds for other ranks, receives values for its own rows,
   and accumulates local plus received contributions into mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge state computed by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;     /* row pointers of the merged local rows */
  bj     = merge->bj;     /* column indices of the merged local rows */
  buf_ri = merge->buf_ri; /* received i-structures from the symbolic phase */
  buf_rj = merge->buf_rj; /* received j-structures from the symbolic phase */

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc]; /* first global row owned by proc; its values start at aa + ai[i] */
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i)); /* dense accumulator sized to the worst-case row length */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index of local row i */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge sorted index lists: advance j over bj_i, accumulating where columns match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
4812 
/* Symbolic phase of merging per-rank sequential AIJ matrices into one parallel matrix.
   Each rank holds a full-sized seqmat; rows owned by other ranks are sent to their
   owners, the union nonzero structure of each local row is computed, and a preallocated
   (but unassembled) MATMPIAIJ is returned in *mpimat with the merge state attached for
   reuse by MatCreateMPIAIJSumSeqAIJNumeric(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;     /* local row count (resolved if PETSC_DECIDE was given) */
  owners = merge->rowmap->range; /* owners[p]..owners[p+1]-1 are the rows of rank p */

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0; /* no self-message: local rows are handled directly */
    } else {
      len_si[proc] = owners[proc + 1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only nonempty rows; empty rows are not included in the i-structure message */
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      len_si[proc] = 2 * (nrows + 1); /* header + row indices + row offsets */
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0;
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  /* compact the accumulated column indices into the final bj array */
  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm)); /* drop the reference taken by PetscCommDuplicate */

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
5058 
5059 /*@C
5060       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5061                  matrices from each processor
5062 
5063     Collective
5064 
5065    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
5068 .    m - number of local rows (or `PETSC_DECIDE`)
5069 .    n - number of local columns (or `PETSC_DECIDE`)
5070 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5071 
5072    Output Parameter:
5073 .    mpimat - the parallel matrix generated
5074 
5075     Level: advanced
5076 
5077    Note:
5078      The dimensions of the sequential matrix in each processor MUST be the same.
5079      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5080      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5081 @*/
5082 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5083 {
5084   PetscMPIInt size;
5085 
5086   PetscFunctionBegin;
5087   PetscCallMPI(MPI_Comm_size(comm, &size));
5088   if (size == 1) {
5089     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5090     if (scall == MAT_INITIAL_MATRIX) {
5091       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5092     } else {
5093       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5094     }
5095     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5096     PetscFunctionReturn(0);
5097   }
5098   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5099   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5100   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5101   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5102   PetscFunctionReturn(0);
5103 }
5104 
5105 /*@
5106      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5107           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5108           with `MatGetSize()`
5109 
5110     Not Collective
5111 
   Input Parameter:
.    A - the matrix
5115 
5116    Output Parameter:
5117 .    A_loc - the local sequential matrix generated
5118 
5119     Level: developer
5120 
5121    Notes:
5122      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5123 
5124      Destroy the matrix with `MatDestroy()`
5125 
5126 .seealso: `MatMPIAIJGetLocalMat()`
5127 @*/
5128 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5129 {
5130   PetscBool mpi;
5131 
5132   PetscFunctionBegin;
5133   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5134   if (mpi) {
5135     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5136   } else {
5137     *A_loc = A;
5138     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5139   }
5140   PetscFunctionReturn(0);
5141 }
5142 
5143 /*@
5144      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5145           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5146           with `MatGetSize()`
5147 
5148     Not Collective
5149 
5150    Input Parameters:
5151 +    A - the matrix
5152 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5153 
5154    Output Parameter:
5155 .    A_loc - the local sequential matrix generated
5156 
5157     Level: developer
5158 
5159    Notes:
5160      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5161 
5162      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5163      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5164      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5165      modify the values of the returned A_loc.
5166 
5167 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5168 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Single process: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  /* a = diagonal block (local columns), b = off-diagonal block (remote columns) */
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba (and aj/bj below) are advanced as running cursors through both CSR structures */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds diag + off-diag entries */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* Merge so that each row's global column indices come out sorted:
       off-diag columns < cstart, then the diagonal block, then off-diag columns >= cstart */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Pattern is unchanged; only refresh the values in the same merged order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5273 
5274 /*@
5275      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5276           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5277 
5278     Not Collective
5279 
5280    Input Parameters:
5281 +    A - the matrix
5282 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5283 
5284    Output Parameters:
5285 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5286 -    A_loc - the local sequential matrix generated
5287 
5288     Level: developer
5289 
5290    Note:
5291      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering)
5292 
5293 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5294 @*/
5295 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5296 {
5297   Mat             Ao, Ad;
5298   const PetscInt *cmap;
5299   PetscMPIInt     size;
5300   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5301 
5302   PetscFunctionBegin;
5303   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5304   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5305   if (size == 1) {
5306     if (scall == MAT_INITIAL_MATRIX) {
5307       PetscCall(PetscObjectReference((PetscObject)Ad));
5308       *A_loc = Ad;
5309     } else if (scall == MAT_REUSE_MATRIX) {
5310       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5311     }
5312     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5313     PetscFunctionReturn(0);
5314   }
5315   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5316   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5317   if (f) {
5318     PetscCall((*f)(A, scall, glob, A_loc));
5319   } else {
5320     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5321     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5322     Mat_SeqAIJ        *c;
5323     PetscInt          *ai = a->i, *aj = a->j;
5324     PetscInt          *bi = b->i, *bj = b->j;
5325     PetscInt          *ci, *cj;
5326     const PetscScalar *aa, *ba;
5327     PetscScalar       *ca;
5328     PetscInt           i, j, am, dn, on;
5329 
5330     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5331     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5332     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5333     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5334     if (scall == MAT_INITIAL_MATRIX) {
5335       PetscInt k;
5336       PetscCall(PetscMalloc1(1 + am, &ci));
5337       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5338       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5339       ci[0] = 0;
5340       for (i = 0, k = 0; i < am; i++) {
5341         const PetscInt ncols_o = bi[i + 1] - bi[i];
5342         const PetscInt ncols_d = ai[i + 1] - ai[i];
5343         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5344         /* diagonal portion of A */
5345         for (j = 0; j < ncols_d; j++, k++) {
5346           cj[k] = *aj++;
5347           ca[k] = *aa++;
5348         }
5349         /* off-diagonal portion of A */
5350         for (j = 0; j < ncols_o; j++, k++) {
5351           cj[k] = dn + *bj++;
5352           ca[k] = *ba++;
5353         }
5354       }
5355       /* put together the new matrix */
5356       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5357       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5358       /* Since these are PETSc arrays, change flags to free them as necessary. */
5359       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5360       c->free_a  = PETSC_TRUE;
5361       c->free_ij = PETSC_TRUE;
5362       c->nonew   = 0;
5363       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5364     } else if (scall == MAT_REUSE_MATRIX) {
5365       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5366       for (i = 0; i < am; i++) {
5367         const PetscInt ncols_d = ai[i + 1] - ai[i];
5368         const PetscInt ncols_o = bi[i + 1] - bi[i];
5369         /* diagonal portion of A */
5370         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5371         /* off-diagonal portion of A */
5372         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5373       }
5374       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5375     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5376     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5377     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5378     if (glob) {
5379       PetscInt cst, *gidx;
5380 
5381       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5382       PetscCall(PetscMalloc1(dn + on, &gidx));
5383       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5384       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5385       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5386     }
5387   }
5388   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5389   PetscFunctionReturn(0);
5390 }
5391 
5392 /*@C
5393      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5394 
5395     Not Collective
5396 
5397    Input Parameters:
5398 +    A - the matrix
5399 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5400 -    row, col - index sets of rows and columns to extract (or NULL)
5401 
5402    Output Parameter:
5403 .    A_loc - the local sequential matrix generated
5404 
5405     Level: developer
5406 
5407 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5408 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  /* Default row set: all locally owned rows */
  if (!row) {
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  /* Default column set: global columns with local nonzeros, assembled in
     ascending order (off-diag < cstart, owned columns, off-diag >= cstart) */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  /* MatCreateSubMatrices() expects a user-provided array of matrices on reuse;
     for MAT_INITIAL_MATRIX it allocates aloc itself */
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* Only destroy the index sets we created here; caller-provided ones are untouched */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5461 
5462 /*
 * Create a sequential AIJ matrix based on row indices; a whole row is extracted once it is matched.
 * Rows may be local or remote. The routine is designed to be scalable in memory so that nothing is based
5465  * on a global size.
5466  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero colunms for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per owned row, record the (diag, offdiag) nonzero counts and their running offsets;
     entries are interleaved in pairs so one MPIU_2INT broadcast moves both */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* Count nonzeros per requested row (pnnz) and totals for the diag/offdiag parts */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  /* Build entry-level SF graphs: one leaf per nonzero, addressing the owner's
     CSR data at the broadcast offsets; ntotalcols walks P_oth's packed storage */
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix */
  /* NOTE: pd->j / po->j are shifted to global numbering IN PLACE for the broadcast
     and restored afterwards; the intermediate state must not be observed elsewhere */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Restore po->j to local numbering; every global index must map back (IS_GTOLM_DROP drops none) */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5635 
5636 /*
5637  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5638  * This supports MPIAIJ and MAIJ
5639  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    /* Dividing by dof collapses the dof columns of a MAIJ matrix onto one P row */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that  a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    /* map: off-diagonal column of A -> local row of *P_oth */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* Hash-map keys come out unordered; sort to get a deterministic row ordering */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}
5712 
5713 /*@C
  MatGetBrowsOfAcols - Returns an `IS` containing the rows of B that correspond to nonzero columns of local A
5715 
5716   Collective
5717 
5718   Input Parameters:
5719 + A - the first matrix in `MATMPIAIJ` format
5720 . B - the second matrix in `MATMPIAIJ` format
5721 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5722 
5723   Output Parameters:
5724 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5725 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5726 - B_seq - the sequential matrix generated
5727 
5728   Level: developer
5729 
5730 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* A's local column ownership range must coincide with B's local row ownership range */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the sorted list of global B-row indices needed by this rank by merging:
       entries of garray[] (global col indices of A's off-diagonal block, sorted)
       below the local range, then the local rows themselves, then the rest of garray[] */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    /* take all columns of B */
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of matrices when reusing; wrap *B_seq in one */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq)); /* free only the one-element array; the matrix itself is returned via B_seq */
  /* hand the index sets back to the caller for later reuse, or destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(0);
}
5783 
5784 /*
5785     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5786     of the OFF-DIAGONAL portion of local A
5787 
5788     Collective
5789 
5790    Input Parameters:
5791 +    A,B - the matrices in mpiaij format
5792 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5793 
5794    Output Parameter:
5795 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5796 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5797 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5798 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5799 
5800     Developer Note:
5801     This directly accesses information inside the VecScatter associated with the matrix-vector product
5802      for this matrix. This is not desirable.
5803 
5804     Level: developer
5805 
5806 */
5807 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5808 {
5809   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5810   Mat_SeqAIJ        *b_oth;
5811   VecScatter         ctx;
5812   MPI_Comm           comm;
5813   const PetscMPIInt *rprocs, *sprocs;
5814   const PetscInt    *srow, *rstarts, *sstarts;
5815   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5816   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5817   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5818   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5819   PetscMPIInt        size, tag, rank, nreqs;
5820 
5821   PetscFunctionBegin;
5822   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5823   PetscCallMPI(MPI_Comm_size(comm, &size));
5824 
5825   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5826     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5827   }
5828   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5829   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5830 
5831   if (size == 1) {
5832     startsj_s = NULL;
5833     bufa_ptr  = NULL;
5834     *B_oth    = NULL;
5835     PetscFunctionReturn(0);
5836   }
5837 
5838   ctx = a->Mvctx;
5839   tag = ((PetscObject)ctx)->tag;
5840 
5841   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5842   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5843   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5844   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5845   PetscCall(PetscMalloc1(nreqs, &reqs));
5846   rwaits = reqs;
5847   swaits = reqs + nrecvs;
5848 
5849   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5850   if (scall == MAT_INITIAL_MATRIX) {
5851     /* i-array */
5852     /*---------*/
5853     /*  post receives */
5854     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5855     for (i = 0; i < nrecvs; i++) {
5856       rowlen = rvalues + rstarts[i] * rbs;
5857       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5858       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5859     }
5860 
5861     /* pack the outgoing message */
5862     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5863 
5864     sstartsj[0] = 0;
5865     rstartsj[0] = 0;
5866     len         = 0; /* total length of j or a array to be sent */
5867     if (nsends) {
5868       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5869       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5870     }
5871     for (i = 0; i < nsends; i++) {
5872       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5873       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5874       for (j = 0; j < nrows; j++) {
5875         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5876         for (l = 0; l < sbs; l++) {
5877           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5878 
5879           rowlen[j * sbs + l] = ncols;
5880 
5881           len += ncols;
5882           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5883         }
5884         k++;
5885       }
5886       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5887 
5888       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5889     }
5890     /* recvs and sends of i-array are completed */
5891     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5892     PetscCall(PetscFree(svalues));
5893 
5894     /* allocate buffers for sending j and a arrays */
5895     PetscCall(PetscMalloc1(len + 1, &bufj));
5896     PetscCall(PetscMalloc1(len + 1, &bufa));
5897 
5898     /* create i-array of B_oth */
5899     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5900 
5901     b_othi[0] = 0;
5902     len       = 0; /* total length of j or a array to be received */
5903     k         = 0;
5904     for (i = 0; i < nrecvs; i++) {
5905       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5906       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5907       for (j = 0; j < nrows; j++) {
5908         b_othi[k + 1] = b_othi[k] + rowlen[j];
5909         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5910         k++;
5911       }
5912       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5913     }
5914     PetscCall(PetscFree(rvalues));
5915 
5916     /* allocate space for j and a arrays of B_oth */
5917     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5918     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5919 
5920     /* j-array */
5921     /*---------*/
5922     /*  post receives of j-array */
5923     for (i = 0; i < nrecvs; i++) {
5924       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5925       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5926     }
5927 
5928     /* pack the outgoing message j-array */
5929     if (nsends) k = sstarts[0];
5930     for (i = 0; i < nsends; i++) {
5931       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5932       bufJ  = bufj + sstartsj[i];
5933       for (j = 0; j < nrows; j++) {
5934         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5935         for (ll = 0; ll < sbs; ll++) {
5936           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5937           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5938           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5939         }
5940       }
5941       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5942     }
5943 
5944     /* recvs and sends of j-array are completed */
5945     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5946   } else if (scall == MAT_REUSE_MATRIX) {
5947     sstartsj = *startsj_s;
5948     rstartsj = *startsj_r;
5949     bufa     = *bufa_ptr;
5950     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5951     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5952   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5953 
5954   /* a-array */
5955   /*---------*/
5956   /*  post receives of a-array */
5957   for (i = 0; i < nrecvs; i++) {
5958     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5959     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5960   }
5961 
5962   /* pack the outgoing message a-array */
5963   if (nsends) k = sstarts[0];
5964   for (i = 0; i < nsends; i++) {
5965     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5966     bufA  = bufa + sstartsj[i];
5967     for (j = 0; j < nrows; j++) {
5968       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5969       for (ll = 0; ll < sbs; ll++) {
5970         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5971         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5972         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5973       }
5974     }
5975     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5976   }
5977   /* recvs and sends of a-array are completed */
5978   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5979   PetscCall(PetscFree(reqs));
5980 
5981   if (scall == MAT_INITIAL_MATRIX) {
5982     /* put together the new matrix */
5983     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5984 
5985     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5986     /* Since these are PETSc arrays, change flags to free them as necessary. */
5987     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5988     b_oth->free_a  = PETSC_TRUE;
5989     b_oth->free_ij = PETSC_TRUE;
5990     b_oth->nonew   = 0;
5991 
5992     PetscCall(PetscFree(bufj));
5993     if (!startsj_s || !bufa_ptr) {
5994       PetscCall(PetscFree2(sstartsj, rstartsj));
5995       PetscCall(PetscFree(bufa_ptr));
5996     } else {
5997       *startsj_s = sstartsj;
5998       *startsj_r = rstartsj;
5999       *bufa_ptr  = bufa;
6000     }
6001   } else if (scall == MAT_REUSE_MATRIX) {
6002     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6003   }
6004 
6005   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6006   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6007   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6008   PetscFunctionReturn(0);
6009 }
6010 
6011 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6013 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6014 #if defined(PETSC_HAVE_MKL_SPARSE)
6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6016 #endif
6017 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6018 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6019 #if defined(PETSC_HAVE_ELEMENTAL)
6020 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6021 #endif
6022 #if defined(PETSC_HAVE_SCALAPACK)
6023 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6024 #endif
6025 #if defined(PETSC_HAVE_HYPRE)
6026 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6027 #endif
6028 #if defined(PETSC_HAVE_CUDA)
6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6030 #endif
6031 #if defined(PETSC_HAVE_HIP)
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6033 #endif
6034 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6036 #endif
6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6038 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6039 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6040 
6041 /*
6042     Computes (B'*A')' since computing B*A directly is untenable
6043 
6044                n                       p                          p
6045         [             ]       [             ]         [                 ]
6046       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6047         [             ]       [             ]         [                 ]
6048 
6049 */
6050 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6051 {
6052   Mat At, Bt, Ct;
6053 
6054   PetscFunctionBegin;
6055   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6056   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6057   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6058   PetscCall(MatDestroy(&At));
6059   PetscCall(MatDestroy(&Bt));
6060   PetscCall(MatTransposeSetPrecursor(Ct, C));
6061   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6062   PetscCall(MatDestroy(&Ct));
6063   PetscFunctionReturn(0);
6064 }
6065 
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  /* Symbolic phase of C = A*B (A dense, B AIJ): size and type C, then install the
     numeric routine. The 'fill' parameter of the symbolic interface is not used here. */
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type if it is already one of the (possibly device-resident) dense types;
     otherwise give it A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  /* the numeric phase computes C = (B'*A')' */
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}
6081 
6082 /* ----------------------------------------------------------------*/
6083 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6084 {
6085   Mat_Product *product = C->product;
6086   Mat          A = product->A, B = product->B;
6087 
6088   PetscFunctionBegin;
6089   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6090     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6091 
6092   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6093   C->ops->productsymbolic = MatProductSymbolic_AB;
6094   PetscFunctionReturn(0);
6095 }
6096 
6097 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6098 {
6099   Mat_Product *product = C->product;
6100 
6101   PetscFunctionBegin;
6102   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6103   PetscFunctionReturn(0);
6104 }
6105 
6106 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6107 
6108   Input Parameters:
6109 
6110     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6111     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6112 
6113     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6114 
6115     For Set1, j1[] contains column indices of the nonzeros.
6116     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6117     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6118     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6119 
6120     Similar for Set2.
6121 
6122     This routine merges the two sets of nonzeros row by row and removes repeats.
6123 
6124   Output Parameters: (memory is allocated by the caller)
6125 
6126     i[],j[]: the CSR of the merged matrix, which has m rows.
6127     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6128     imap2[]: similar to imap1[], but for Set2.
6129     Note we order nonzeros row-by-row and from left to right.
6130 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Counters of unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Two-way merge of this row's sorted (possibly repeated) column indices; each step
       advances b1/b2 over all repeats of the current unique nonzero via jmap1/jmap2 */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Next unique nonzero comes from Set1 only */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Next unique nonzero comes from Set2 only */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(0);
}
6188 
6189 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6190 
6191   Input Parameters:
6192     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6193     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6194       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6195 
6196       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6197       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6198 
6199   Output Parameters:
6200     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6201     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6202       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6203       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6204 
6205     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6206       Atot: number of entries belonging to the diagonal block.
6207       Annz: number of unique nonzeros belonging to the diagonal block.
6208       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6209         repeats (i.e., same 'i,j' pair).
6210       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6211         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6212 
6213       Atot: number of entries belonging to the diagonal block
6214       Annz: number of unique nonzeros belonging to the diagonal block.
6215 
6216     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6217 
6218     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6219 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  /* Entries with negative row indices are to be ignored; sorting put them first */
  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); /* NOTE(review): this permits j[p] == N, one past the last valid column; confirm whether it should be j[p] < mat->cmap->N */
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified (shifted) diagonal column indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Move on to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* this unique diag nonzero has (p - q) repeats */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); /* this unique offdiag nonzero has (p - q) repeats */
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6328 
6329 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6330 
6331   Input Parameters:
6332     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6333     nnz:  number of unique nonzeros in the merged matrix
6334     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6335     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6336 
6337   Output Parameter: (memory is allocated by the caller)
6338     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6339 
6340   Example:
6341     nnz1 = 4
6342     nnz  = 6
6343     imap = [1,3,4,5]
6344     jmap = [0,3,5,6,7]
6345    then,
6346     jmap_new = [0,0,3,3,5,6,7]
6347 */
6348 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6349 {
6350   PetscCount k, p;
6351 
6352   PetscFunctionBegin;
6353   jmap_new[0] = 0;
6354   p           = nnz;                /* p loops over jmap_new[] backwards */
6355   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6356     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6357   }
6358   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6359   PetscFunctionReturn(0);
6360 }
6361 
6362 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6363 {
6364   MPI_Comm    comm;
6365   PetscMPIInt rank, size;
6366   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6367   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6368   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6369 
6370   PetscFunctionBegin;
6371   PetscCall(PetscFree(mpiaij->garray));
6372   PetscCall(VecDestroy(&mpiaij->lvec));
6373 #if defined(PETSC_USE_CTABLE)
6374   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6375 #else
6376   PetscCall(PetscFree(mpiaij->colmap));
6377 #endif
6378   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6379   mat->assembled     = PETSC_FALSE;
6380   mat->was_assembled = PETSC_FALSE;
6381   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6382 
6383   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6384   PetscCallMPI(MPI_Comm_size(comm, &size));
6385   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6386   PetscCall(PetscLayoutSetUp(mat->rmap));
6387   PetscCall(PetscLayoutSetUp(mat->cmap));
6388   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6389   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6390   PetscCall(MatGetLocalSize(mat, &m, &n));
6391   PetscCall(MatGetSize(mat, &M, &N));
6392 
6393   /* ---------------------------------------------------------------------------*/
6394   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6395   /* entries come first, then local rows, then remote rows.                     */
6396   /* ---------------------------------------------------------------------------*/
6397   PetscCount n1 = coo_n, *perm1;
6398   PetscInt  *i1 = coo_i, *j1 = coo_j;
6399 
6400   PetscCall(PetscMalloc1(n1, &perm1));
6401   for (k = 0; k < n1; k++) perm1[k] = k;
6402 
6403   /* Manipulate indices so that entries with negative row or col indices will have smallest
6404      row indices, local entries will have greater but negative row indices, and remote entries
6405      will have positive row indices.
6406   */
6407   for (k = 0; k < n1; k++) {
6408     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6409     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6410     else {
6411       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6412       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6413     }
6414   }
6415 
6416   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6417   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6418   for (k = 0; k < n1; k++) {
6419     if (i1[k] > PETSC_MIN_INT) break;
6420   }                                                                               /* Advance k to the first entry we need to take care of */
6421   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6422   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6423 
6424   /* ---------------------------------------------------------------------------*/
6425   /*           Split local rows into diag/offdiag portions                      */
6426   /* ---------------------------------------------------------------------------*/
6427   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6428   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6429   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6430 
6431   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6432   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6433   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6434 
6435   /* ---------------------------------------------------------------------------*/
6436   /*           Send remote rows to their owner                                  */
6437   /* ---------------------------------------------------------------------------*/
6438   /* Find which rows should be sent to which remote ranks*/
6439   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6440   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6441   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6442   const PetscInt *ranges;
6443   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6444 
6445   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6446   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6447   for (k = rem; k < n1;) {
6448     PetscMPIInt owner;
6449     PetscInt    firstRow, lastRow;
6450 
6451     /* Locate a row range */
6452     firstRow = i1[k]; /* first row of this owner */
6453     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6454     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6455 
6456     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6457     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6458 
6459     /* All entries in [k,p) belong to this remote owner */
6460     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6461       PetscMPIInt *sendto2;
6462       PetscInt    *nentries2;
6463       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6464 
6465       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6466       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6467       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6468       PetscCall(PetscFree2(sendto, nentries2));
6469       sendto   = sendto2;
6470       nentries = nentries2;
6471       maxNsend = maxNsend2;
6472     }
6473     sendto[nsend]   = owner;
6474     nentries[nsend] = p - k;
6475     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6476     nsend++;
6477     k = p;
6478   }
6479 
6480   /* Build 1st SF to know offsets on remote to send data */
6481   PetscSF      sf1;
6482   PetscInt     nroots = 1, nroots2 = 0;
6483   PetscInt     nleaves = nsend, nleaves2 = 0;
6484   PetscInt    *offsets;
6485   PetscSFNode *iremote;
6486 
6487   PetscCall(PetscSFCreate(comm, &sf1));
6488   PetscCall(PetscMalloc1(nsend, &iremote));
6489   PetscCall(PetscMalloc1(nsend, &offsets));
6490   for (k = 0; k < nsend; k++) {
6491     iremote[k].rank  = sendto[k];
6492     iremote[k].index = 0;
6493     nleaves2 += nentries[k];
6494     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6495   }
6496   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6497   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6498   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6499   PetscCall(PetscSFDestroy(&sf1));
6500   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6501 
6502   /* Build 2nd SF to send remote COOs to their owner */
6503   PetscSF sf2;
6504   nroots  = nroots2;
6505   nleaves = nleaves2;
6506   PetscCall(PetscSFCreate(comm, &sf2));
6507   PetscCall(PetscSFSetFromOptions(sf2));
6508   PetscCall(PetscMalloc1(nleaves, &iremote));
6509   p = 0;
6510   for (k = 0; k < nsend; k++) {
6511     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6512     for (q = 0; q < nentries[k]; q++, p++) {
6513       iremote[p].rank  = sendto[k];
6514       iremote[p].index = offsets[k] + q;
6515     }
6516   }
6517   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6518 
6519   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6520   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6521 
6522   /* Send the remote COOs to their owner */
6523   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6524   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6525   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6526   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6527   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6528   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6529   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6530 
6531   PetscCall(PetscFree(offsets));
6532   PetscCall(PetscFree2(sendto, nentries));
6533 
6534   /* ---------------------------------------------------------------*/
6535   /* Sort received COOs by row along with the permutation array     */
6536   /* ---------------------------------------------------------------*/
6537   for (k = 0; k < n2; k++) perm2[k] = k;
6538   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6539 
6540   /* ---------------------------------------------------------------*/
6541   /* Split received COOs into diag/offdiag portions                 */
6542   /* ---------------------------------------------------------------*/
6543   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6544   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6545   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6546 
6547   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6548   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6549 
6550   /* --------------------------------------------------------------------------*/
6551   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6552   /* --------------------------------------------------------------------------*/
6553   PetscInt *Ai, *Bi;
6554   PetscInt *Aj, *Bj;
6555 
6556   PetscCall(PetscMalloc1(m + 1, &Ai));
6557   PetscCall(PetscMalloc1(m + 1, &Bi));
6558   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6559   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6560 
6561   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6562   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6563   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6564   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6565   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6566 
6567   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6568   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6569 
6570   /* --------------------------------------------------------------------------*/
6571   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6572   /* expect nonzeros in A/B most likely have local contributing entries        */
6573   /* --------------------------------------------------------------------------*/
6574   PetscInt    Annz = Ai[m];
6575   PetscInt    Bnnz = Bi[m];
6576   PetscCount *Ajmap1_new, *Bjmap1_new;
6577 
6578   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6579   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6580 
6581   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6582   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6583 
6584   PetscCall(PetscFree(Aimap1));
6585   PetscCall(PetscFree(Ajmap1));
6586   PetscCall(PetscFree(Bimap1));
6587   PetscCall(PetscFree(Bjmap1));
6588   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6589   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6590   PetscCall(PetscFree(perm1));
6591   PetscCall(PetscFree3(i2, j2, perm2));
6592 
6593   Ajmap1 = Ajmap1_new;
6594   Bjmap1 = Bjmap1_new;
6595 
6596   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6597   if (Annz < Annz1 + Annz2) {
6598     PetscInt *Aj_new;
6599     PetscCall(PetscMalloc1(Annz, &Aj_new));
6600     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6601     PetscCall(PetscFree(Aj));
6602     Aj = Aj_new;
6603   }
6604 
6605   if (Bnnz < Bnnz1 + Bnnz2) {
6606     PetscInt *Bj_new;
6607     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6608     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6609     PetscCall(PetscFree(Bj));
6610     Bj = Bj_new;
6611   }
6612 
6613   /* --------------------------------------------------------------------------------*/
6614   /* Create new submatrices for on-process and off-process coupling                  */
6615   /* --------------------------------------------------------------------------------*/
6616   PetscScalar *Aa, *Ba;
6617   MatType      rtype;
6618   Mat_SeqAIJ  *a, *b;
6619   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6620   PetscCall(PetscCalloc1(Bnnz, &Ba));
6621   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6622   if (cstart) {
6623     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6624   }
6625   PetscCall(MatDestroy(&mpiaij->A));
6626   PetscCall(MatDestroy(&mpiaij->B));
6627   PetscCall(MatGetRootType_Private(mat, &rtype));
6628   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6629   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6630   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6631 
6632   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6633   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6634   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6635   a->free_a = b->free_a = PETSC_TRUE;
6636   a->free_ij = b->free_ij = PETSC_TRUE;
6637 
6638   /* conversion must happen AFTER multiply setup */
6639   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6640   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6641   PetscCall(VecDestroy(&mpiaij->lvec));
6642   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6643 
6644   mpiaij->coo_n   = coo_n;
6645   mpiaij->coo_sf  = sf2;
6646   mpiaij->sendlen = nleaves;
6647   mpiaij->recvlen = nroots;
6648 
6649   mpiaij->Annz = Annz;
6650   mpiaij->Bnnz = Bnnz;
6651 
6652   mpiaij->Annz2 = Annz2;
6653   mpiaij->Bnnz2 = Bnnz2;
6654 
6655   mpiaij->Atot1 = Atot1;
6656   mpiaij->Atot2 = Atot2;
6657   mpiaij->Btot1 = Btot1;
6658   mpiaij->Btot2 = Btot2;
6659 
6660   mpiaij->Ajmap1 = Ajmap1;
6661   mpiaij->Aperm1 = Aperm1;
6662 
6663   mpiaij->Bjmap1 = Bjmap1;
6664   mpiaij->Bperm1 = Bperm1;
6665 
6666   mpiaij->Aimap2 = Aimap2;
6667   mpiaij->Ajmap2 = Ajmap2;
6668   mpiaij->Aperm2 = Aperm2;
6669 
6670   mpiaij->Bimap2 = Bimap2;
6671   mpiaij->Bjmap2 = Bjmap2;
6672   mpiaij->Bperm2 = Bperm2;
6673 
6674   mpiaij->Cperm1 = Cperm1;
6675 
6676   /* Allocate in preallocation. If not used, it has zero cost on host */
6677   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6678   PetscFunctionReturn(0);
6679 }
6680 
/* Insert/add the COO values v[] (ordered as the i/j arrays given to MatSetPreallocationCOO())
   into the diagonal block A and off-diagonal block B of this MPIAIJ matrix.

   Local entries are accumulated directly into A/B while entries owned by other ranks are
   packed into sendbuf and communicated through the SF built during preallocation; the
   received remote contributions are folded in afterwards.  With imode == INSERT_VALUES
   existing values are overwritten, with ADD_VALUES they are accumulated.

   The *jmap/*perm/*imap arrays were all precomputed by MatSetPreallocationCOO_MPIAIJ():
   Xjmap1[i]..Xjmap1[i+1] delimit the local COO entries contributing to the i-th nonzero,
   Xperm* map those entries back to positions in v[]/recvbuf, and Ximap2 maps received
   nonzeros to their positions in the value arrays. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; remote contributions are always added on top
     of the (already initialized) local result, regardless of imode */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}
6727 
6728 /* ----------------------------------------------------------------*/
6729 
6730 /*MC
6731    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6732 
6733    Options Database Keys:
6734 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6735 
6736    Level: beginner
6737 
6738    Notes:
6739     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
6740     in this case the values associated with the rows and columns one passes in are set to zero
6741     in the matrix
6742 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6745 
6746 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6747 M*/
6748 
/* Constructor for the MATMPIAIJ matrix type.

   Allocates the Mat_MPIAIJ implementation struct, installs the shared function table
   (MatOps_Values), creates the stash that buffers off-process entries set with
   MatSetValues(), and registers the type-specific methods and conversion routines
   that other code dispatches through PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map; built lazily elsewhere */
  b->garray      = NULL; /* global indices of off-diagonal columns; built during assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific methods; callers look these up by name at run time */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other matrix types; optional backends are guarded by configure-time flags */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  /* COO assembly interface (MatSetPreallocationCOO()/MatSetValuesCOO()) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}
6831 
6832 /*@C
6833      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6834          and "off-diagonal" part of the matrix in CSR format.
6835 
6836    Collective
6837 
6838    Input Parameters:
6839 +  comm - MPI communicator
6840 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6841 .  n - This value should be the same as the local size used in creating the
6842        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6843        calculated if N is given) For square matrices n is almost always m.
6844 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
6845 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
6846 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6847 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6848 .   a - matrix values
6849 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6850 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6851 -   oa - matrix values
6852 
6853    Output Parameter:
6854 .   mat - the matrix
6855 
6856    Level: advanced
6857 
6858    Notes:
6859        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6860        must free the arrays once the matrix has been destroyed and not before.
6861 
6862        The i and j indices are 0 based
6863 
6864        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6865 
6866        This sets local rows and cannot be used to set off-processor values.
6867 
6868        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6869        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6870        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6871        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6872        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6873        communication if it is known that only local entries will be set.
6874 
6875 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6876           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6877 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Validate the user-supplied CSR row pointers: both parts must start at 0 */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The CSR arrays below supply the structure, so the matrix counts as preallocated */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the caller's arrays directly (no copy): A = "diagonal" block, B = "off-diagonal" block */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Only local rows are set by this routine, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
6906 
/* Product context stored in C->product->data for backend MPIAIJ matrix products;
   consumed by MatProductNumeric_MPIAIJBACKEND() and freed by
   MatDestroy_MatMatMPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i]; own[0] anchors the single backing allocation */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i]; off[0] anchors the single backing allocation */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* merge diag/offdiag of B in AB products */
  PetscBool P_oth_bind; /* bind P_oth to CPU */
} MatMatMPIAIJBACKEND;
6937 
6938 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6939 {
6940   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6941   PetscInt             i;
6942 
6943   PetscFunctionBegin;
6944   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6945   PetscCall(PetscFree(mmdata->bufa));
6946   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6947   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6948   PetscCall(MatDestroy(&mmdata->P_oth));
6949   PetscCall(MatDestroy(&mmdata->Bloc));
6950   PetscCall(PetscSFDestroy(&mmdata->sf));
6951   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6952   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6953   PetscCall(PetscFree(mmdata->own[0]));
6954   PetscCall(PetscFree(mmdata->own));
6955   PetscCall(PetscFree(mmdata->off[0]));
6956   PetscCall(PetscFree(mmdata->off));
6957   PetscCall(PetscFree(mmdata));
6958   PetscFunctionReturn(0);
6959 }
6960 
6961 /* Copy selected n entries with indices in idx[] of A to v[].
6962    If idx is NULL, copy the whole data array of A to v[]
6963  */
6964 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6965 {
6966   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6967 
6968   PetscFunctionBegin;
6969   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6970   if (f) {
6971     PetscCall((*f)(A, n, idx, v));
6972   } else {
6973     const PetscScalar *vv;
6974 
6975     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6976     if (n && idx) {
6977       PetscScalar    *w  = v;
6978       const PetscInt *oi = idx;
6979       PetscInt        j;
6980 
6981       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6982     } else {
6983       PetscCall(PetscArraycpy(v, vv, n));
6984     }
6985     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
6986   }
6987   PetscFunctionReturn(0);
6988 }
6989 
/* Numeric phase of the backend MPIAIJ matrix products (AB, AtB, PtAP).
   Refreshes the temporary matrices when needed, runs the numeric kernels of all
   intermediate products, copies their values into COO order (off-process
   contributions travel through mmdata->sf), and inserts the result into C via
   MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* n_d/n_o: running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* any later numeric call must refresh the temporaries again */

  /* Run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Copy each non-temporary product's values into the COO buffers:
     on-process values into coo_v, off-process values into coo_w */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: append gathered off-process values after the on-process ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}
7034 
/*
  MatProductSymbolic_MPIAIJBACKEND - symbolic phase for C = A*P, P^t*A, or P^t*A*P with MPIAIJ-like backends

  The global product is decomposed into at most MAX_NUMBER_INTERMEDIATE sequential products mp[]
  of the diagonal/off-diagonal blocks of A and P (plus the gathered off-process rows of P, P_oth).
  For each intermediate matrix we record how its local row/column indices translate to global
  indices of C (rmapt/cmapt select the map type, rmapa/cmapa supply the lookup tables), then
  preallocate C in COO format. MatProductNumeric_MPIAIJBACKEND later copies the numerical values
  of the intermediates into the COO value array, scattering off-process contributions through
  mmdata->sf when hasoffproc is set.

  Row/col map types (rmapt[]/cmapt[]):
    0 - indices are already global
    1 - consecutive local indices, offset by C's row/col ownership start
    2 - sparse, translated through the rmapa[]/cmapa[] table

  mptmp[i] marks purely temporary intermediates whose entries are not inserted into C directly
  (they only feed a later product in the chain, e.g. A_off * P_oth inside PtAP).
*/
/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* when A is symmetric, A^t * B == A * B, so run the cheaper AB algorithm instead */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* pick operands and result sizes; hasoffproc indicates whether computed entries
     may belong to rows of C owned by other processes (requires scattering) */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every entry is locally owned */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization: option names depend on whether the user went through the old API
     (MatMatMult/MatPtAP) or the MatProduct interface */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the chain of intermediate sequential products mp[0..cp-1] */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      /* mp[1] = A_off * P_oth is temporary: its values feed mp[2] = Bloc^t * mp[1] only */
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check: a sparse row map implies entries may land on other processes */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type used for the COO value buffers, matching C's backend */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}
7531 
7532 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7533 {
7534   Mat_Product *product = mat->product;
7535 #if defined(PETSC_HAVE_DEVICE)
7536   PetscBool match  = PETSC_FALSE;
7537   PetscBool usecpu = PETSC_FALSE;
7538 #else
7539   PetscBool match = PETSC_TRUE;
7540 #endif
7541 
7542   PetscFunctionBegin;
7543   MatCheckProduct(mat, 1);
7544 #if defined(PETSC_HAVE_DEVICE)
7545   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7546   if (match) { /* we can always fallback to the CPU if requested */
7547     switch (product->type) {
7548     case MATPRODUCT_AB:
7549       if (product->api_user) {
7550         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7551         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7552         PetscOptionsEnd();
7553       } else {
7554         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7555         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7556         PetscOptionsEnd();
7557       }
7558       break;
7559     case MATPRODUCT_AtB:
7560       if (product->api_user) {
7561         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7562         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7563         PetscOptionsEnd();
7564       } else {
7565         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7566         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7567         PetscOptionsEnd();
7568       }
7569       break;
7570     case MATPRODUCT_PtAP:
7571       if (product->api_user) {
7572         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7573         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7574         PetscOptionsEnd();
7575       } else {
7576         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7577         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7578         PetscOptionsEnd();
7579       }
7580       break;
7581     default:
7582       break;
7583     }
7584     match = (PetscBool)!usecpu;
7585   }
7586 #endif
7587   if (match) {
7588     switch (product->type) {
7589     case MATPRODUCT_AB:
7590     case MATPRODUCT_AtB:
7591     case MATPRODUCT_PtAP:
7592       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7593       break;
7594     default:
7595       break;
7596     }
7597   }
7598   /* fallback to MPIAIJ ops */
7599   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7600   PetscFunctionReturn(0);
7601 }
7602 
7603 /*
7604    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7605 
7606    n - the number of block indices in cc[]
7607    cc - the block indices (must be large enough to contain the indices)
7608 */
7609 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7610 {
7611   PetscInt        cnt = -1, nidx, j;
7612   const PetscInt *idx;
7613 
7614   PetscFunctionBegin;
7615   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7616   if (nidx) {
7617     cnt     = 0;
7618     cc[cnt] = idx[0] / bs;
7619     for (j = 1; j < nidx; j++) {
7620       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7621     }
7622   }
7623   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7624   *n = cnt + 1;
7625   PetscFunctionReturn(0);
7626 }
7627 
7628 /*
7629     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7630 
7631     ncollapsed - the number of block indices
7632     collapsed - the block indices (must be large enough to contain the indices)
7633 */
7634 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7635 {
7636   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7637 
7638   PetscFunctionBegin;
7639   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7640   for (i = start + 1; i < start + bs; i++) {
7641     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7642     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7643     cprevtmp = cprev;
7644     cprev    = merged;
7645     merged   = cprevtmp;
7646   }
7647   *ncollapsed = nprev;
7648   if (collapsed) *collapsed = cprev;
7649   PetscFunctionReturn(0);
7650 }
7651 
7652 /*
7653    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7654 */
7655 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7656 {
7657   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7658   Mat                tGmat;
7659   MPI_Comm           comm;
7660   const PetscScalar *vals;
7661   const PetscInt    *idx;
7662   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7663   MatScalar         *AA; // this is checked in graph
7664   PetscBool          isseqaij;
7665   Mat                a, b, c;
7666   MatType            jtype;
7667 
7668   PetscFunctionBegin;
7669   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7670   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7671   PetscCall(MatGetType(Gmat, &jtype));
7672   PetscCall(MatCreate(comm, &tGmat));
7673   PetscCall(MatSetType(tGmat, jtype));
7674 
7675   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7676                Also, if the matrix is symmetric, can we skip this
7677                operation? It can be very expensive on large matrices. */
7678 
7679   // global sizes
7680   PetscCall(MatGetSize(Gmat, &MM, &NN));
7681   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7682   nloc = Iend - Istart;
7683   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7684   if (isseqaij) {
7685     a = Gmat;
7686     b = NULL;
7687   } else {
7688     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7689     a             = d->A;
7690     b             = d->B;
7691     garray        = d->garray;
7692   }
7693   /* Determine upper bound on non-zeros needed in new filtered matrix */
7694   for (PetscInt row = 0; row < nloc; row++) {
7695     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7696     d_nnz[row] = ncols;
7697     if (ncols > maxcols) maxcols = ncols;
7698     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7699   }
7700   if (b) {
7701     for (PetscInt row = 0; row < nloc; row++) {
7702       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7703       o_nnz[row] = ncols;
7704       if (ncols > maxcols) maxcols = ncols;
7705       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7706     }
7707   }
7708   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7709   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7710   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7711   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7712   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7713   PetscCall(PetscFree2(d_nnz, o_nnz));
7714   //
7715   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7716   nnz0 = nnz1 = 0;
7717   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7718     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7719       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7720       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7721         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7722         if (PetscRealPart(sv) > vfilter) {
7723           nnz1++;
7724           PetscInt cid = idx[jj] + Istart; //diag
7725           if (c != a) cid = garray[idx[jj]];
7726           AA[ncol_row] = vals[jj];
7727           AJ[ncol_row] = cid;
7728           ncol_row++;
7729         }
7730       }
7731       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7732       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7733     }
7734   }
7735   PetscCall(PetscFree2(AA, AJ));
7736   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7737   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7738   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7739 
7740   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7741 
7742   *filteredG = tGmat;
7743   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7744   PetscFunctionReturn(0);
7745 }
7746 
7747 /*
7748  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7749 
7750  Input Parameter:
7751  . Amat - matrix
7752  - symmetrize - make the result symmetric
7753  + scale - scale with diagonal
7754 
7755  Output Parameter:
7756  . a_Gmat - output scalar graph >= 0
7757 
7758  */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  /* nloc = number of local rows of the (scalar) graph: one row per bs-sized block row */
  nloc = (Iend - Istart) / bs;

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* Blocked case: collapse each bs x bs block of Amat to one scalar graph entry */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      /* Fast path: assumes every bs x bs block is fully dense; verified below and
         falls through to the slower generic path (old_bs) if not */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A;
        b             = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* Count collapsed nonzeros per block row (diag block first, then off-diag),
         while checking each block is dense: every sub-row has the same length
         (a multiple of bs) and the same first column */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols;
        for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
          nnz[brow / bs] = jj / bs;
          if (jj % bs) ok = 0;
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
            if (jj % bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
            if (nnz[brow / bs] != jj / bs) ok = 0;
            PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
          }
          if (!ok) {
            /* a block is not dense -- abandon the fast path */
            PetscCall(PetscFree2(d_nnz, o_nnz));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      /* Walk the diagonal block's raw CSR arrays; each graph entry is the sum of
         |Re(.)| over its bs x bs block (a sort of block norm) */
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai               = aseq->i;
        n                = ai[brow + 1] - ai[brow];
        aj               = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {         // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        /* Same block collapse for the off-diagonal part; garray converts local
           off-diagonal column indices to global ones */
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first sub-row of the block fixes the column indices and zeroes the accumulators */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* Generic (slow) path, also the target of the goto from the fast path above */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          /* clamp to the number of off-process columns of the collapsed matrix */
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* ADD_VALUES accumulates the |Re(.)| of every scalar entry into its block's graph entry */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the graph is Amat itself, copied only if it will be modified below */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    /* G <- G + G^T unless Amat is already known symmetric */
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    /* symmetric scaling G <- D^{-1/2} G D^{-1/2} with D = |diag(G)| */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    Mat Fmat = NULL; /* some silly compiler needs this */

    /* replace Gmat with its filtered copy; a negative filter disables this step */
    PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
    PetscCall(MatDestroy(&Gmat));
    Gmat = Fmat;
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}
8002 
8003 /*
8004     Special version for direct calls from Fortran
8005 */
8006 #include <petsc/private/fortranimpl.h>
8007 
8008 /* Change these macros so can be used in void function */
8009 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8010 #undef PetscCall
8011 #define PetscCall(...) \
8012   do { \
8013     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8014     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8015       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8016       return; \
8017     } \
8018   } while (0)
8019 
8020 #undef SETERRQ
8021 #define SETERRQ(comm, ierr, ...) \
8022   do { \
8023     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8024     return; \
8025   } while (0)
8026 
8027 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8028   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8029 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8030   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8031 #else
8032 #endif
/*
  matsetvaluesmpiaij_ - Fortran-callable MatSetValues specialized for MPIAIJ.

  All scalar arguments arrive as pointers (Fortran pass-by-reference); errors are
  reported through *_ierr via the redefined PetscCall/SETERRQ macros above, which
  return void on failure.

  NOTE(review): the many locals below (rp1/ap1/rmax1/..., bm, am, inserted, ...)
  are required by name by the MatSetValues_SeqAIJ_A_Private and
  MatSetValues_SeqAIJ_B_Private macros -- do not rename or remove them.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      /* negative row indices are silently skipped (standard MatSetValues convention) */
      if (im[i] < 0) continue;
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up search state for the diagonal (1) and off-diagonal (2) parts */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major or column-major depending on the matrix's roworiented option */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global -> local via colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* column not in the current off-diagonal pattern and new nonzeros are
                   allowed: disassemble and insert with the global index */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8147 
8148 /* Undefining these here since they were redefined from their original definition above! No
8149  * other PETSc functions should be defined past this point, as it is impossible to recover the
8150  * original definitions */
8151 #undef PetscCall
8152 #undef SETERRQ
8153