xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
10   Mat B;
11 
12   PetscFunctionBegin;
13   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
14   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
15   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
16   PetscCall(MatDestroy(&B));
17   PetscFunctionReturn(0);
18 }
19 
20 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
21   Mat B;
22 
23   PetscFunctionBegin;
24   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
25   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
26   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
27   PetscFunctionReturn(0);
28 }
29 
30 /*MC
31    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
34    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
41 
42   Developer Notes:
43     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
44    enough exist.
45 
46   Level: beginner
47 
48 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
49 M*/
50 
51 /*MC
52    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
53 
54    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
55    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
56    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
57   for communicators controlling multiple processes.  It is recommended that you call both of
58   the above preallocation routines for simplicity.
59 
60    Options Database Keys:
61 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
62 
63   Level: beginner
64 
.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
66 M*/
67 
68 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) {
69   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
70 
71   PetscFunctionBegin;
72 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
73   A->boundtocpu = flg;
74 #endif
75   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
76   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
77 
78   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
79    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
80    * to differ from the parent matrix. */
81   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
82   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
83 
84   PetscFunctionReturn(0);
85 }
86 
87 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) {
88   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
89 
90   PetscFunctionBegin;
91   if (mat->A) {
92     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
93     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
94   }
95   PetscFunctionReturn(0);
96 }
97 
/* Create an IS (in *keptrows) of the global indices of locally-owned rows that contain at
   least one stored, numerically nonzero entry.  If every row on every rank has a nonzero,
   *keptrows is left NULL.  Rows with no stored entries, or whose stored entries are all
   exactly 0.0, are dropped.  Collective on the matrix's communicator. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) {
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data; /* diagonal block */
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data; /* off-diagonal block */
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows; /* cnt: local zero-row count; n0rows: global */
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* First pass: count rows with no nonzero value (structurally empty or all-zero values) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* found a nonzero: row is kept */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* all stored values were zero */
  ok1:;
  }
  /* If no rank has a zero row, leave *keptrows NULL (caller treats NULL as "all rows") */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  /* Second pass: collect the global indices of the kept rows (m - cnt of them) */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue; /* structurally empty rows are never kept */
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* The IS takes ownership of 'rows' (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}
165 
166 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) {
167   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
168   PetscBool   cong;
169 
170   PetscFunctionBegin;
171   PetscCall(MatHasCongruentLayouts(Y, &cong));
172   if (Y->assembled && cong) {
173     PetscCall(MatDiagonalSet(aij->A, D, is));
174   } else {
175     PetscCall(MatDiagonalSet_Default(Y, D, is));
176   }
177   PetscFunctionReturn(0);
178 }
179 
180 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) {
181   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
182   PetscInt    i, rstart, nrows, *rows;
183 
184   PetscFunctionBegin;
185   *zrows = NULL;
186   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
187   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
188   for (i = 0; i < nrows; i++) rows[i] += rstart;
189   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
190   PetscFunctionReturn(0);
191 }
192 
/* Compute a per-column reduction (1/2/infinity norm, or sum/mean of the real or imaginary
   parts) over the whole parallel matrix.  'reductions' must have room for N (global column
   count) reals and holds identical results on every rank on return. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) {
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray: local off-diag column id -> global column id */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work; /* local accumulator, one slot per global column */
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* Get/restore pairs whose result is unused: presumably to force device-resident values to
     be synchronized to the host before a_aij->a / b_aij->a are read directly below — TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* Walk all stored entries of both blocks; diagonal-block columns are shifted by cmap->rstart,
     off-diagonal columns are mapped to global ids through garray */
  if (type == NORM_2) {
    /* |a^2| == |a|^2, so this accumulates squared magnitudes; square root taken after the reduce */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); }
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); }
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* Combine partial results across ranks: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* Means divide the column sums by the global row count m */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
237 
238 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) {
239   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
240   IS              sis, gis;
241   const PetscInt *isis, *igis;
242   PetscInt        n, *iis, nsis, ngis, rstart, i;
243 
244   PetscFunctionBegin;
245   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
246   PetscCall(MatFindNonzeroRows(a->B, &gis));
247   PetscCall(ISGetSize(gis, &ngis));
248   PetscCall(ISGetSize(sis, &nsis));
249   PetscCall(ISGetIndices(sis, &isis));
250   PetscCall(ISGetIndices(gis, &igis));
251 
252   PetscCall(PetscMalloc1(ngis + nsis, &iis));
253   PetscCall(PetscArraycpy(iis, igis, ngis));
254   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
255   n = ngis + nsis;
256   PetscCall(PetscSortRemoveDupsInt(&n, iis));
257   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
258   for (i = 0; i < n; i++) iis[i] += rstart;
259   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
260 
261   PetscCall(ISRestoreIndices(sis, &isis));
262   PetscCall(ISRestoreIndices(gis, &igis));
263   PetscCall(ISDestroy(&sis));
264   PetscCall(ISDestroy(&gis));
265   PetscFunctionReturn(0);
266 }
267 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* n: number of off-diagonal columns on this process */

  PetscFunctionBegin;
  /* garray (local off-diag column -> global column) must exist whenever B has columns */
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* Hash-table variant: keys and values are stored shifted by +1 since 0 means "absent" */
  PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) { PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES)); }
#else
  /* Dense-array variant: one slot per global column; entries are local index + 1, 0 = absent */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat, (mat->cmap->N + 1) * sizeof(PetscInt)));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}
291 
/* Insert or add 'value' at local (row, col) of the diagonal block A.  Expects the caller to
   have set up the per-row search state (rp1/ap1: the row's column indices and values; low1/
   high1/lastcol1/nrow1/rmax1).  Does a bisection search narrowed to a short linear scan; if
   the entry is absent it honors ignorezeroentries and the 'nonew' policy, reallocating the
   row and shifting later entries when a genuinely new nonzero must be inserted.  (orow, ocol)
   are the original global indices, used only in the error message. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    /* reuse the previous search window when columns arrive in increasing order */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* bisect until the window is small, then scan linearly */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* silently drop zero off-diagonal insertions when requested */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* nonew == 1: ignore new nonzero locations without error */ \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* nonew == -1: new nonzero locations are a hard error */ \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
337 
/* Off-diagonal-block twin of MatSetValues_SeqAIJ_A_Private(): same search/insert logic, but
   operating on B's per-row state (rp2/ap2/low2/high2/lastcol2/nrow2/rmax2).  Note the
   ignorezeroentries test has no 'row != col' exemption here — every entry of B is off the
   diagonal by construction. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    /* reuse the previous search window when columns arrive in increasing order */ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    /* bisect until the window is small, then scan linearly */ \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    /* silently drop zero insertions when requested */ \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    /* nonew == 1: ignore new nonzero locations without error */ \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    /* nonew == -1: new nonzero locations are a hard error */ \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
382 
/* Overwrite the values of one locally-owned global row with v[], which must list the row's
   stored nonzeros in global column order: entries left of the diagonal block, then the
   diagonal block, then entries right of it.  The nonzero pattern must already be set. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) {
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index; for square A, 'diag' is also where the diagonal block's columns start */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    /* garray maps B's local column ids back to global; stop at the first entry past the diagonal block */
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  /* copy the l entries left of the diagonal block */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}
419 
/* MatSetValues() implementation for MPIAIJ.  Entries in locally-owned rows are inserted
   directly into the diagonal (A) or off-diagonal (B) sequential blocks through the
   MatSetValues_SeqAIJ_{A,B}_Private() macros; entries for rows owned by other ranks are
   stashed and communicated at assembly time.  Negative row or column indices are ignored. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* search-state variables read and written by the insertion macros; 'ii' appears unused here — TODO confirm before removing */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* row is owned by this rank: insert directly */
      row      = im[i] - rstart;
      /* set up the A-block search state for this row */
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      /* set up the B-block search state for this row */
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        /* v may be NULL (insert pattern only); pick row- or column-major layout */
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* owned column: goes into the diagonal block with a local column index */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compact local column ids; translate via the colmap */
            if (!aij->colmap) { PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col   = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* column is not in B's pattern and B forbids new nonzeros: skip or error per 'nonew' */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B uses global column ids directly */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* row owned by another rank: stash the whole row segment for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}
527 
528 /*
529     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
530     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
532 */
533 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) {
534   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
535   Mat         A      = aij->A; /* diagonal part of the matrix */
536   Mat         B      = aij->B; /* offdiagonal part of the matrix */
537   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
538   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
539   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
540   PetscInt   *ailen = a->ilen, *aj = a->j;
541   PetscInt   *bilen = b->ilen, *bj = b->j;
542   PetscInt    am          = aij->A->rmap->n, j;
543   PetscInt    diag_so_far = 0, dnz;
544   PetscInt    offd_so_far = 0, onz;
545 
546   PetscFunctionBegin;
547   /* Iterate over all rows of the matrix */
548   for (j = 0; j < am; j++) {
549     dnz = onz = 0;
550     /*  Iterate over all non-zero columns of the current row */
551     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
552       /* If column is in the diagonal */
553       if (mat_j[col] >= cstart && mat_j[col] < cend) {
554         aj[diag_so_far++] = mat_j[col] - cstart;
555         dnz++;
556       } else { /* off-diagonal entries */
557         bj[offd_so_far++] = mat_j[col];
558         onz++;
559       }
560     }
561     ailen[j] = dnz;
562     bilen[j] = onz;
563   }
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
569     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
570     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
571     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
572     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
573 */
574 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) {
575   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
576   Mat          A    = aij->A; /* diagonal part of the matrix */
577   Mat          B    = aij->B; /* offdiagonal part of the matrix */
578   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
579   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
580   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
581   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
582   PetscInt    *ailen = a->ilen, *aj = a->j;
583   PetscInt    *bilen = b->ilen, *bj = b->j;
584   PetscInt     am          = aij->A->rmap->n, j;
585   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
586   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
587   PetscScalar *aa = a->a, *ba = b->a;
588 
589   PetscFunctionBegin;
590   /* Iterate over all rows of the matrix */
591   for (j = 0; j < am; j++) {
592     dnz_row = onz_row = 0;
593     rowstart_offd     = full_offd_i[j];
594     rowstart_diag     = full_diag_i[j];
595     /*  Iterate over all non-zero columns of the current row */
596     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
597       /* If column is in the diagonal */
598       if (mat_j[col] >= cstart && mat_j[col] < cend) {
599         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
600         aa[rowstart_diag + dnz_row] = mat_a[col];
601         dnz_row++;
602       } else { /* off-diagonal entries */
603         bj[rowstart_offd + onz_row] = mat_j[col];
604         ba[rowstart_offd + onz_row] = mat_a[col];
605         onz_row++;
606       }
607     }
608     ailen[j] = dnz_row;
609     bilen[j] = onz_row;
610   }
611   PetscFunctionReturn(0);
612 }
613 
/* Retrieve an m-by-n block of values into v (row-major).  Only locally-owned rows may be
   requested; owned columns are read from the diagonal block, all other columns are looked
   up in the off-diagonal block through the colmap (0.0 if the entry is not stored). */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* owned column: read from the diagonal block with local column numbering */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* ghost column: translate global -> local off-diagonal column id via the colmap */
          if (!aij->colmap) { PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col));
          col--; /* table stores local index + 1; absent keys come back as 0, hence -1 here */
#else
          col = aij->colmap[idxn[j]] - 1; /* array stores local index + 1; 0 means absent */
#endif
          /* the garray cross-check guards against a stale colmap entry mapping to a different column */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else { PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
647 
648 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) {
649   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
650   PetscInt    nstash, reallocs;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
656   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
657   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
658   PetscFunctionReturn(0);
659 }
660 
/* Finish assembly: drain the stash of entries other ranks set for our rows, assemble the
   diagonal and off-diagonal sequential blocks, negotiate (collectively) whether any rank
   disassembled, set up the ghost-column scatter on first final assembly, and update the
   collective nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Receive stashed entries message by message and insert them locally */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build the ghost-column mapping and scatter for MatMult() etc. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPIAIJ(mat)); }
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row-access work arrays and the cached diagonal are now stale */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
738 
739 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) {
740   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
741 
742   PetscFunctionBegin;
743   PetscCall(MatZeroEntries(l->A));
744   PetscCall(MatZeroEntries(l->B));
745   PetscFunctionReturn(0);
746 }
747 
/* Zeroes the given global rows of an MPIAIJ matrix, optionally placing 'diag' on the
   diagonal and fixing up the right-hand side b so that b[row] = diag*x[row].
   Collective; ends with a full assembly and a reduction of the nonzero state. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;         /* nonzero states of both blocks before zeroing */
  PetscInt        *lrows;          /* local indices of the rows owned by this rank */
  PetscInt         r, len;
  PetscBool        cong, lch, gch; /* congruent layouts; local/global "pattern changed" flags */

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed: b[row] = diag * x[row] for each zeroed local row */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* snapshot nonzero states so we can detect a pattern change afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry always lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' flags, restored at the end of this branch */
    PetscBool   nnzA, nnzB; /* keepnonzeropattern flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal entries one at a time (may allocate since nonew was cleared above) */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rows beyond the column range have no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: just zero the rows of both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
820 
/* Zeroes the given global rows AND the corresponding columns of an MPIAIJ matrix,
   optionally placing 'diag' on the diagonal and adjusting b for the solution values
   in x. The diagonal block is handled by MatZeroRowsColumns on l->A; the off-diagonal
   block is masked out by hand using a scattered 0/1 column mask. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data; /* off-diagonal block in SeqAIJ form */
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed: roots untouched by any leaf stay -1,
     reduced roots become nonnegative (tested below) */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* build a 0/1 mask over the zeroed columns, then scatter it into ghost layout */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring the ghost values of x needed for the rhs fix-up below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) { PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]])); }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row index to actual local row */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being zeroed: move its contribution into b, then clear it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
937 
938 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) {
939   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
940   PetscInt    nt;
941   VecScatter  Mvctx = a->Mvctx;
942 
943   PetscFunctionBegin;
944   PetscCall(VecGetLocalSize(xx, &nt));
945   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
946   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
947   PetscUseTypeMethod(a->A, mult, xx, yy);
948   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
949   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
950   PetscFunctionReturn(0);
951 }
952 
953 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) {
954   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
955 
956   PetscFunctionBegin;
957   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
958   PetscFunctionReturn(0);
959 }
960 
961 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
962   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
963   VecScatter  Mvctx = a->Mvctx;
964 
965   PetscFunctionBegin;
966   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
967   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
968   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
969   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) {
974   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
975 
976   PetscFunctionBegin;
977   /* do nondiagonal part */
978   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
979   /* do local part */
980   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
981   /* add partial results together */
982   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
983   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
984   PetscFunctionReturn(0);
985 }
986 
/* Tests whether Bmat equals Amat^T to within tol. First compares the local diagonal
   blocks (cheap, all ranks must agree), then compares the off-diagonal parts by
   extracting complementary submatrices. Collective. */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) {
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* every rank's diagonal block must pass before doing the expensive test */
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* uniprocessor: the diagonal block is the whole matrix, we are done */
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global indices outside this rank's owned range [first,last) */
  /* NOTE(review): the array is sized with N (global columns) but the second loop runs
     to M (global rows); these differ for non-square matrices -- confirm M == N is
     guaranteed on this path */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* compare Amat(Me,Notme) against Bmat(Notme,Me): the off-diagonal parts of a
     transpose pair must themselves be transposes of each other */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1027 
1028 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) {
1029   PetscFunctionBegin;
1030   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
1035   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1036 
1037   PetscFunctionBegin;
1038   /* do nondiagonal part */
1039   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1040   /* do local part */
1041   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1042   /* add partial results together */
1043   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1044   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 /*
1049   This only works correctly for square matrices where the subblock A->A is the
1050    diagonal block
1051 */
1052 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) {
1053   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1054 
1055   PetscFunctionBegin;
1056   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1057   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1058   PetscCall(MatGetDiagonal(a->A, v));
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) {
1063   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1064 
1065   PetscFunctionBegin;
1066   PetscCall(MatScale(a->A, aa));
1067   PetscCall(MatScale(a->B, aa));
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1072 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) {
1073   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1074 
1075   PetscFunctionBegin;
1076   PetscCall(PetscSFDestroy(&aij->coo_sf));
1077   PetscCall(PetscFree(aij->Aperm1));
1078   PetscCall(PetscFree(aij->Bperm1));
1079   PetscCall(PetscFree(aij->Ajmap1));
1080   PetscCall(PetscFree(aij->Bjmap1));
1081 
1082   PetscCall(PetscFree(aij->Aimap2));
1083   PetscCall(PetscFree(aij->Bimap2));
1084   PetscCall(PetscFree(aij->Aperm2));
1085   PetscCall(PetscFree(aij->Bperm2));
1086   PetscCall(PetscFree(aij->Ajmap2));
1087   PetscCall(PetscFree(aij->Bjmap2));
1088 
1089   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
1090   PetscCall(PetscFree(aij->Cperm1));
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) {
1095   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1096 
1097   PetscFunctionBegin;
1098 #if defined(PETSC_USE_LOG)
1099   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
1100 #endif
1101   PetscCall(MatStashDestroy_Private(&mat->stash));
1102   PetscCall(VecDestroy(&aij->diag));
1103   PetscCall(MatDestroy(&aij->A));
1104   PetscCall(MatDestroy(&aij->B));
1105 #if defined(PETSC_USE_CTABLE)
1106   PetscCall(PetscTableDestroy(&aij->colmap));
1107 #else
1108   PetscCall(PetscFree(aij->colmap));
1109 #endif
1110   PetscCall(PetscFree(aij->garray));
1111   PetscCall(VecDestroy(&aij->lvec));
1112   PetscCall(VecScatterDestroy(&aij->Mvctx));
1113   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
1114   PetscCall(PetscFree(aij->ld));
1115 
1116   /* Free COO */
1117   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1118 
1119   PetscCall(PetscFree(mat->data));
1120 
1121   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1122   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
1123 
1124   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1125   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
1126   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
1127   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
1128   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
1129   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
1130   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
1131   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
1132   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
1133   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
1134 #if defined(PETSC_HAVE_CUDA)
1135   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
1136 #endif
1137 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1138   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
1139 #endif
1140   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
1141 #if defined(PETSC_HAVE_ELEMENTAL)
1142   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
1143 #endif
1144 #if defined(PETSC_HAVE_SCALAPACK)
1145   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
1146 #endif
1147 #if defined(PETSC_HAVE_HYPRE)
1148   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
1149   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
1150 #endif
1151   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1152   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
1153   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
1154   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
1155   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
1157 #if defined(PETSC_HAVE_MKL_SPARSE)
1158   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
1159 #endif
1160   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
1161   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
1164   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
1169   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1170   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1171   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1172   const PetscInt    *garray = aij->garray;
1173   const PetscScalar *aa, *ba;
1174   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1175   PetscInt          *rowlens;
1176   PetscInt          *colidxs;
1177   PetscScalar       *matvals;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   header[3] = nz;
1194   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1195   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1196 
1197   /* fill in and store row lengths  */
1198   PetscCall(PetscMalloc1(m, &rowlens));
1199   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1200   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1201   PetscCall(PetscFree(rowlens));
1202 
1203   /* fill in and store column indices */
1204   PetscCall(PetscMalloc1(nz, &colidxs));
1205   for (cnt = 0, i = 0; i < m; i++) {
1206     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1207       if (garray[B->j[jb]] > cs) break;
1208       colidxs[cnt++] = garray[B->j[jb]];
1209     }
1210     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1211     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1212   }
1213   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1214   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1215   PetscCall(PetscFree(colidxs));
1216 
1217   /* fill in and store nonzero values */
1218   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1219   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1220   PetscCall(PetscMalloc1(nz, &matvals));
1221   for (cnt = 0, i = 0; i < m; i++) {
1222     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1223       if (garray[B->j[jb]] > cs) break;
1224       matvals[cnt++] = ba[jb];
1225     }
1226     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1227     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1228   }
1229   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1230   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1231   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1232   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1233   PetscCall(PetscFree(matvals));
1234 
1235   /* write block size option to the viewer's .info file */
1236   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #include <petscdraw.h>
/* Views an MPIAIJ matrix through ASCII, binary, draw, or socket viewers. Special
   ASCII formats and the binary case return early; everything else falls through to
   gathering the whole matrix on rank 0 and viewing it there. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized report of local sizes, nonzeros, and inode usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* single rank: the diagonal block is the entire matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch looks unreachable -- when iascii is true the first
       branch of this if/else chain is taken instead; confirm whether it can be removed */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns, every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) { PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); }
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1365 
1366 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) {
1367   PetscBool iascii, isdraw, issocket, isbinary;
1368 
1369   PetscFunctionBegin;
1370   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1371   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1372   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1373   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1374   if (iascii || isdraw || isbinary || issocket) { PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); }
1375   PetscFunctionReturn(0);
1376 }
1377 
/*
  MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for an MPIAIJ matrix.

  Only the "local" sweep variants are supported in parallel: each process
  relaxes its diagonal block mat->A, while coupling through the off-diagonal
  block mat->B is folded into the right-hand side (bb1 = bb - B*x) using the
  ghost values gathered into mat->lvec. SOR_APPLY_UPPER is forwarded directly
  to the diagonal block, SOR_EISENSTAT is implemented below, and any other
  (truly parallel) variant raises PETSC_ERR_SUP.

  Input:  matin  - the matrix
          bb     - right-hand side
          omega  - relaxation factor
          flag   - MatSORType bitmask selecting the sweep
          fshift - diagonal shift
          its    - number of outer iterations
          lits   - local iterations handed to the block solver
  Output: xx     - the iterate (also an input unless SOR_ZERO_INITIAL_GUESS)
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* Pure triangular application: delegate entirely to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* A work vector is needed whenever the rhs must be corrected with
     off-diagonal contributions: more than one iteration, a nonzero initial
     guess, or the Eisenstat trick */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { PetscCall(VecDuplicate(bb, &bb1)); }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* With x == 0 the first sweep needs no off-diagonal correction */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather ghost values of the current iterate into mat->lvec */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* With x == 0 the first sweep needs no off-diagonal correction */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      /* gather ghost values of the current iterate into mat->lvec */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* With x == 0 the first sweep needs no off-diagonal correction */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      /* gather ghost values of the current iterate into mat->lvec */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward half-sweep from a zero guess */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal on first use */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*x */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/factorization error detected in the local solve */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1473 
/*
  MatPermute_MPIAIJ - Creates B = P^T * A * Q, where P and Q are the row and
  column permutations described by the index sets rowp and colp.

  The permutations are inverted with star forests (PetscSF): each process
  reduces its own global row/column numbers onto the ranks owning the
  permuted positions (rdest/cdest), broadcasts the new locations of its
  ghost columns (gcdest), counts diagonal/off-diagonal nonzeros per permuted
  row for exact preallocation, and finally inserts the permuted entries with
  MatSetValues().

  NOTE(review): parcolp is never assigned in this routine, so the final
  conditional ISDestroy(&colp) is dead code as written — confirm against the
  full file history.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) {
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  /* MPI_REPLACE is safe: each root receives from exactly one leaf (a permutation) */
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal (dnnz) and off-diagonal (onnz) entries per permuted row,
     then ship the counts (tdnnz/tonnz) to the processes owning those rows */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1578 
1579 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
1580   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1581 
1582   PetscFunctionBegin;
1583   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1584   if (ghosts) *ghosts = aij->garray;
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
1589   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1590   Mat            A = mat->A, B = mat->B;
1591   PetscLogDouble isend[5], irecv[5];
1592 
1593   PetscFunctionBegin;
1594   info->block_size = 1.0;
1595   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1596 
1597   isend[0] = info->nz_used;
1598   isend[1] = info->nz_allocated;
1599   isend[2] = info->nz_unneeded;
1600   isend[3] = info->memory;
1601   isend[4] = info->mallocs;
1602 
1603   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1604 
1605   isend[0] += info->nz_used;
1606   isend[1] += info->nz_allocated;
1607   isend[2] += info->nz_unneeded;
1608   isend[3] += info->memory;
1609   isend[4] += info->mallocs;
1610   if (flag == MAT_LOCAL) {
1611     info->nz_used      = isend[0];
1612     info->nz_allocated = isend[1];
1613     info->nz_unneeded  = isend[2];
1614     info->memory       = isend[3];
1615     info->mallocs      = isend[4];
1616   } else if (flag == MAT_GLOBAL_MAX) {
1617     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1618 
1619     info->nz_used      = irecv[0];
1620     info->nz_allocated = irecv[1];
1621     info->nz_unneeded  = irecv[2];
1622     info->memory       = irecv[3];
1623     info->mallocs      = irecv[4];
1624   } else if (flag == MAT_GLOBAL_SUM) {
1625     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1626 
1627     info->nz_used      = irecv[0];
1628     info->nz_allocated = irecv[1];
1629     info->nz_unneeded  = irecv[2];
1630     info->memory       = irecv[3];
1631     info->mallocs      = irecv[4];
1632   }
1633   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1634   info->fill_ratio_needed = 0;
1635   info->factor_mallocs    = 0;
1636   PetscFunctionReturn(0);
1637 }
1638 
/*
  MatSetOption_MPIAIJ - Sets an option on the parallel matrix. Most options
  are simply forwarded to both sequential blocks (a->A and a->B); a few are
  recorded on the parallel wrapper itself, handled by the generic
  MatSetOption(), or deliberately ignored.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that only make sense once storage exists: forward to both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    /* remembered on the wrapper (affects MatSetValues) and forwarded */
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS: A->submat_singleis = flg; break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1684 
/*
  MatGetRow_MPIAIJ - Returns one (globally numbered, locally owned) row of
  the matrix, merging the entries of the diagonal block mat->A and the
  off-diagonal block mat->B into a single list sorted by global column.

  The returned idx/v pointers reference work buffers (mat->rowindices /
  mat->rowvalues) that are allocated once — sized for the longest local
  row — and reused until the matrix is destroyed. MatRestoreRow_MPIAIJ()
  must be called before the next MatGetRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one row may be "out" at a time */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL to the block getrow()s for outputs the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  /* garray maps the off-diagonal block's compressed columns to global columns */
  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart precede all A entries;
           imark is the count of such entries */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) { idx_p[i] = cmap[cworkB[i]]; }
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1767 
1768 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1769   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1770 
1771   PetscFunctionBegin;
1772   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1773   aij->getrowactive = PETSC_FALSE;
1774   PetscFunctionReturn(0);
1775 }
1776 
/*
  MatNorm_MPIAIJ - Computes the Frobenius norm, 1-norm (max column sum), or
  infinity-norm (max row sum) of an MPIAIJ matrix by walking the stored
  entries of the diagonal (A) and off-diagonal (B) sequential blocks and
  reducing across the communicator. The 2-norm is not supported.

  NOTE(review): the 1-norm path allocates two dense real arrays of length
  cmap->N (the global column count) on every process — memory-intensive for
  very wide matrices.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) {
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: the diagonal block is the whole matrix */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, allreduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      /* accumulate |a_ij| into per-global-column sums; A columns are shifted
         by cstart, B columns are mapped through garray */
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are local, so only a final MPI_MAX is needed */
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}
1853 
/*
  MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

  For MAT_INITIAL_MATRIX (or transpose-in-place, *matout == A) the result is
  preallocated exactly: column counts of the diagonal block give d_nnz
  directly, and column counts of the off-diagonal block are summed onto the
  owning processes through a PetscSF to give o_nnz. The diagonal block is
  then transposed with the fast local path (no MatSetValues), while each row
  of the off-diagonal block is inserted as a column of the result via
  MatSetValues(). MAT_INPLACE_MATRIX merges the new matrix back into A.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* the transpose has A's column layout as rows and row layout as columns */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* reused matrix: any new nonzero would mean the pattern changed */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) { B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global numbering */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    /* insert local row i of a->B as (global) column 'row' of the result */
    ncol = bi[i + 1] - bi[i];
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
1946 
1947 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) {
1948   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1949   Mat         a = aij->A, b = aij->B;
1950   PetscInt    s1, s2, s3;
1951 
1952   PetscFunctionBegin;
1953   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1954   if (rr) {
1955     PetscCall(VecGetLocalSize(rr, &s1));
1956     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1957     /* Overlap communication with computation. */
1958     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1959   }
1960   if (ll) {
1961     PetscCall(VecGetLocalSize(ll, &s1));
1962     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1963     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1964   }
1965   /* scale  the diagonal block */
1966   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1967 
1968   if (rr) {
1969     /* Do a scatter end and then right scale the off-diagonal block */
1970     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1971     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1972   }
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) {
1977   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1978 
1979   PetscFunctionBegin;
1980   PetscCall(MatSetUnfactored(a->A));
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) {
1985   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
1986   Mat         a, b, c, d;
1987   PetscBool   flg;
1988 
1989   PetscFunctionBegin;
1990   a = matA->A;
1991   b = matA->B;
1992   c = matB->A;
1993   d = matB->B;
1994 
1995   PetscCall(MatEqual(a, c, &flg));
1996   if (flg) { PetscCall(MatEqual(b, d, &flg)); }
1997   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) {
2002   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2003   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2004 
2005   PetscFunctionBegin;
2006   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2007   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2008     /* because of the column compression in the off-processor part of the matrix a->B,
2009        the number of columns in a->B and b->B may be different, hence we cannot call
2010        the MatCopy() directly on the two parts. If need be, we can provide a more
2011        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2012        then copying the submatrices */
2013     PetscCall(MatCopy_Basic(A, B, str));
2014   } else {
2015     PetscCall(MatCopy(a->A, b->A, str));
2016     PetscCall(MatCopy(a->B, b->B, str));
2017   }
2018   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 PetscErrorCode MatSetUp_MPIAIJ(Mat A) {
2023   PetscFunctionBegin;
2024   PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 /*
2029    Computes the number of nonzeros per row needed for preallocation when X and Y
2030    have different nonzero structure.
2031 */
2032 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) {
2033   PetscInt i, j, k, nzx, nzy;
2034 
2035   PetscFunctionBegin;
2036   /* Set the number of nonzeros in the new matrix */
2037   for (i = 0; i < m; i++) {
2038     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2039     nzx    = xi[i + 1] - xi[i];
2040     nzy    = yi[i + 1] - yi[i];
2041     nnz[i] = 0;
2042     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2043       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2044       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2045       nnz[i]++;
2046     }
2047     for (; k < nzy; k++) nnz[i]++;
2048   }
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2053 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
2054   PetscInt    m = Y->rmap->N;
2055   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2056   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2057 
2058   PetscFunctionBegin;
2059   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
2064   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2065 
2066   PetscFunctionBegin;
2067   if (str == SAME_NONZERO_PATTERN) {
2068     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2069     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2070   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2071     PetscCall(MatAXPY_Basic(Y, a, X, str));
2072   } else {
2073     Mat       B;
2074     PetscInt *nnz_d, *nnz_o;
2075 
2076     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2077     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2078     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2079     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2080     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2081     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2082     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2083     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2084     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2085     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2086     PetscCall(MatHeaderMerge(Y, &B));
2087     PetscCall(PetscFree(nnz_d));
2088     PetscCall(PetscFree(nnz_o));
2089   }
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2094 
2095 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) {
2096   PetscFunctionBegin;
2097   if (PetscDefined(USE_COMPLEX)) {
2098     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2099 
2100     PetscCall(MatConjugate_SeqAIJ(aij->A));
2101     PetscCall(MatConjugate_SeqAIJ(aij->B));
2102   }
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 PetscErrorCode MatRealPart_MPIAIJ(Mat A) {
2107   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2108 
2109   PetscFunctionBegin;
2110   PetscCall(MatRealPart(a->A));
2111   PetscCall(MatRealPart(a->B));
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) {
2116   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2117 
2118   PetscFunctionBegin;
2119   PetscCall(MatImaginaryPart(a->A));
2120   PetscCall(MatImaginaryPart(a->B));
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
2125   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2126   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2127   PetscScalar       *va, *vv;
2128   Vec                vB, vA;
2129   const PetscScalar *vb;
2130 
2131   PetscFunctionBegin;
2132   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2133   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2134 
2135   PetscCall(VecGetArrayWrite(vA, &va));
2136   if (idx) {
2137     for (i = 0; i < m; i++) {
2138       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2139     }
2140   }
2141 
2142   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2143   PetscCall(PetscMalloc1(m, &idxb));
2144   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2145 
2146   PetscCall(VecGetArrayWrite(v, &vv));
2147   PetscCall(VecGetArrayRead(vB, &vb));
2148   for (i = 0; i < m; i++) {
2149     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2150       vv[i] = vb[i];
2151       if (idx) idx[i] = a->garray[idxb[i]];
2152     } else {
2153       vv[i] = va[i];
2154       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2155     }
2156   }
2157   PetscCall(VecRestoreArrayWrite(vA, &vv));
2158   PetscCall(VecRestoreArrayWrite(vA, &va));
2159   PetscCall(VecRestoreArrayRead(vB, &vb));
2160   PetscCall(PetscFree(idxb));
2161   PetscCall(VecDestroy(&vA));
2162   PetscCall(VecDestroy(&vB));
2163   PetscFunctionReturn(0);
2164 }
2165 
/* Computes v[i] = min_j |A[i,j]| over the locally owned rows, counting implicit
   zeros (global columns with no stored entry), and optionally returns in idx[]
   the global column of the minimizer.  The diagonal block (mat->A) and the
   off-diagonal block (mat->B) are scanned separately and merged; ties are broken
   toward the smaller global column index. */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* whole matrix is in the diagonal block; compute directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* this rank owns no columns: every row minimum (in abs) is an implicit 0.0 */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and bounds the row min-abs */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan the stored off-diagonal entries of this row for a smaller |value| */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the per-row diagonal-block and off-diagonal-block candidates */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2280 
/* Computes v[i] = min_j A[i,j] (by real part) over the locally owned rows, counting
   implicit zeros, and optionally returns in idx[] the global column of the minimizer.
   Structure mirrors MatGetRowMinAbs_MPIAIJ but compares PetscRealPart() values. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* whole matrix is in the diagonal block; compute directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* no local columns: identity value for a min reduction */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and bounds the row minimum from above */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan the stored off-diagonal entries of this row for a smaller value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the per-row diagonal-block and off-diagonal-block candidates */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2395 
/* Computes v[i] = max_j A[i,j] (by real part) over the locally owned rows, counting
   implicit zeros, and optionally returns in idx[] the global column of the maximizer.
   Structure mirrors MatGetRowMin_MPIAIJ with the comparisons reversed. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* whole matrix is in the diagonal block; compute directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* no local columns: identity value for a max reduction */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* scan the stored off-diagonal entries of this row for a larger value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the per-row diagonal-block and off-diagonal-block candidates */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2510 
2511 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) {
2512   Mat *dummy;
2513 
2514   PetscFunctionBegin;
2515   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2516   *newmat = *dummy;
2517   PetscCall(PetscFree(dummy));
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) {
2522   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2523 
2524   PetscFunctionBegin;
2525   PetscCall(MatInvertBlockDiagonal(a->A, values));
2526   A->factorerrortype = a->A->factorerrortype;
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) {
2531   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2532 
2533   PetscFunctionBegin;
2534   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2535   PetscCall(MatSetRandom(aij->A, rctx));
2536   if (x->assembled) {
2537     PetscCall(MatSetRandom(aij->B, rctx));
2538   } else {
2539     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2540   }
2541   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2542   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) {
2547   PetscFunctionBegin;
2548   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2549   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 /*@
2554    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2555 
2556    Not collective
2557 
2558    Input Parameter:
2559 .    A - the matrix
2560 
2561    Output Parameter:
2562 .    nz - the number of nonzeros
2563 
2564  Level: advanced
2565 
2566 @*/
2567 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) {
2568   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2569   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2570 
2571   PetscFunctionBegin;
2572   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 /*@
2577    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2578 
2579    Collective on Mat
2580 
2581    Input Parameters:
2582 +    A - the matrix
2583 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2584 
2585  Level: advanced
2586 
2587 @*/
/* Public entry point: dispatches to the type-specific implementation if the matrix
   type registered one; silently does nothing otherwise (PetscTryMethod semantics). */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) {
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}
2593 
2594 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) {
2595   PetscBool sc = PETSC_FALSE, flg;
2596 
2597   PetscFunctionBegin;
2598   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2599   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2600   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2601   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2602   PetscOptionsHeadEnd();
2603   PetscFunctionReturn(0);
2604 }
2605 
/* Adds a*I to Y.  Ensures the diagonal block has room for one entry per row before
   delegating to the generic MatShift_Basic(). */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) {
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* one nonzero per row in the diagonal block is enough for a pure shift */
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    /* re-preallocating resets aij->nonew; save and restore it so the user's
       new-nonzero policy survives */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}
2621 
2622 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) {
2623   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2624 
2625   PetscFunctionBegin;
2626   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2627   PetscCall(MatMissingDiagonal(a->A, missing, d));
2628   if (d) {
2629     PetscInt rstart;
2630     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2631     *d += rstart;
2632   }
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) {
2637   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2638 
2639   PetscFunctionBegin;
2640   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2641   PetscFunctionReturn(0);
2642 }
2643 
2644 /* -------------------------------------------------------------------*/
/* Operation table for MATMPIAIJ.  Entries are positional (the /*NN*/ markers give
   the slot index in struct _MatOps); a NULL slot means the operation is either
   unsupported or resolved elsewhere (e.g. via PetscObjectComposeFunction).  Do not
   reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ,
                                       /*150*/ NULL};
2796 
2797 /* ----------------------------------------------------------------------------------------*/
2798 
2799 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) {
2800   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2801 
2802   PetscFunctionBegin;
2803   PetscCall(MatStoreValues(aij->A));
2804   PetscCall(MatStoreValues(aij->B));
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) {
2809   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2810 
2811   PetscFunctionBegin;
2812   PetscCall(MatRetrieveValues(aij->A));
2813   PetscCall(MatRetrieveValues(aij->B));
2814   PetscFunctionReturn(0);
2815 }
2816 
/* Type-specific implementation of MatMPIAIJSetPreallocation(): (re)creates the local
   diagonal (b->A) and off-diagonal (b->B) sequential blocks and preallocates them
   with d_nz/d_nnz and o_nz/o_nnz respectively. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* discard any existing column map / communication machinery; it is rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on a single rank there is no off-diagonal part, so give it zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->B));

  /* the diagonal block keeps its sizes across calls; create it only once */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2859 
2860 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) {
2861   Mat_MPIAIJ *b;
2862 
2863   PetscFunctionBegin;
2864   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2865   PetscCall(PetscLayoutSetUp(B->rmap));
2866   PetscCall(PetscLayoutSetUp(B->cmap));
2867   b = (Mat_MPIAIJ *)B->data;
2868 
2869 #if defined(PETSC_USE_CTABLE)
2870   PetscCall(PetscTableDestroy(&b->colmap));
2871 #else
2872   PetscCall(PetscFree(b->colmap));
2873 #endif
2874   PetscCall(PetscFree(b->garray));
2875   PetscCall(VecDestroy(&b->lvec));
2876   PetscCall(VecScatterDestroy(&b->Mvctx));
2877 
2878   PetscCall(MatResetPreallocation(b->A));
2879   PetscCall(MatResetPreallocation(b->B));
2880   B->preallocated  = PETSC_TRUE;
2881   B->was_assembled = PETSC_FALSE;
2882   B->assembled     = PETSC_FALSE;
2883   PetscFunctionReturn(0);
2884 }
2885 
/*
  MatDuplicate_MPIAIJ - duplicate a MATMPIAIJ matrix, optionally copying its values

  The new matrix gets fresh copies of the diagonal (A) and off-diagonal (B) sequential
  blocks, the global-to-local column map, the off-process column list (garray), and -
  when present - the local vector and scatter used for matrix-vector products.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch space is not copied; it is rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* copy the global-to-local column map of the off-diagonal block, if it exists */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, (mat->cmap->N) * sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* copy the list of global column indices held in the off-diagonal block */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, len * sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->B));
  /* carry over composed functions so type-specific operations keep working */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2950 
2951 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) {
2952   PetscBool isbinary, ishdf5;
2953 
2954   PetscFunctionBegin;
2955   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
2956   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
2957   /* force binary viewer to load .info file if it has not yet done so */
2958   PetscCall(PetscViewerSetUp(viewer));
2959   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2960   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
2961   if (isbinary) {
2962     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
2963   } else if (ishdf5) {
2964 #if defined(PETSC_HAVE_HDF5)
2965     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
2966 #else
2967     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2968 #endif
2969   } else {
2970     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
2971   }
2972   PetscFunctionReturn(0);
2973 }
2974 
/*
  MatLoad_MPIAIJ_Binary - load an MPIAIJ matrix from a PETSc binary viewer

  On-disk layout read here: a 4-entry header (classid, M, N, nz), then the M row
  lengths, then all nz column indices, then all nz values. Rows are distributed
  across processes according to the matrix row layout (set up here if needed).
  Collective: all reads below must be executed by every process in order.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  /* prefix-sum the local row lengths into CSR row offsets */
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: row lengths over all processes must account for every nonzero */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}
3023 
3024 /* Not scalable because of ISAllGather() unless getting all columns. */
3025 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) {
3026   IS          iscol_local;
3027   PetscBool   isstride;
3028   PetscMPIInt lisstride = 0, gisstride;
3029 
3030   PetscFunctionBegin;
3031   /* check if we are grabbing all columns*/
3032   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3033 
3034   if (isstride) {
3035     PetscInt start, len, mstart, mlen;
3036     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3037     PetscCall(ISGetLocalSize(iscol, &len));
3038     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3039     if (mstart == start && mlen - mstart == len) lisstride = 1;
3040   }
3041 
3042   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3043   if (gisstride) {
3044     PetscInt N;
3045     PetscCall(MatGetSize(mat, NULL, &N));
3046     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3047     PetscCall(ISSetIdentity(iscol_local));
3048     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3049   } else {
3050     PetscInt cbs;
3051     PetscCall(ISGetBlockSize(iscol, &cbs));
3052     PetscCall(ISAllGather(iscol, &iscol_local));
3053     PetscCall(ISSetBlockSize(iscol_local, cbs));
3054   }
3055 
3056   *isseq = iscol_local;
3057   PetscFunctionReturn(0);
3058 }
3059 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol;
            allocated here, caller is responsible for freeing it
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) {
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum gives this process's offset into the global iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* Mark selected columns: x carries the global column index, cmap the column's
     position in the submatrix */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d (local column indices into mat->A); IS takes ownership of idx */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d (local row indices into mat->A) */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries that survived the scatter with a value > -1
     are the selected columns of the off-diagonal block B */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* cmap1 ownership transfers to the caller via *garray */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3170 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat.

   On MAT_INITIAL_MATRIX the sequential index sets isrow_d/iscol_d/iscol_o are computed,
   used to extract the diagonal and off-diagonal blocks, and composed onto the new matrix
   so that a later MAT_REUSE_MATRIX call can retrieve them and update values in place. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) { PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); }
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; takes ownership of Asub, destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* Match the condensed column list (subgarray) against the original one (garray);
         both are assumed sorted here, and subgarray must be a subset of garray */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request;
       composition takes a reference, so the local references are dropped here */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3262 
/*
  MatCreateSubMatrix_MPIAIJ - extract a parallel submatrix given parallel row/column index sets

  Dispatches to progressively cheaper implementations: if both isrow and iscol match the
  matrix's row/column ownership (checked collectively), the SameRowColDist variant is used;
  if only isrow matches and the gathered iscol is sorted, the SameRowDist variant is used;
  otherwise it falls back to the nonscalable path that gathers iscol on every process.
  All processes must reach the same decision, hence the collective reductions below.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* On reuse, the previously composed objects tell us which path created *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) { sameDist[0] = PETSC_TRUE; }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* LAND-reduce: the optimized paths require every process to qualify */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(0);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) { PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); }
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* Save the gathered IS for later reuse; composition keeps its own reference */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3361 
/*@C
     MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) {
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Translate B's (local) column indices to global indices in place, using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; it shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer array ownership from B to Bnew before destroying B, so the shared
     arrays survive the destroy and are freed exactly once (by Bnew) */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3453 
3454 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3455 
3456 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) {
3457   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3458   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3459   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3460   Mat             M, Msub, B = a->B;
3461   MatScalar      *aa;
3462   Mat_SeqAIJ     *aij;
3463   PetscInt       *garray = a->garray, *colsub, Ncols;
3464   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3465   IS              iscol_sub, iscmap;
3466   const PetscInt *is_idx, *cmap;
3467   PetscBool       allcolumns = PETSC_FALSE;
3468   MPI_Comm        comm;
3469 
3470   PetscFunctionBegin;
3471   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3472   if (call == MAT_REUSE_MATRIX) {
3473     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3474     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3475     PetscCall(ISGetLocalSize(iscol_sub, &count));
3476 
3477     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3478     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3479 
3480     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3481     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3482 
3483     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3484 
3485   } else { /* call == MAT_INITIAL_MATRIX) */
3486     PetscBool flg;
3487 
3488     PetscCall(ISGetLocalSize(iscol, &n));
3489     PetscCall(ISGetSize(iscol, &Ncols));
3490 
3491     /* (1) iscol -> nonscalable iscol_local */
3492     /* Check for special case: each processor gets entire matrix columns */
3493     PetscCall(ISIdentity(iscol_local, &flg));
3494     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3495     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3496     if (allcolumns) {
3497       iscol_sub = iscol_local;
3498       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3499       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3500 
3501     } else {
3502       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3503       PetscInt *idx, *cmap1, k;
3504       PetscCall(PetscMalloc1(Ncols, &idx));
3505       PetscCall(PetscMalloc1(Ncols, &cmap1));
3506       PetscCall(ISGetIndices(iscol_local, &is_idx));
3507       count = 0;
3508       k     = 0;
3509       for (i = 0; i < Ncols; i++) {
3510         j = is_idx[i];
3511         if (j >= cstart && j < cend) {
3512           /* diagonal part of mat */
3513           idx[count]     = j;
3514           cmap1[count++] = i; /* column index in submat */
3515         } else if (Bn) {
3516           /* off-diagonal part of mat */
3517           if (j == garray[k]) {
3518             idx[count]     = j;
3519             cmap1[count++] = i; /* column index in submat */
3520           } else if (j > garray[k]) {
3521             while (j > garray[k] && k < Bn - 1) k++;
3522             if (j == garray[k]) {
3523               idx[count]     = j;
3524               cmap1[count++] = i; /* column index in submat */
3525             }
3526           }
3527         }
3528       }
3529       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3530 
3531       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3532       PetscCall(ISGetBlockSize(iscol, &cbs));
3533       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3534 
3535       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3536     }
3537 
3538     /* (3) Create sequential Msub */
3539     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3540   }
3541 
3542   PetscCall(ISGetLocalSize(iscol_sub, &count));
3543   aij = (Mat_SeqAIJ *)(Msub)->data;
3544   ii  = aij->i;
3545   PetscCall(ISGetIndices(iscmap, &cmap));
3546 
3547   /*
3548       m - number of local rows
3549       Ncols - number of columns (same on all processors)
3550       rstart - first row in new global matrix generated
3551   */
3552   PetscCall(MatGetSize(Msub, &m, NULL));
3553 
3554   if (call == MAT_INITIAL_MATRIX) {
3555     /* (4) Create parallel newmat */
3556     PetscMPIInt rank, size;
3557     PetscInt    csize;
3558 
3559     PetscCallMPI(MPI_Comm_size(comm, &size));
3560     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3561 
3562     /*
3563         Determine the number of non-zeros in the diagonal and off-diagonal
3564         portions of the matrix in order to do correct preallocation
3565     */
3566 
3567     /* first get start and end of "diagonal" columns */
3568     PetscCall(ISGetLocalSize(iscol, &csize));
3569     if (csize == PETSC_DECIDE) {
3570       PetscCall(ISGetSize(isrow, &mglobal));
3571       if (mglobal == Ncols) { /* square matrix */
3572         nlocal = m;
3573       } else {
3574         nlocal = Ncols / size + ((Ncols % size) > rank);
3575       }
3576     } else {
3577       nlocal = csize;
3578     }
3579     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3580     rstart = rend - nlocal;
3581     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3582 
3583     /* next, compute all the lengths */
3584     jj = aij->j;
3585     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3586     olens = dlens + m;
3587     for (i = 0; i < m; i++) {
3588       jend = ii[i + 1] - ii[i];
3589       olen = 0;
3590       dlen = 0;
3591       for (j = 0; j < jend; j++) {
3592         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3593         else dlen++;
3594         jj++;
3595       }
3596       olens[i] = olen;
3597       dlens[i] = dlen;
3598     }
3599 
3600     PetscCall(ISGetBlockSize(isrow, &bs));
3601     PetscCall(ISGetBlockSize(iscol, &cbs));
3602 
3603     PetscCall(MatCreate(comm, &M));
3604     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3605     PetscCall(MatSetBlockSizes(M, bs, cbs));
3606     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3607     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3608     PetscCall(PetscFree(dlens));
3609 
3610   } else { /* call == MAT_REUSE_MATRIX */
3611     M = *newmat;
3612     PetscCall(MatGetLocalSize(M, &i, NULL));
3613     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3614     PetscCall(MatZeroEntries(M));
3615     /*
3616          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3617        rather than the slower MatSetValues().
3618     */
3619     M->was_assembled = PETSC_TRUE;
3620     M->assembled     = PETSC_FALSE;
3621   }
3622 
3623   /* (5) Set values of Msub to *newmat */
3624   PetscCall(PetscMalloc1(count, &colsub));
3625   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3626 
3627   jj = aij->j;
3628   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3629   for (i = 0; i < m; i++) {
3630     row = rstart + i;
3631     nz  = ii[i + 1] - ii[i];
3632     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3633     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3634     jj += nz;
3635     aa += nz;
3636   }
3637   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3638   PetscCall(ISRestoreIndices(iscmap, &cmap));
3639 
3640   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3641   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3642 
3643   PetscCall(PetscFree(colsub));
3644 
3645   /* save Msub, iscol_sub and iscmap used in processor for next request */
3646   if (call == MAT_INITIAL_MATRIX) {
3647     *newmat = M;
3648     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3649     PetscCall(MatDestroy(&Msub));
3650 
3651     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3652     PetscCall(ISDestroy(&iscol_sub));
3653 
3654     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3655     PetscCall(ISDestroy(&iscmap));
3656 
3657     if (iscol_local) {
3658       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3659       PetscCall(ISDestroy(&iscol_local));
3660     }
3661   }
3662   PetscFunctionReturn(0);
3663 }
3664 
3665 /*
3666     Not great since it makes two copies of the submatrix, first an SeqAIJ
3667   in local and then by concatenating the local matrices the end result.
3668   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3669 
3670   Note: This requires a sequential iscol with all indices.
3671 */
/*
   Extracts the parallel submatrix mat[isrow, iscol]: each process first builds a
   sequential submatrix (Mreuse) of its selected rows/columns and these pieces are
   then assembled into the distributed result M.  csize is the requested local
   column size of the result, or PETSC_DECIDE.  With MAT_INITIAL_MATRIX the
   sequential piece is composed on the result under the name "SubMatrix" so a
   later MAT_REUSE_MATRIX call can find and refill it.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse; /* M: distributed result; Mreuse: per-process sequential submatrix */
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the all-columns shortcut is only valid when it holds on every rank, hence the logical-AND reduction */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call and refill it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the n columns across the ranks, the first n%size ranks getting one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of the local column sizes gives this rank's [rstart, rend) ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    /* single allocation: olens is the second half of the dlens buffer */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      /* classify each nonzero of row i: inside [rstart,rend) -> diagonal block, else off-diagonal */
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    /* NOTE(review): only the local row count is validated here; nl is fetched but not checked */
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* copy Mreuse row by row into M; the column indices stored in Mreuse are passed
     directly as global column indices of M */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse)); /* drop local reference; M now holds it */
  }
  PetscFunctionReturn(0);
}
3797 
3798 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
3799   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3800   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3801   const PetscInt *JJ;
3802   PetscBool       nooffprocentries;
3803   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3804 
3805   PetscFunctionBegin;
3806   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3807 
3808   PetscCall(PetscLayoutSetUp(B->rmap));
3809   PetscCall(PetscLayoutSetUp(B->cmap));
3810   m      = B->rmap->n;
3811   cstart = B->cmap->rstart;
3812   cend   = B->cmap->rend;
3813   rstart = B->rmap->rstart;
3814 
3815   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3816 
3817   if (PetscDefined(USE_DEBUG)) {
3818     for (i = 0; i < m; i++) {
3819       nnz = Ii[i + 1] - Ii[i];
3820       JJ  = J + Ii[i];
3821       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3822       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3823       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3824     }
3825   }
3826 
3827   for (i = 0; i < m; i++) {
3828     nnz     = Ii[i + 1] - Ii[i];
3829     JJ      = J + Ii[i];
3830     nnz_max = PetscMax(nnz_max, nnz);
3831     d       = 0;
3832     for (j = 0; j < nnz; j++) {
3833       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3834     }
3835     d_nnz[i] = d;
3836     o_nnz[i] = nnz - d;
3837   }
3838   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3839   PetscCall(PetscFree2(d_nnz, o_nnz));
3840 
3841   for (i = 0; i < m; i++) {
3842     ii = i + rstart;
3843     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3844   }
3845   nooffprocentries    = B->nooffprocentries;
3846   B->nooffprocentries = PETSC_TRUE;
3847   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3848   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3849   B->nooffprocentries = nooffprocentries;
3850 
3851   /* count number of entries below block diagonal */
3852   PetscCall(PetscFree(Aij->ld));
3853   PetscCall(PetscCalloc1(m, &ld));
3854   Aij->ld = ld;
3855   for (i = 0; i < m; i++) {
3856     nnz = Ii[i + 1] - Ii[i];
3857     j   = 0;
3858     while (j < nnz && J[j] < cstart) { j++; }
3859     ld[i] = j;
3860     J += nnz;
3861   }
3862 
3863   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3864   PetscFunctionReturn(0);
3865 }
3866 
3867 /*@
3868    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3869    (the default parallel PETSc format).
3870 
3871    Collective
3872 
3873    Input Parameters:
3874 +  B - the matrix
3875 .  i - the indices into j for the start of each local row (starts with zero)
3876 .  j - the column indices for each local row (starts with zero)
3877 -  v - optional values in the matrix
3878 
3879    Level: developer
3880 
3881    Notes:
3882        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3883      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3884      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3885 
3886        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3887 
3888        The format which is used for the sparse matrix input, is equivalent to a
3889     row-major ordering.. i.e for the following matrix, the input data expected is
3890     as shown
3891 
3892 $        1 0 0
3893 $        2 0 3     P0
3894 $       -------
3895 $        4 5 6     P1
3896 $
3897 $     Process0 [P0]: rows_owned=[0,1]
3898 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3899 $        j =  {0,0,2}  [size = 3]
3900 $        v =  {1,2,3}  [size = 3]
3901 $
3902 $     Process1 [P1]: rows_owned=[2]
3903 $        i =  {0,3}    [size = nrow+1  = 1+1]
3904 $        j =  {0,1,2}  [size = 3]
3905 $        v =  {4,5,6}  [size = 3]
3906 
3907 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3908           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3909 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
  PetscFunctionBegin;
  /* dispatch to the implementation registered for this matrix type
     (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ); PetscTryMethod is a no-op
     when the type does not provide one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}
3915 
3916 /*@C
3917    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3918    (the default parallel PETSc format).  For good matrix assembly performance
3919    the user should preallocate the matrix storage by setting the parameters
3920    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3921    performance can be increased by more than a factor of 50.
3922 
3923    Collective
3924 
3925    Input Parameters:
3926 +  B - the matrix
3927 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3928            (same value is used for all local rows)
3929 .  d_nnz - array containing the number of nonzeros in the various rows of the
3930            DIAGONAL portion of the local submatrix (possibly different for each row)
3931            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3932            The size of this array is equal to the number of local rows, i.e 'm'.
3933            For matrices that will be factored, you must leave room for (and set)
3934            the diagonal entry even if it is zero.
3935 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3936            submatrix (same value is used for all local rows).
3937 -  o_nnz - array containing the number of nonzeros in the various rows of the
3938            OFF-DIAGONAL portion of the local submatrix (possibly different for
3939            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3940            structure. The size of this array is equal to the number
3941            of local rows, i.e 'm'.
3942 
3943    If the *_nnz parameter is given then the *_nz parameter is ignored
3944 
3945    The AIJ format (also called the Yale sparse matrix format or
3946    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3947    storage.  The stored row and column indices begin with zero.
3948    See Users-Manual: ch_mat for details.
3949 
3950    The parallel matrix is partitioned such that the first m0 rows belong to
3951    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3952    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3953 
3954    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
3959    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3960    common case of a square matrix, the row and column ranges are the same and
3961    the DIAGONAL part is also square. The remaining portion of the local
3962    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3963 
3964    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3965 
3966    You can call MatGetInfo() to get information on how effective the preallocation was;
3967    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3968    You can also run with the option -info and look for messages with the string
3969    malloc in them to see if additional memory allocation was needed.
3970 
3971    Example usage:
3972 
3973    Consider the following 8x8 matrix with 34 non-zero values, that is
3974    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3975    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3976    as follows:
3977 
3978 .vb
3979             1  2  0  |  0  3  0  |  0  4
3980     Proc0   0  5  6  |  7  0  0  |  8  0
3981             9  0 10  | 11  0  0  | 12  0
3982     -------------------------------------
3983            13  0 14  | 15 16 17  |  0  0
3984     Proc1   0 18  0  | 19 20 21  |  0  0
3985             0  0  0  | 22 23  0  | 24  0
3986     -------------------------------------
3987     Proc2  25 26 27  |  0  0 28  | 29  0
3988            30  0  0  | 31 32 33  |  0 34
3989 .ve
3990 
3991    This can be represented as a collection of submatrices as:
3992 
3993 .vb
3994       A B C
3995       D E F
3996       G H I
3997 .ve
3998 
3999    Where the submatrices A,B,C are owned by proc0, D,E,F are
4000    owned by proc1, G,H,I are owned by proc2.
4001 
4002    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4003    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4004    The 'M','N' parameters are 8,8, and have the same values on all procs.
4005 
4006    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4007    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4008    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4009    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4010    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4012 
4013    When d_nz, o_nz parameters are specified, d_nz storage elements are
4014    allocated for every row of the local diagonal submatrix, and o_nz
4015    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4018    In this case, the values of d_nz,o_nz are:
4019 .vb
4020      proc0 : dnz = 2, o_nz = 2
4021      proc1 : dnz = 3, o_nz = 2
4022      proc2 : dnz = 1, o_nz = 4
4023 .ve
4024    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4025    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
4027    34 values.
4028 
4029    When d_nnz, o_nnz parameters are specified, the storage is specified
4030    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4031    In the above case the values for d_nnz,o_nnz are:
4032 .vb
4033      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4034      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4035      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4036 .ve
4037    Here the space allocated is sum of all the above values i.e 34, and
4038    hence pre-allocation is perfect.
4039 
4040    Level: intermediate
4041 
4042 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4043           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4044 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the preallocation routine registered for this matrix type;
     PetscTryMethod is a no-op when the type has none */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
4052 
4053 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
         in standard CSR format.
4056 
4057    Collective
4058 
4059    Input Parameters:
4060 +  comm - MPI communicator
4061 .  m - number of local rows (Cannot be PETSC_DECIDE)
4062 .  n - This value should be the same as the local size used in creating the
4063        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4064        calculated if N is given) For square matrices n is almost always m.
4065 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4066 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4067 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4068 .   j - column indices
4069 -   a - optional matrix values
4070 
4071    Output Parameter:
4072 .   mat - the matrix
4073 
4074    Level: intermediate
4075 
4076    Notes:
4077        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4078      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4079      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4080 
4081        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4082 
4083        The format which is used for the sparse matrix input, is equivalent to a
4084     row-major ordering.. i.e for the following matrix, the input data expected is
4085     as shown
4086 
4087        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4088 
4089 $        1 0 0
4090 $        2 0 3     P0
4091 $       -------
4092 $        4 5 6     P1
4093 $
4094 $     Process0 [P0]: rows_owned=[0,1]
4095 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4096 $        j =  {0,0,2}  [size = 3]
4097 $        v =  {1,2,3}  [size = 3]
4098 $
4099 $     Process1 [P1]: rows_owned=[2]
4100 $        i =  {0,3}    [size = nrow+1  = 1+1]
4101 $        j =  {0,1,2}  [size = 3]
4102 $        v =  {4,5,6}  [size = 3]
4103 
4104 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4105           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4106 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
  PetscFunctionBegin;
  /* CSR row offsets are 0-based: when i is given, its first entry must be 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  /* the local row count fixes the row distribution, so it cannot be left to PETSc */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies i/j/a into the matrix, preallocates, inserts values and assembles */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(0);
}
4118 
4119 /*@
     MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
         in standard CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed to MatCreateMPIAIJWithArrays()
4122 
4123      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4124 
4125    Collective
4126 
4127    Input Parameters:
4128 +  mat - the matrix
4129 .  m - number of local rows (Cannot be PETSC_DECIDE)
4130 .  n - This value should be the same as the local size used in creating the
4131        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4132        calculated if N is given) For square matrices n is almost always m.
4133 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4134 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4135 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4136 .  J - column indices
4137 -  v - matrix values
4138 
4139    Level: intermediate
4140 
4141 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4142           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4143 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* diagonal block */
  PetscScalar    *ad, *ao;                          /* value arrays of diagonal / off-diagonal blocks */
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;                      /* CSR row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld;                    /* per row: #entries below the diagonal block (set by MatMPIAIJSetPreallocationCSR) */

  PetscFunctionBegin;
  /* NOTE(review): J, M and N are accepted for interface compatibility but not read here;
     only Ii and v are used.  The structure must match the original CSR exactly. */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  /* Each CSR row of v is laid out as: [ldi off-diagonal entries left of the
     diagonal block][md diagonal-block entries][remaining off-diagonal entries];
     scatter those three runs into the ao and ad storage of the two blocks. */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i]; /* total entries in row i */
    Iii = Ii[i];             /* start of row i in v */
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* only local entries were written, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states since the values were changed behind the normal MatSetValues path */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4186 
4187 /*@
4188      MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values
4189 
4190    Collective
4191 
4192    Input Parameters:
4193 +  mat - the matrix
4194 -  v - matrix values, stored by row
4195 
4196    Level: intermediate
4197 
4198    Notes:
4199    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4200 
4201 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4203 @*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) {
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* local diagonal block */
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data; /* local off-diagonal block */
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* CSR row pointers of the diagonal (A) and off-diagonal (B) blocks */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld; /* ld[i]: off-diagonal entries of row i whose global column lies LEFT of the diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset into v; v stores each local row contiguously in global column order */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; /* total nonzeros of local row i (diag + off-diag) */
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i]; /* nonzeros of row i inside the diagonal block */
    /* row i of v is laid out [left off-diag | diag | right off-diag]; scatter the three pieces */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    /* advance the write cursors past the entries of row i */
    ad += md;
    ao += nnz - md;
    Iii += nnz;
  }
  /* only local values changed, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4244 
4245 /*@C
4246    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4247    (the default parallel PETSc format).  For good matrix assembly performance
4248    the user should preallocate the matrix storage by setting the parameters
4249    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4250    performance can be increased by more than a factor of 50.
4251 
4252    Collective
4253 
4254    Input Parameters:
4255 +  comm - MPI communicator
4256 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4257            This value should be the same as the local size used in creating the
4258            y vector for the matrix-vector product y = Ax.
4259 .  n - This value should be the same as the local size used in creating the
4260        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4261        calculated if N is given) For square matrices n is almost always m.
4262 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4263 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4264 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4265            (same value is used for all local rows)
4266 .  d_nnz - array containing the number of nonzeros in the various rows of the
4267            DIAGONAL portion of the local submatrix (possibly different for each row)
4268            or NULL, if d_nz is used to specify the nonzero structure.
4269            The size of this array is equal to the number of local rows, i.e 'm'.
4270 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4271            submatrix (same value is used for all local rows).
4272 -  o_nnz - array containing the number of nonzeros in the various rows of the
4273            OFF-DIAGONAL portion of the local submatrix (possibly different for
4274            each row) or NULL, if o_nz is used to specify the nonzero
4275            structure. The size of this array is equal to the number
4276            of local rows, i.e 'm'.
4277 
4278    Output Parameter:
4279 .  A - the matrix
4280 
4281    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4282    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4283    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4284 
4285    Notes:
4286    If the *_nnz parameter is given then the *_nz parameter is ignored
4287 
4288    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4289    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4290    storage requirements for this matrix.
4291 
4292    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4294    that argument.
4295 
4296    The user MUST specify either the local or global matrix dimensions
4297    (possibly both).
4298 
4299    The parallel matrix is partitioned across processors such that the
4300    first m0 rows belong to process 0, the next m1 rows belong to
4301    process 1, the next m2 rows belong to process 2 etc.. where
4302    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4303    values corresponding to [m x N] submatrix.
4304 
4305    The columns are logically partitioned with the n0 columns belonging
4306    to 0th partition, the next n1 columns belonging to the next
4307    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4308 
4309    The DIAGONAL portion of the local submatrix on any given processor
4310    is the submatrix corresponding to the rows and columns m,n
4311    corresponding to the given processor. i.e diagonal matrix on
4312    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4313    etc. The remaining portion of the local submatrix [m x (N-n)]
4314    constitute the OFF-DIAGONAL portion. The example below better
4315    illustrates this concept.
4316 
4317    For a square global matrix we define each processor's diagonal portion
4318    to be its local rows and the corresponding columns (a square submatrix);
4319    each processor's off-diagonal portion encompasses the remainder of the
4320    local matrix (a rectangular submatrix).
4321 
4322    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4323 
4324    When calling this routine with a single process communicator, a matrix of
4325    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4326    type of communicator, use the construction mechanism
4327 .vb
4328      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4329 .ve
4330 
4331 $     MatCreate(...,&A);
4332 $     MatSetType(A,MATMPIAIJ);
4333 $     MatSetSizes(A, m,n,M,N);
4334 $     MatMPIAIJSetPreallocation(A,...);
4335 
4336    By default, this format uses inodes (identical nodes) when possible.
4337    We search for consecutive rows with the same nonzero structure, thereby
4338    reusing matrix information to achieve increased efficiency.
4339 
4340    Options Database Keys:
4341 +  -mat_no_inode  - Do not use inodes
4342 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4343 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
4344         See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4345         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.
4346 
4347    Example usage:
4348 
4349    Consider the following 8x8 matrix with 34 non-zero values, that is
4350    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4351    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4352    as follows
4353 
4354 .vb
4355             1  2  0  |  0  3  0  |  0  4
4356     Proc0   0  5  6  |  7  0  0  |  8  0
4357             9  0 10  | 11  0  0  | 12  0
4358     -------------------------------------
4359            13  0 14  | 15 16 17  |  0  0
4360     Proc1   0 18  0  | 19 20 21  |  0  0
4361             0  0  0  | 22 23  0  | 24  0
4362     -------------------------------------
4363     Proc2  25 26 27  |  0  0 28  | 29  0
4364            30  0  0  | 31 32 33  |  0 34
4365 .ve
4366 
4367    This can be represented as a collection of submatrices as
4368 
4369 .vb
4370       A B C
4371       D E F
4372       G H I
4373 .ve
4374 
4375    Where the submatrices A,B,C are owned by proc0, D,E,F are
4376    owned by proc1, G,H,I are owned by proc2.
4377 
4378    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4379    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4380    The 'M','N' parameters are 8,8, and have the same values on all procs.
4381 
4382    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4383    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4384    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4385    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4386    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4388 
4389    When d_nz, o_nz parameters are specified, d_nz storage elements are
4390    allocated for every row of the local diagonal submatrix, and o_nz
4391    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4393    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4394    In this case, the values of d_nz,o_nz are
4395 .vb
4396      proc0 : dnz = 2, o_nz = 2
4397      proc1 : dnz = 3, o_nz = 2
4398      proc2 : dnz = 1, o_nz = 4
4399 .ve
4400    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4401    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4403    34 values.
4404 
4405    When d_nnz, o_nnz parameters are specified, the storage is specified
4406    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4407    In the above case the values for d_nnz,o_nnz are
4408 .vb
4409      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4410      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4411      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4412 .ve
4413    Here the space allocated is sum of all the above values i.e 34, and
4414    hence pre-allocation is perfect.
4415 
4416    Level: intermediate
4417 
4418 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4419           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4420 @*/
4421 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
4422   PetscMPIInt size;
4423 
4424   PetscFunctionBegin;
4425   PetscCall(MatCreate(comm, A));
4426   PetscCall(MatSetSizes(*A, m, n, M, N));
4427   PetscCallMPI(MPI_Comm_size(comm, &size));
4428   if (size > 1) {
4429     PetscCall(MatSetType(*A, MATMPIAIJ));
4430     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4431   } else {
4432     PetscCall(MatSetType(*A, MATSEQAIJ));
4433     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4434   }
4435   PetscFunctionReturn(0);
4436 }
4437 
4438 /*@C
4439   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4440 
4441   Not collective
4442 
4443   Input Parameter:
4444 . A - The MPIAIJ matrix
4445 
4446   Output Parameters:
4447 + Ad - The local diagonal block as a SeqAIJ matrix
4448 . Ao - The local off-diagonal block as a SeqAIJ matrix
4449 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4450 
4451   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4453   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4454   local column numbers to global column numbers in the original matrix.
4455 
4456   Level: intermediate
4457 
4458 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4459 @*/
4460 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) {
4461   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4462   PetscBool   flg;
4463 
4464   PetscFunctionBegin;
4465   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4466   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4467   if (Ad) *Ad = a->A;
4468   if (Ao) *Ao = a->B;
4469   if (colmap) *colmap = a->garray;
4470   PetscFunctionReturn(0);
4471 }
4472 
/* Concatenates the sequential matrices inmat from all ranks into one parallel
   matrix: each rank's inmat rows become that rank's local rows of *outmat,
   stacked in rank order. With MAT_INITIAL_MATRIX the sparsity is analyzed and
   *outmat is created/preallocated; with MAT_REUSE_MATRIX only values are set. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) { PetscCall(PetscSplitOwnership(comm, &n, &N)); }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* first global row owned by this rank = sum of m over lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* call both preallocation routines; whichever matches the final type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart; /* local row i of inmat becomes global row rstart+i */
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4523 
4524 PetscErrorCode MatFileSplit(Mat A, char *outfile) {
4525   PetscMPIInt        rank;
4526   PetscInt           m, N, i, rstart, nnz;
4527   size_t             len;
4528   const PetscInt    *indx;
4529   PetscViewer        out;
4530   char              *name;
4531   Mat                B;
4532   const PetscScalar *values;
4533 
4534   PetscFunctionBegin;
4535   PetscCall(MatGetLocalSize(A, &m, NULL));
4536   PetscCall(MatGetSize(A, NULL, &N));
4537   /* Should this be the type of the diagonal block of A? */
4538   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4539   PetscCall(MatSetSizes(B, m, N, m, N));
4540   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4541   PetscCall(MatSetType(B, MATSEQAIJ));
4542   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4543   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4544   for (i = 0; i < m; i++) {
4545     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4546     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4547     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4548   }
4549   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4550   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4551 
4552   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4553   PetscCall(PetscStrlen(outfile, &len));
4554   PetscCall(PetscMalloc1(len + 6, &name));
4555   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4556   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4557   PetscCall(PetscFree(name));
4558   PetscCall(MatView(B, out));
4559   PetscCall(PetscViewerDestroy(&out));
4560   PetscCall(MatDestroy(&B));
4561   PetscFunctionReturn(0);
4562 }
4563 
4564 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) {
4565   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4566 
4567   PetscFunctionBegin;
4568   if (!merge) PetscFunctionReturn(0);
4569   PetscCall(PetscFree(merge->id_r));
4570   PetscCall(PetscFree(merge->len_s));
4571   PetscCall(PetscFree(merge->len_r));
4572   PetscCall(PetscFree(merge->bi));
4573   PetscCall(PetscFree(merge->bj));
4574   PetscCall(PetscFree(merge->buf_ri[0]));
4575   PetscCall(PetscFree(merge->buf_ri));
4576   PetscCall(PetscFree(merge->buf_rj[0]));
4577   PetscCall(PetscFree(merge->buf_rj));
4578   PetscCall(PetscFree(merge->coi));
4579   PetscCall(PetscFree(merge->coj));
4580   PetscCall(PetscFree(merge->owners_co));
4581   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4582   PetscCall(PetscFree(merge));
4583   PetscFunctionReturn(0);
4584 }
4585 
4586 #include <../src/mat/utils/freespace.h>
4587 #include <petscbt.h>
4588 
/* Numeric phase of merging per-rank sequential matrices into one parallel
   matrix: ships each rank's values for rows owned by other ranks, then sums
   local and received values into mpimat using the ij-structure saved by
   MatCreateMPIAIJSumSeqAIJSymbolic() in the attached Mat_Merge_SeqsToMPI. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) {
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge info produced by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* send the contiguous run of values for the rows [proc] owns */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge sorted aj into sorted bj_i: advance j until columns match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
4708 
/* Symbolic phase of merging per-rank sequential matrices into one parallel
   matrix: determines row ownership, exchanges the ij-structure of rows owned
   by other ranks, computes the union sparsity pattern of each local row, and
   creates the (unassembled) MPIAIJ matrix. The merge bookkeeping needed by
   MatCreateMPIAIJSumSeqAIJNumeric() is attached to the result in a container. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) {
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc + 1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* count only nonempty rows; each contributes a (row index, offset) pair */
      nrows = 0;
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      len_si[proc] = 2 * (nrows + 1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) { PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0;
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) { PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  /* flatten the accumulated column indices into the final bj array */
  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
4953 
4954 /*@C
4955       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4956                  matrices from each processor
4957 
4958     Collective
4959 
4960    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
4963 .    m - number of local rows (or PETSC_DECIDE)
4964 .    n - number of local columns (or PETSC_DECIDE)
4965 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4966 
4967    Output Parameter:
4968 .    mpimat - the parallel matrix generated
4969 
4970     Level: advanced
4971 
4972    Notes:
4973      The dimensions of the sequential matrix in each processor MUST be the same.
4974      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4975      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4976 @*/
4977 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) {
4978   PetscMPIInt size;
4979 
4980   PetscFunctionBegin;
4981   PetscCallMPI(MPI_Comm_size(comm, &size));
4982   if (size == 1) {
4983     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
4984     if (scall == MAT_INITIAL_MATRIX) {
4985       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
4986     } else {
4987       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
4988     }
4989     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
4990     PetscFunctionReturn(0);
4991   }
4992   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
4993   if (scall == MAT_INITIAL_MATRIX) { PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); }
4994   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
4995   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
4996   PetscFunctionReturn(0);
4997 }
4998 
4999 /*@
5000      MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5001           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5002           with MatGetSize()
5003 
5004     Not Collective
5005 
   Input Parameter:
.    A - the matrix
5009 
5010    Output Parameter:
5011 .    A_loc - the local sequential matrix generated
5012 
5013     Level: developer
5014 
5015    Notes:
5016      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5017 
5018      Destroy the matrix with MatDestroy()
5019 
5020 .seealso: MatMPIAIJGetLocalMat()
5021 
5022 @*/
5023 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) {
5024   PetscBool mpi;
5025 
5026   PetscFunctionBegin;
5027   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5028   if (mpi) {
5029     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5030   } else {
5031     *A_loc = A;
5032     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5033   }
5034   PetscFunctionReturn(0);
5035 }
5036 
5037 /*@
5038      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5039           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5040           with MatGetSize()
5041 
5042     Not Collective
5043 
5044    Input Parameters:
5045 +    A - the matrix
5046 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5047 
5048    Output Parameter:
5049 .    A_loc - the local sequential matrix generated
5050 
5051     Level: developer
5052 
5053    Notes:
5054      In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix.
5055 
5056      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5057      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5058      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5059      modify the values of the returned A_loc.
5060 
5061 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5062 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) {
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap maps local off-diagonal column indices to global */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Prefix test (not an exact type compare) so derived MPIAIJ subtypes are accepted as well */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Uniprocess case: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are roving cursors; aav/bav keep the base addresses for the Restore calls below */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row i of the result merges row i of the diagonal block (A) and off-diagonal block (B),
       with columns emitted in ascending global order: B-left, A, B-right */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) { ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); }
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A whose global column is left of the diagonal block (< cstart) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (local columns shifted to global by cstart) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal portion of A (right of the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Sparsity pattern is unchanged: only overwrite the numerical values in place, in the same order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A left of the diagonal block */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A right of the diagonal block */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5166 
5167 /*@
5168      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5169           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5170 
5171     Not Collective
5172 
5173    Input Parameters:
5174 +    A - the matrix
5175 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5176 
5177    Output Parameters:
5178 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5179 -    A_loc - the local sequential matrix generated
5180 
5181     Level: developer
5182 
5183    Notes:
5184      This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering)
5185 
5186 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5187 
5188 @*/
5189 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) {
5190   Mat             Ao, Ad;
5191   const PetscInt *cmap;
5192   PetscMPIInt     size;
5193   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5194 
5195   PetscFunctionBegin;
5196   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5197   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5198   if (size == 1) {
5199     if (scall == MAT_INITIAL_MATRIX) {
5200       PetscCall(PetscObjectReference((PetscObject)Ad));
5201       *A_loc = Ad;
5202     } else if (scall == MAT_REUSE_MATRIX) {
5203       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5204     }
5205     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5206     PetscFunctionReturn(0);
5207   }
5208   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5209   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5210   if (f) {
5211     PetscCall((*f)(A, scall, glob, A_loc));
5212   } else {
5213     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5214     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5215     Mat_SeqAIJ        *c;
5216     PetscInt          *ai = a->i, *aj = a->j;
5217     PetscInt          *bi = b->i, *bj = b->j;
5218     PetscInt          *ci, *cj;
5219     const PetscScalar *aa, *ba;
5220     PetscScalar       *ca;
5221     PetscInt           i, j, am, dn, on;
5222 
5223     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5224     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5225     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5226     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5227     if (scall == MAT_INITIAL_MATRIX) {
5228       PetscInt k;
5229       PetscCall(PetscMalloc1(1 + am, &ci));
5230       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5231       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5232       ci[0] = 0;
5233       for (i = 0, k = 0; i < am; i++) {
5234         const PetscInt ncols_o = bi[i + 1] - bi[i];
5235         const PetscInt ncols_d = ai[i + 1] - ai[i];
5236         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5237         /* diagonal portion of A */
5238         for (j = 0; j < ncols_d; j++, k++) {
5239           cj[k] = *aj++;
5240           ca[k] = *aa++;
5241         }
5242         /* off-diagonal portion of A */
5243         for (j = 0; j < ncols_o; j++, k++) {
5244           cj[k] = dn + *bj++;
5245           ca[k] = *ba++;
5246         }
5247       }
5248       /* put together the new matrix */
5249       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5250       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5251       /* Since these are PETSc arrays, change flags to free them as necessary. */
5252       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5253       c->free_a  = PETSC_TRUE;
5254       c->free_ij = PETSC_TRUE;
5255       c->nonew   = 0;
5256       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5257     } else if (scall == MAT_REUSE_MATRIX) {
5258       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5259       for (i = 0; i < am; i++) {
5260         const PetscInt ncols_d = ai[i + 1] - ai[i];
5261         const PetscInt ncols_o = bi[i + 1] - bi[i];
5262         /* diagonal portion of A */
5263         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5264         /* off-diagonal portion of A */
5265         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5266       }
5267       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5268     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5269     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5270     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5271     if (glob) {
5272       PetscInt cst, *gidx;
5273 
5274       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5275       PetscCall(PetscMalloc1(dn + on, &gidx));
5276       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5277       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5278       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5279     }
5280   }
5281   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5282   PetscFunctionReturn(0);
5283 }
5284 
5285 /*@C
5286      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5287 
5288     Not Collective
5289 
5290    Input Parameters:
5291 +    A - the matrix
5292 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5293 -    row, col - index sets of rows and columns to extract (or NULL)
5294 
5295    Output Parameter:
5296 .    A_loc - the local sequential matrix generated
5297 
5298     Level: developer
5299 
5300 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5301 
5302 @*/
5303 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) {
5304   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5305   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5306   IS          isrowa, iscola;
5307   Mat        *aloc;
5308   PetscBool   match;
5309 
5310   PetscFunctionBegin;
5311   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5312   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5313   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5314   if (!row) {
5315     start = A->rmap->rstart;
5316     end   = A->rmap->rend;
5317     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5318   } else {
5319     isrowa = *row;
5320   }
5321   if (!col) {
5322     start = A->cmap->rstart;
5323     cmap  = a->garray;
5324     nzA   = a->A->cmap->n;
5325     nzB   = a->B->cmap->n;
5326     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5327     ncols = 0;
5328     for (i = 0; i < nzB; i++) {
5329       if (cmap[i] < start) idx[ncols++] = cmap[i];
5330       else break;
5331     }
5332     imark = i;
5333     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5334     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5335     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5336   } else {
5337     iscola = *col;
5338   }
5339   if (scall != MAT_INITIAL_MATRIX) {
5340     PetscCall(PetscMalloc1(1, &aloc));
5341     aloc[0] = *A_loc;
5342   }
5343   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5344   if (!col) { /* attach global id of condensed columns */
5345     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5346   }
5347   *A_loc = aloc[0];
5348   PetscCall(PetscFree(aloc));
5349   if (!row) { PetscCall(ISDestroy(&isrowa)); }
5350   if (!col) { PetscCall(ISDestroy(&iscola)); }
5351   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5352   PetscFunctionReturn(0);
5353 }
5354 
/*
 * Create a sequential AIJ matrix based on row indices; a whole row is extracted once it is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) {
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows)
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per-row counts and running offsets, interleaved as (diag, off-diag) pairs */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0]         = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1]         = po->i[i + 1] - po->i[i];
    /* compute running offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build two SFs whose roots are the raw diag/off-diag data of P and whose
   * leaves are the slots of P_oth's single value array */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal needs to point to the first part of memory */
      ilocal[dntotalcols++]      = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (shift in place; undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global column indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Map po->j back to local indices, undoing the in-place globalization above */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5527 
/*
 * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
 * This supports MPIAIJ and MAIJ matrices.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys (dof consecutive columns collapse to one key) */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp, a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assumes a->garray is sorted; otherwise the "previous key" shortcut below does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* garray is sorted, so a repeated key can only be the one inserted last (count - 1) */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* Hash keys come out unordered; sort them to obtain ascending row indices */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}
5604 
5605 /*@C
5606   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5607 
5608   Collective on Mat
5609 
5610   Input Parameters:
5611 + A - the first matrix in mpiaij format
5612 . B - the second matrix in mpiaij format
5613 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5614 
5615   Output Parameters:
5616 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5617 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5618 - B_seq - the sequential matrix generated
5619 
5620   Level: developer
5621 
5622 @*/
5623 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) {
5624   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5625   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5626   IS          isrowb, iscolb;
5627   Mat        *bseq = NULL;
5628 
5629   PetscFunctionBegin;
5630   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5631     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5632   }
5633   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5634 
5635   if (scall == MAT_INITIAL_MATRIX) {
5636     start = A->cmap->rstart;
5637     cmap  = a->garray;
5638     nzA   = a->A->cmap->n;
5639     nzB   = a->B->cmap->n;
5640     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5641     ncols = 0;
5642     for (i = 0; i < nzB; i++) { /* row < local row index */
5643       if (cmap[i] < start) idx[ncols++] = cmap[i];
5644       else break;
5645     }
5646     imark = i;
5647     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5648     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5649     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5650     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5651   } else {
5652     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5653     isrowb = *rowb;
5654     iscolb = *colb;
5655     PetscCall(PetscMalloc1(1, &bseq));
5656     bseq[0] = *B_seq;
5657   }
5658   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5659   *B_seq = bseq[0];
5660   PetscCall(PetscFree(bseq));
5661   if (!rowb) {
5662     PetscCall(ISDestroy(&isrowb));
5663   } else {
5664     *rowb = isrowb;
5665   }
5666   if (!colb) {
5667     PetscCall(ISDestroy(&iscolb));
5668   } else {
5669     *colb = iscolb;
5670   }
5671   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5672   PetscFunctionReturn(0);
5673 }
5674 
5675 /*
5676     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5677     of the OFF-DIAGONAL portion of local A
5678 
5679     Collective on Mat
5680 
5681    Input Parameters:
5682 +    A,B - the matrices in mpiaij format
5683 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5684 
5685    Output Parameter:
5686 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5687 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5688 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5689 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5690 
    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5693 
5694     Level: developer
5695 
5696 */
5697 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) {
5698   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5699   Mat_SeqAIJ        *b_oth;
5700   VecScatter         ctx;
5701   MPI_Comm           comm;
5702   const PetscMPIInt *rprocs, *sprocs;
5703   const PetscInt    *srow, *rstarts, *sstarts;
5704   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5705   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5706   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5707   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5708   PetscMPIInt        size, tag, rank, nreqs;
5709 
5710   PetscFunctionBegin;
5711   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5712   PetscCallMPI(MPI_Comm_size(comm, &size));
5713 
5714   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5715     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5716   }
5717   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5718   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5719 
5720   if (size == 1) {
5721     startsj_s = NULL;
5722     bufa_ptr  = NULL;
5723     *B_oth    = NULL;
5724     PetscFunctionReturn(0);
5725   }
5726 
5727   ctx = a->Mvctx;
5728   tag = ((PetscObject)ctx)->tag;
5729 
5730   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5731   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5732   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5733   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5734   PetscCall(PetscMalloc1(nreqs, &reqs));
5735   rwaits = reqs;
5736   swaits = reqs + nrecvs;
5737 
5738   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5739   if (scall == MAT_INITIAL_MATRIX) {
5740     /* i-array */
5741     /*---------*/
5742     /*  post receives */
5743     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5744     for (i = 0; i < nrecvs; i++) {
5745       rowlen = rvalues + rstarts[i] * rbs;
5746       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5747       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5748     }
5749 
5750     /* pack the outgoing message */
5751     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5752 
5753     sstartsj[0] = 0;
5754     rstartsj[0] = 0;
5755     len         = 0; /* total length of j or a array to be sent */
5756     if (nsends) {
5757       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5758       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5759     }
5760     for (i = 0; i < nsends; i++) {
5761       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5762       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5763       for (j = 0; j < nrows; j++) {
5764         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5765         for (l = 0; l < sbs; l++) {
5766           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5767 
5768           rowlen[j * sbs + l] = ncols;
5769 
5770           len += ncols;
5771           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5772         }
5773         k++;
5774       }
5775       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5776 
5777       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5778     }
5779     /* recvs and sends of i-array are completed */
5780     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5781     PetscCall(PetscFree(svalues));
5782 
5783     /* allocate buffers for sending j and a arrays */
5784     PetscCall(PetscMalloc1(len + 1, &bufj));
5785     PetscCall(PetscMalloc1(len + 1, &bufa));
5786 
5787     /* create i-array of B_oth */
5788     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5789 
5790     b_othi[0] = 0;
5791     len       = 0; /* total length of j or a array to be received */
5792     k         = 0;
5793     for (i = 0; i < nrecvs; i++) {
5794       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5795       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5796       for (j = 0; j < nrows; j++) {
5797         b_othi[k + 1] = b_othi[k] + rowlen[j];
5798         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5799         k++;
5800       }
5801       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5802     }
5803     PetscCall(PetscFree(rvalues));
5804 
5805     /* allocate space for j and a arrays of B_oth */
5806     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5807     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5808 
5809     /* j-array */
5810     /*---------*/
5811     /*  post receives of j-array */
5812     for (i = 0; i < nrecvs; i++) {
5813       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5814       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5815     }
5816 
5817     /* pack the outgoing message j-array */
5818     if (nsends) k = sstarts[0];
5819     for (i = 0; i < nsends; i++) {
5820       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5821       bufJ  = bufj + sstartsj[i];
5822       for (j = 0; j < nrows; j++) {
5823         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5824         for (ll = 0; ll < sbs; ll++) {
5825           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5826           for (l = 0; l < ncols; l++) { *bufJ++ = cols[l]; }
5827           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5828         }
5829       }
5830       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5831     }
5832 
5833     /* recvs and sends of j-array are completed */
5834     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5835   } else if (scall == MAT_REUSE_MATRIX) {
5836     sstartsj = *startsj_s;
5837     rstartsj = *startsj_r;
5838     bufa     = *bufa_ptr;
5839     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5840     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5841   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5842 
5843   /* a-array */
5844   /*---------*/
5845   /*  post receives of a-array */
5846   for (i = 0; i < nrecvs; i++) {
5847     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5848     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5849   }
5850 
5851   /* pack the outgoing message a-array */
5852   if (nsends) k = sstarts[0];
5853   for (i = 0; i < nsends; i++) {
5854     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5855     bufA  = bufa + sstartsj[i];
5856     for (j = 0; j < nrows; j++) {
5857       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5858       for (ll = 0; ll < sbs; ll++) {
5859         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5860         for (l = 0; l < ncols; l++) { *bufA++ = vals[l]; }
5861         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5862       }
5863     }
5864     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5865   }
5866   /* recvs and sends of a-array are completed */
5867   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5868   PetscCall(PetscFree(reqs));
5869 
5870   if (scall == MAT_INITIAL_MATRIX) {
5871     /* put together the new matrix */
5872     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5873 
5874     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5875     /* Since these are PETSc arrays, change flags to free them as necessary. */
5876     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5877     b_oth->free_a  = PETSC_TRUE;
5878     b_oth->free_ij = PETSC_TRUE;
5879     b_oth->nonew   = 0;
5880 
5881     PetscCall(PetscFree(bufj));
5882     if (!startsj_s || !bufa_ptr) {
5883       PetscCall(PetscFree2(sstartsj, rstartsj));
5884       PetscCall(PetscFree(bufa_ptr));
5885     } else {
5886       *startsj_s = sstartsj;
5887       *startsj_r = rstartsj;
5888       *bufa_ptr  = bufa;
5889     }
5890   } else if (scall == MAT_REUSE_MATRIX) {
5891     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5892   }
5893 
5894   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
5895   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
5896   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5897   PetscFunctionReturn(0);
5898 }
5899 
5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5903 #if defined(PETSC_HAVE_MKL_SPARSE)
5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5905 #endif
5906 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5908 #if defined(PETSC_HAVE_ELEMENTAL)
5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
5910 #endif
5911 #if defined(PETSC_HAVE_SCALAPACK)
5912 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
5913 #endif
5914 #if defined(PETSC_HAVE_HYPRE)
5915 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
5916 #endif
5917 #if defined(PETSC_HAVE_CUDA)
5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
5919 #endif
5920 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
5922 #endif
5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
5924 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
5925 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5926 
5927 /*
5928     Computes (B'*A')' since computing B*A directly is untenable
5929 
5930                n                       p                          p
5931         [             ]       [             ]         [                 ]
5932       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5933         [             ]       [             ]         [                 ]
5934 
5935 */
5936 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) {
5937   Mat At, Bt, Ct;
5938 
5939   PetscFunctionBegin;
5940   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
5941   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
5942   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
5943   PetscCall(MatDestroy(&At));
5944   PetscCall(MatDestroy(&Bt));
5945   PetscCall(MatTransposeSetPrecursor(Ct, C));
5946   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
5947   PetscCall(MatDestroy(&Ct));
5948   PetscFunctionReturn(0);
5949 }
5950 
5951 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) {
5952   PetscBool cisdense;
5953 
5954   PetscFunctionBegin;
5955   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
5956   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
5957   PetscCall(MatSetBlockSizesFromMats(C, A, B));
5958   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
5959   if (!cisdense) { PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); }
5960   PetscCall(MatSetUp(C));
5961 
5962   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5963   PetscFunctionReturn(0);
5964 }
5965 
5966 /* ----------------------------------------------------------------*/
5967 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) {
5968   Mat_Product *product = C->product;
5969   Mat          A = product->A, B = product->B;
5970 
5971   PetscFunctionBegin;
5972   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5973     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5974 
5975   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5976   C->ops->productsymbolic = MatProductSymbolic_AB;
5977   PetscFunctionReturn(0);
5978 }
5979 
5980 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) {
5981   Mat_Product *product = C->product;
5982 
5983   PetscFunctionBegin;
5984   if (product->type == MATPRODUCT_AB) { PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); }
5985   PetscFunctionReturn(0);
5986 }
5987 
5988 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
5989 
5990   Input Parameters:
5991 
5992     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
5993     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
5994 
5995     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
5996 
5997     For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6000     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6001 
6002     Similar for Set2.
6003 
6004     This routine merges the two sets of nonzeros row by row and removes repeats.
6005 
6006   Output Parameters: (memory is allocated by the caller)
6007 
6008     i[],j[]: the CSR of the merged matrix, which has m rows.
6009     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6010     imap2[]: similar to imap1[], but for Set2.
6011     Note we order nonzeros row-by-row and from left to right.
6012 */
6013 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) {
6014   PetscInt   r, m; /* Row index of mat */
6015   PetscCount t, t1, t2, b1, e1, b2, e2;
6016 
6017   PetscFunctionBegin;
6018   PetscCall(MatGetLocalSize(mat, &m, NULL));
6019   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6020   i[0]        = 0;
6021   for (r = 0; r < m; r++) { /* Do row by row merging */
6022     b1 = rowBegin1[r];
6023     e1 = rowEnd1[r];
6024     b2 = rowBegin2[r];
6025     e2 = rowEnd2[r];
6026     while (b1 < e1 && b2 < e2) {
6027       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6028         j[t]      = j1[b1];
6029         imap1[t1] = t;
6030         imap2[t2] = t;
6031         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6032         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6033         t1++;
6034         t2++;
6035         t++;
6036       } else if (j1[b1] < j2[b2]) {
6037         j[t]      = j1[b1];
6038         imap1[t1] = t;
6039         b1 += jmap1[t1 + 1] - jmap1[t1];
6040         t1++;
6041         t++;
6042       } else {
6043         j[t]      = j2[b2];
6044         imap2[t2] = t;
6045         b2 += jmap2[t2 + 1] - jmap2[t2];
6046         t2++;
6047         t++;
6048       }
6049     }
6050     /* Merge the remaining in either j1[] or j2[] */
6051     while (b1 < e1) {
6052       j[t]      = j1[b1];
6053       imap1[t1] = t;
6054       b1 += jmap1[t1 + 1] - jmap1[t1];
6055       t1++;
6056       t++;
6057     }
6058     while (b2 < e2) {
6059       j[t]      = j2[b2];
6060       imap2[t2] = t;
6061       b2 += jmap2[t2 + 1] - jmap2[t2];
6062       t2++;
6063       t++;
6064     }
6065     i[r + 1] = t;
6066   }
6067   PetscFunctionReturn(0);
6068 }
6069 
6070 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6071 
6072   Input Parameters:
6073     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6074     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6075       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6076 
6077       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6078       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6079 
6080   Output Parameters:
6081     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6082     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6083       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6084       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6085 
6086     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6087       Atot: number of entries belonging to the diagonal block.
6088       Annz: number of unique nonzeros belonging to the diagonal block.
6089       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6090         repeats (i.e., same 'i,j' pair).
6091       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6092         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6093 
6094       Atot: number of entries belonging to the diagonal block
6095       Annz: number of unique nonzeros belonging to the diagonal block.
6096 
6097     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6098 
6099     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6100 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) {
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  /* i[] is sorted, so all to-be-ignored (negative-row) entries sit at the front */
  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.

     Trick: diagonal-block column indices are temporarily shifted by -PETSC_MAX_INT
     so that after sorting the row, its diagonal entries precede its off-diagonal
     entries; the shift is undone while counting unique nonzeros below.
     Rows that receive no entries keep the caller's zeroed rowBegin/rowMid/rowEnd,
     i.e. they read as empty.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort this row's (shifted) column indices, dragging perm[] along */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* undo the shift applied before sorting */
        p++;
      } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do { p++; } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* advance to the next row's first entry */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do { p++; } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* prefix sum of repeat counts */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do { p++; } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6202 
6203 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6204 
6205   Input Parameters:
6206     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6207     nnz:  number of unique nonzeros in the merged matrix
6208     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6210 
6211   Output Parameter: (memory is allocated by the caller)
6212     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6213 
6214   Example:
6215     nnz1 = 4
6216     nnz  = 6
6217     imap = [1,3,4,5]
6218     jmap = [0,3,5,6,7]
6219    then,
6220     jmap_new = [0,0,3,3,5,6,7]
6221 */
6222 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) {
6223   PetscCount k, p;
6224 
6225   PetscFunctionBegin;
6226   jmap_new[0] = 0;
6227   p           = nnz;                /* p loops over jmap_new[] backwards */
6228   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6229     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6230   }
6231   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6232   PetscFunctionReturn(0);
6233 }
6234 
6235 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) {
6236   MPI_Comm    comm;
6237   PetscMPIInt rank, size;
6238   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6239   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6240   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6241 
6242   PetscFunctionBegin;
6243   PetscCall(PetscFree(mpiaij->garray));
6244   PetscCall(VecDestroy(&mpiaij->lvec));
6245 #if defined(PETSC_USE_CTABLE)
6246   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6247 #else
6248   PetscCall(PetscFree(mpiaij->colmap));
6249 #endif
6250   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6251   mat->assembled     = PETSC_FALSE;
6252   mat->was_assembled = PETSC_FALSE;
6253   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6254 
6255   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6256   PetscCallMPI(MPI_Comm_size(comm, &size));
6257   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6258   PetscCall(PetscLayoutSetUp(mat->rmap));
6259   PetscCall(PetscLayoutSetUp(mat->cmap));
6260   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6261   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6262   PetscCall(MatGetLocalSize(mat, &m, &n));
6263   PetscCall(MatGetSize(mat, &M, &N));
6264 
6265   /* ---------------------------------------------------------------------------*/
6266   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6267   /* entries come first, then local rows, then remote rows.                     */
6268   /* ---------------------------------------------------------------------------*/
6269   PetscCount n1 = coo_n, *perm1;
6270   PetscInt  *i1 = coo_i, *j1 = coo_j;
6271 
6272   PetscCall(PetscMalloc1(n1, &perm1));
6273   for (k = 0; k < n1; k++) perm1[k] = k;
6274 
6275   /* Manipulate indices so that entries with negative row or col indices will have smallest
6276      row indices, local entries will have greater but negative row indices, and remote entries
6277      will have positive row indices.
6278   */
6279   for (k = 0; k < n1; k++) {
6280     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6281     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6282     else {
6283       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6284       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6285     }
6286   }
6287 
6288   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6289   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6290   for (k = 0; k < n1; k++) {
6291     if (i1[k] > PETSC_MIN_INT) break;
6292   }                                                                               /* Advance k to the first entry we need to take care of */
6293   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6294   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6295 
6296   /* ---------------------------------------------------------------------------*/
6297   /*           Split local rows into diag/offdiag portions                      */
6298   /* ---------------------------------------------------------------------------*/
6299   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6300   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6301   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6302 
6303   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6304   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6305   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6306 
6307   /* ---------------------------------------------------------------------------*/
6308   /*           Send remote rows to their owner                                  */
6309   /* ---------------------------------------------------------------------------*/
6310   /* Find which rows should be sent to which remote ranks*/
6311   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6312   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6313   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6314   const PetscInt *ranges;
6315   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6316 
6317   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6318   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6319   for (k = rem; k < n1;) {
6320     PetscMPIInt owner;
6321     PetscInt    firstRow, lastRow;
6322 
6323     /* Locate a row range */
6324     firstRow = i1[k]; /* first row of this owner */
6325     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6326     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6327 
6328     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6329     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6330 
6331     /* All entries in [k,p) belong to this remote owner */
6332     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6333       PetscMPIInt *sendto2;
6334       PetscInt    *nentries2;
6335       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6336 
6337       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6338       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6339       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6340       PetscCall(PetscFree2(sendto, nentries2));
6341       sendto   = sendto2;
6342       nentries = nentries2;
6343       maxNsend = maxNsend2;
6344     }
6345     sendto[nsend]   = owner;
6346     nentries[nsend] = p - k;
6347     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6348     nsend++;
6349     k = p;
6350   }
6351 
6352   /* Build 1st SF to know offsets on remote to send data */
6353   PetscSF      sf1;
6354   PetscInt     nroots = 1, nroots2 = 0;
6355   PetscInt     nleaves = nsend, nleaves2 = 0;
6356   PetscInt    *offsets;
6357   PetscSFNode *iremote;
6358 
6359   PetscCall(PetscSFCreate(comm, &sf1));
6360   PetscCall(PetscMalloc1(nsend, &iremote));
6361   PetscCall(PetscMalloc1(nsend, &offsets));
6362   for (k = 0; k < nsend; k++) {
6363     iremote[k].rank  = sendto[k];
6364     iremote[k].index = 0;
6365     nleaves2 += nentries[k];
6366     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6367   }
6368   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6369   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6370   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6371   PetscCall(PetscSFDestroy(&sf1));
6372   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6373 
6374   /* Build 2nd SF to send remote COOs to their owner */
6375   PetscSF sf2;
6376   nroots  = nroots2;
6377   nleaves = nleaves2;
6378   PetscCall(PetscSFCreate(comm, &sf2));
6379   PetscCall(PetscSFSetFromOptions(sf2));
6380   PetscCall(PetscMalloc1(nleaves, &iremote));
6381   p = 0;
6382   for (k = 0; k < nsend; k++) {
6383     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6384     for (q = 0; q < nentries[k]; q++, p++) {
6385       iremote[p].rank  = sendto[k];
6386       iremote[p].index = offsets[k] + q;
6387     }
6388   }
6389   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6390 
6391   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6392   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6393 
6394   /* Send the remote COOs to their owner */
6395   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6396   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6397   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6398   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6399   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6400   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6401   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6402 
6403   PetscCall(PetscFree(offsets));
6404   PetscCall(PetscFree2(sendto, nentries));
6405 
6406   /* ---------------------------------------------------------------*/
6407   /* Sort received COOs by row along with the permutation array     */
6408   /* ---------------------------------------------------------------*/
6409   for (k = 0; k < n2; k++) perm2[k] = k;
6410   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6411 
6412   /* ---------------------------------------------------------------*/
6413   /* Split received COOs into diag/offdiag portions                 */
6414   /* ---------------------------------------------------------------*/
6415   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6416   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6417   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6418 
6419   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6420   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6421 
6422   /* --------------------------------------------------------------------------*/
6423   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6424   /* --------------------------------------------------------------------------*/
6425   PetscInt *Ai, *Bi;
6426   PetscInt *Aj, *Bj;
6427 
6428   PetscCall(PetscMalloc1(m + 1, &Ai));
6429   PetscCall(PetscMalloc1(m + 1, &Bi));
6430   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6431   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6432 
6433   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6434   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6435   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6436   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6437   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6438 
6439   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6440   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6441 
6442   /* --------------------------------------------------------------------------*/
6443   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6444   /* expect nonzeros in A/B most likely have local contributing entries        */
6445   /* --------------------------------------------------------------------------*/
6446   PetscInt    Annz = Ai[m];
6447   PetscInt    Bnnz = Bi[m];
6448   PetscCount *Ajmap1_new, *Bjmap1_new;
6449 
6450   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6451   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6452 
6453   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6454   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6455 
6456   PetscCall(PetscFree(Aimap1));
6457   PetscCall(PetscFree(Ajmap1));
6458   PetscCall(PetscFree(Bimap1));
6459   PetscCall(PetscFree(Bjmap1));
6460   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6461   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6462   PetscCall(PetscFree(perm1));
6463   PetscCall(PetscFree3(i2, j2, perm2));
6464 
6465   Ajmap1 = Ajmap1_new;
6466   Bjmap1 = Bjmap1_new;
6467 
6468   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6469   if (Annz < Annz1 + Annz2) {
6470     PetscInt *Aj_new;
6471     PetscCall(PetscMalloc1(Annz, &Aj_new));
6472     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6473     PetscCall(PetscFree(Aj));
6474     Aj = Aj_new;
6475   }
6476 
6477   if (Bnnz < Bnnz1 + Bnnz2) {
6478     PetscInt *Bj_new;
6479     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6480     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6481     PetscCall(PetscFree(Bj));
6482     Bj = Bj_new;
6483   }
6484 
6485   /* --------------------------------------------------------------------------------*/
6486   /* Create new submatrices for on-process and off-process coupling                  */
6487   /* --------------------------------------------------------------------------------*/
6488   PetscScalar *Aa, *Ba;
6489   MatType      rtype;
6490   Mat_SeqAIJ  *a, *b;
6491   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6492   PetscCall(PetscCalloc1(Bnnz, &Ba));
6493   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6494   if (cstart) {
6495     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6496   }
6497   PetscCall(MatDestroy(&mpiaij->A));
6498   PetscCall(MatDestroy(&mpiaij->B));
6499   PetscCall(MatGetRootType_Private(mat, &rtype));
6500   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6501   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6502   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6503 
6504   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6505   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6506   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6507   a->free_a = b->free_a = PETSC_TRUE;
6508   a->free_ij = b->free_ij = PETSC_TRUE;
6509 
6510   /* conversion must happen AFTER multiply setup */
6511   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6512   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6513   PetscCall(VecDestroy(&mpiaij->lvec));
6514   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6515   PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)mpiaij->lvec));
6516 
6517   mpiaij->coo_n   = coo_n;
6518   mpiaij->coo_sf  = sf2;
6519   mpiaij->sendlen = nleaves;
6520   mpiaij->recvlen = nroots;
6521 
6522   mpiaij->Annz = Annz;
6523   mpiaij->Bnnz = Bnnz;
6524 
6525   mpiaij->Annz2 = Annz2;
6526   mpiaij->Bnnz2 = Bnnz2;
6527 
6528   mpiaij->Atot1 = Atot1;
6529   mpiaij->Atot2 = Atot2;
6530   mpiaij->Btot1 = Btot1;
6531   mpiaij->Btot2 = Btot2;
6532 
6533   mpiaij->Ajmap1 = Ajmap1;
6534   mpiaij->Aperm1 = Aperm1;
6535 
6536   mpiaij->Bjmap1 = Bjmap1;
6537   mpiaij->Bperm1 = Bperm1;
6538 
6539   mpiaij->Aimap2 = Aimap2;
6540   mpiaij->Ajmap2 = Ajmap2;
6541   mpiaij->Aperm2 = Aperm2;
6542 
6543   mpiaij->Bimap2 = Bimap2;
6544   mpiaij->Bjmap2 = Bjmap2;
6545   mpiaij->Bperm2 = Bperm2;
6546 
6547   mpiaij->Cperm1 = Cperm1;
6548 
6549   /* Allocate in preallocation. If not used, it has zero cost on host */
6550   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6551   PetscFunctionReturn(0);
6552 }
6553 
6554 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) {
6555   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6556   Mat               A = mpiaij->A, B = mpiaij->B;
6557   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6558   PetscScalar      *Aa, *Ba;
6559   PetscScalar      *sendbuf = mpiaij->sendbuf;
6560   PetscScalar      *recvbuf = mpiaij->recvbuf;
6561   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6562   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6563   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6564   const PetscCount *Cperm1 = mpiaij->Cperm1;
6565 
6566   PetscFunctionBegin;
6567   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6568   PetscCall(MatSeqAIJGetArray(B, &Ba));
6569 
6570   /* Pack entries to be sent to remote */
6571   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6572 
6573   /* Send remote entries to their owner and overlap the communication with local computation */
6574   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6575   /* Add local entries to A and B */
6576   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6577     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stablility */
6578     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6579     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6580   }
6581   for (PetscCount i = 0; i < Bnnz; i++) {
6582     PetscScalar sum = 0.0;
6583     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6584     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6585   }
6586   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6587 
6588   /* Add received remote entries to A and B */
6589   for (PetscCount i = 0; i < Annz2; i++) {
6590     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6591   }
6592   for (PetscCount i = 0; i < Bnnz2; i++) {
6593     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6594   }
6595   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6596   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6597   PetscFunctionReturn(0);
6598 }
6599 
6600 /* ----------------------------------------------------------------*/
6601 
6602 /*MC
6603    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6604 
6605    Options Database Keys:
6606 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6607 
6608    Level: beginner
6609 
6610    Notes:
6611     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6612     in this case the values associated with the rows and columns one passes in are set to zero
6613     in the matrix
6614 
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6617 
6618 .seealso: `MatCreateAIJ()`
6619 M*/
6620 
/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the shared
   function table, creates the stash that buffers off-process entries set with
   MatSetValues(), and registers type-specific implementations and conversions. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNewLog(B, &b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific implementations queried via PetscObjectQueryFunction();
     the trailing _C names are the public dispatch points for this matrix type */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other matrix types; device/back-end ones are compile-time conditional */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  /* COO assembly entry points (see MatSetPreallocationCOO_MPIAIJ / MatSetValuesCOO_MPIAIJ above) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}
6699 
6700 /*@C
6701      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6702          and "off-diagonal" part of the matrix in CSR format.
6703 
6704    Collective
6705 
6706    Input Parameters:
6707 +  comm - MPI communicator
6708 .  m - number of local rows (Cannot be PETSC_DECIDE)
6709 .  n - This value should be the same as the local size used in creating the
6710        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6711        calculated if N is given) For square matrices n is almost always m.
6712 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6713 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6714 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6715 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6716 .   a - matrix values
6717 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6718 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6719 -   oa - matrix values
6720 
6721    Output Parameter:
6722 .   mat - the matrix
6723 
6724    Level: advanced
6725 
6726    Notes:
6727        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6728        must free the arrays once the matrix has been destroyed and not before.
6729 
6730        The i and j indices are 0 based
6731 
6732        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6733 
6734        This sets local rows and cannot be used to set off-processor values.
6735 
6736        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6737        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6738        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6739        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6740        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6741        communication if it is known that only local entries will be set.
6742 
6743 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6744           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6745 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) {
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Basic sanity checks on the user-supplied CSR arrays; m must be the actual local row count */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ)); /* installs Mat_MPIAIJ as (*mat)->data, accessed below */
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* Mark as preallocated: the user's CSR arrays below take the place of the usual preallocation */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user's arrays (NOT copied; caller retains ownership, see the man page Notes)
     as the "diagonal" (A) and "off-diagonal" (B) sequential blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* All entries are local by construction, so assembly can skip off-process communication;
     the option is reset afterwards so later MatSetValues() calls behave normally */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  /* The wrapped sparsity pattern is fixed; error out on attempts to add new nonzero locations */
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
6773 
/* Product data for back-end MPIAIJ matrix-matrix products (see
   MatProductSymbolic_MPIAIJBACKEND / MatProductNumeric_MPIAIJBACKEND):
   the product C is built from a series of local intermediate products whose
   values are inserted into C through the COO assembly API. */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i]; own[i+1]-own[i] entries (indexed up to cp in Numeric) */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i]; same layout as own[] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* NOTE(review): presumably "merge diag/off-diag of B" (cf. Bloc) -- confirm in the symbolic phase */
  PetscBool P_oth_bind; /* NOTE(review): presumably binds P_oth to host memory -- confirm against the options handling */
} MatMatMPIAIJBACKEND;
6804 
6805 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) {
6806   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6807   PetscInt             i;
6808 
6809   PetscFunctionBegin;
6810   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6811   PetscCall(PetscFree(mmdata->bufa));
6812   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6813   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6814   PetscCall(MatDestroy(&mmdata->P_oth));
6815   PetscCall(MatDestroy(&mmdata->Bloc));
6816   PetscCall(PetscSFDestroy(&mmdata->sf));
6817   for (i = 0; i < mmdata->cp; i++) { PetscCall(MatDestroy(&mmdata->mp[i])); }
6818   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6819   PetscCall(PetscFree(mmdata->own[0]));
6820   PetscCall(PetscFree(mmdata->own));
6821   PetscCall(PetscFree(mmdata->off[0]));
6822   PetscCall(PetscFree(mmdata->off));
6823   PetscCall(PetscFree(mmdata));
6824   PetscFunctionReturn(0);
6825 }
6826 
6827 /* Copy selected n entries with indices in idx[] of A to v[].
6828    If idx is NULL, copy the whole data array of A to v[]
6829  */
6830 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) {
6831   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6832 
6833   PetscFunctionBegin;
6834   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6835   if (f) {
6836     PetscCall((*f)(A, n, idx, v));
6837   } else {
6838     const PetscScalar *vv;
6839 
6840     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6841     if (n && idx) {
6842       PetscScalar    *w  = v;
6843       const PetscInt *oi = idx;
6844       PetscInt        j;
6845 
6846       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6847     } else {
6848       PetscCall(PetscArraycpy(v, vv, n));
6849     }
6850     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
6851   }
6852   PetscFunctionReturn(0);
6853 }
6854 
/* Numeric phase for back-end MPIAIJ products: recompute the intermediate local
   products, harvest their values into the COO buffers, and insert them into C
   via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) {
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* n_d/n_o: running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) { PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); }
    if (mmdata->Bloc) { PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); }
  }
  mmdata->reusesym = PETSC_FALSE; /* only the first numeric call after symbolic may skip the update above */

  /* Run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Copy values of the non-temporary products into the COO buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* number of off-process values contributed by mp[i] */

    if (mmdata->mptmp[i]) continue; /* temporary products only feed later products, not C itself */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; /* number of on-process values contributed by mp[i] */

      /* coo_w collects values destined for other ranks, coo_v the ones kept locally */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* No off-process entries: copy the whole value array of mp[i] */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* Received off-process values are appended after the n_d on-process ones in coo_v */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}
6898 
/* Support for Pt * A, A * P, or Pt * A * P

   The product is computed as a series of intermediate products of the sequential
   diagonal/off-diagonal blocks of A and P, whose entries are then assembled into C
   via COO (coordinate format) insertion; entries landing on remote processes are
   routed through a PetscSF. This routine is the symbolic phase: it creates the
   intermediate products, counts and builds the COO coordinates, and preallocates C. */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) {
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE];  /* col/row map type for each Mat in mp[]. */
                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                          /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt        *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* If A is known symmetric, compute A^t*B as A*B and record that the symbolic
     phase relied on the symmetry (checked again during the numeric phase) */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* Determine local/global sizes of C and whether computed values must be
     scattered to other processes (transpose-type products produce rows owned elsewhere) */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocess: all rows are local by construction */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization: option names depend on whether the user called the high-level
     API (MatMatMult/MatPtAP) or the MatProduct interface */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* Build the list of intermediate (sequential) products mp[0..cp-1]; for each one
     record how its local row/col indices map to global indices of C (rmapt/cmapt,
     rmapa/cmapa) and whether it is only a temporary operand (mptmp) */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* off-diag columns map through P's column garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray; /* rows of P_off^t * A_loc belong to P's off-process columns */
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an operand of the next product */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    /* idxoff/idxown bump-allocate into the shared off[0]/own[0] arrays; the final
       pointer values become the start of the next matrix's segment (CSR-like) */
    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* build an SF whose leaves are the offproc entries and whose roots are rows of C,
       then use its multi-SF to learn how many entries will be received (ncoo2) */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* still create an (empty) SF so the numeric phase can use a uniform code path */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) { PetscCall(ISRestoreIndices(glob, &globidx)); }
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) { PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); }
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}
7390 
7391 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) {
7392   Mat_Product *product = mat->product;
7393 #if defined(PETSC_HAVE_DEVICE)
7394   PetscBool match  = PETSC_FALSE;
7395   PetscBool usecpu = PETSC_FALSE;
7396 #else
7397   PetscBool match = PETSC_TRUE;
7398 #endif
7399 
7400   PetscFunctionBegin;
7401   MatCheckProduct(mat, 1);
7402 #if defined(PETSC_HAVE_DEVICE)
7403   if (!product->A->boundtocpu && !product->B->boundtocpu) { PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); }
7404   if (match) { /* we can always fallback to the CPU if requested */
7405     switch (product->type) {
7406     case MATPRODUCT_AB:
7407       if (product->api_user) {
7408         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7409         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7410         PetscOptionsEnd();
7411       } else {
7412         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7413         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7414         PetscOptionsEnd();
7415       }
7416       break;
7417     case MATPRODUCT_AtB:
7418       if (product->api_user) {
7419         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7420         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7421         PetscOptionsEnd();
7422       } else {
7423         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7424         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7425         PetscOptionsEnd();
7426       }
7427       break;
7428     case MATPRODUCT_PtAP:
7429       if (product->api_user) {
7430         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7431         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7432         PetscOptionsEnd();
7433       } else {
7434         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7435         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7436         PetscOptionsEnd();
7437       }
7438       break;
7439     default: break;
7440     }
7441     match = (PetscBool)!usecpu;
7442   }
7443 #endif
7444   if (match) {
7445     switch (product->type) {
7446     case MATPRODUCT_AB:
7447     case MATPRODUCT_AtB:
7448     case MATPRODUCT_PtAP: mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; break;
7449     default: break;
7450     }
7451   }
7452   /* fallback to MPIAIJ ops */
7453   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7454   PetscFunctionReturn(0);
7455 }
7456 
7457 /*
7458    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7459 
7460    n - the number of block indices in cc[]
7461    cc - the block indices (must be large enough to contain the indices)
7462 */
7463 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) {
7464   PetscInt        cnt = -1, nidx, j;
7465   const PetscInt *idx;
7466 
7467   PetscFunctionBegin;
7468   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7469   if (nidx) {
7470     cnt     = 0;
7471     cc[cnt] = idx[0] / bs;
7472     for (j = 1; j < nidx; j++) {
7473       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7474     }
7475   }
7476   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7477   *n = cnt + 1;
7478   PetscFunctionReturn(0);
7479 }
7480 
7481 /*
7482     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7483 
7484     ncollapsed - the number of block indices
7485     collapsed - the block indices (must be large enough to contain the indices)
7486 */
7487 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) {
7488   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7489 
7490   PetscFunctionBegin;
7491   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7492   for (i = start + 1; i < start + bs; i++) {
7493     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7494     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7495     cprevtmp = cprev;
7496     cprev    = merged;
7497     merged   = cprevtmp;
7498   }
7499   *ncollapsed = nprev;
7500   if (collapsed) *collapsed = cprev;
7501   PetscFunctionReturn(0);
7502 }
7503 
7504 /* -------------------------------------------------------------------------- */
7505 /*
7506  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7507 
7508  Input Parameter:
7509  . Amat - matrix
7510  - symmetrize - make the result symmetric
7511  + scale - scale with diagonal
7512 
7513  Output Parameter:
7514  . a_Gmat - output scalar graph >= 0
7515 
7516  */
7517 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) {
7518   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7519   MPI_Comm  comm;
7520   Mat       Gmat;
7521   PetscBool ismpiaij, isseqaij;
7522   Mat       a, b, c;
7523   MatType   jtype;
7524 
7525   PetscFunctionBegin;
7526   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7527   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7528   PetscCall(MatGetSize(Amat, &MM, &NN));
7529   PetscCall(MatGetBlockSize(Amat, &bs));
7530   nloc = (Iend - Istart) / bs;
7531 
7532   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7533   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7534   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7535 
7536   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7537   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7538      implementation */
7539   if (bs > 1) {
7540     PetscCall(MatGetType(Amat, &jtype));
7541     PetscCall(MatCreate(comm, &Gmat));
7542     PetscCall(MatSetType(Gmat, jtype));
7543     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7544     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7545     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7546       PetscInt  *d_nnz, *o_nnz;
7547       MatScalar *aa, val, AA[4096];
7548       PetscInt  *aj, *ai, AJ[4096], nc;
7549       if (isseqaij) {
7550         a = Amat;
7551         b = NULL;
7552       } else {
7553         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7554         a             = d->A;
7555         b             = d->B;
7556       }
7557       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7558       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7559       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7560         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
7561         const PetscInt *cols;
7562         for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
7563           PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
7564           nnz[brow / bs] = jj / bs;
7565           if (jj % bs) ok = 0;
7566           if (cols) j0 = cols[0];
7567           else j0 = -1;
7568           PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
7569           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7570           for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
7571             PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
7572             if (jj % bs) ok = 0;
7573             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7574             if (nnz[brow / bs] != jj / bs) ok = 0;
7575             PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
7576           }
7577           if (!ok) {
7578             PetscCall(PetscFree2(d_nnz, o_nnz));
7579             goto old_bs;
7580           }
7581         }
7582         PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax);
7583       }
7584       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7585       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7586       PetscCall(PetscFree2(d_nnz, o_nnz));
7587       // diag
7588       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7589         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7590         ai               = aseq->i;
7591         n                = ai[brow + 1] - ai[brow];
7592         aj               = aseq->j + ai[brow];
7593         for (int k = 0; k < n; k += bs) {        // block columns
7594           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7595           val        = 0;
7596           for (int ii = 0; ii < bs; ii++) { // rows in block
7597             aa = aseq->a + ai[brow + ii] + k;
7598             for (int jj = 0; jj < bs; jj++) {         // columns in block
7599               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7600             }
7601           }
7602           AA[k / bs] = val;
7603         }
7604         grow = Istart / bs + brow / bs;
7605         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7606       }
7607       // off-diag
7608       if (ismpiaij) {
7609         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7610         const PetscScalar *vals;
7611         const PetscInt    *cols, *garray = aij->garray;
7612         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7613         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7614           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7615           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7616             AA[k / bs] = 0;
7617             AJ[cidx]   = garray[cols[k]] / bs;
7618           }
7619           nc = ncols / bs;
7620           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7621           for (int ii = 0; ii < bs; ii++) { // rows in block
7622             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7623             for (int k = 0; k < ncols; k += bs) {
7624               for (int jj = 0; jj < bs; jj++) { // cols in block
7625                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7626               }
7627             }
7628             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7629           }
7630           grow = Istart / bs + brow / bs;
7631           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7632         }
7633       }
7634       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7635       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7636     } else {
7637       const PetscScalar *vals;
7638       const PetscInt    *idx;
7639       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7640     old_bs:
7641       /*
7642        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7643        */
7644       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7645       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7646       if (isseqaij) {
7647         PetscInt max_d_nnz;
7648         /*
7649          Determine exact preallocation count for (sequential) scalar matrix
7650          */
7651         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7652         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7653         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7654         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); }
7655         PetscCall(PetscFree3(w0, w1, w2));
7656       } else if (ismpiaij) {
7657         Mat             Daij, Oaij;
7658         const PetscInt *garray;
7659         PetscInt        max_d_nnz;
7660         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7661         /*
7662          Determine exact preallocation count for diagonal block portion of scalar matrix
7663          */
7664         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7665         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7666         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7667         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); }
7668         PetscCall(PetscFree3(w0, w1, w2));
7669         /*
7670          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7671          */
7672         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7673           o_nnz[jj] = 0;
7674           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7675             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7676             o_nnz[jj] += ncols;
7677             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7678           }
7679           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7680         }
7681       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7682       /* get scalar copy (norms) of matrix */
7683       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7684       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7685       PetscCall(PetscFree2(d_nnz, o_nnz));
7686       for (Ii = Istart; Ii < Iend; Ii++) {
7687         PetscInt dest_row = Ii / bs;
7688         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7689         for (jj = 0; jj < ncols; jj++) {
7690           PetscInt    dest_col = idx[jj] / bs;
7691           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7692           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7693         }
7694         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7695       }
7696       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7697       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7698     }
7699   } else {
7700     /* TODO GPU: optimization proposal, each class provides fast implementation of this
7701      procedure via MatAbs API */
7702     /* just copy scalar matrix & abs() */
7703     PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7704     if (isseqaij) {
7705       a = Gmat;
7706       b = NULL;
7707     } else {
7708       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7709       a             = d->A;
7710       b             = d->B;
7711     }
7712     /* abs */
7713     for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7714       MatInfo      info;
7715       PetscScalar *avals;
7716       PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7717       PetscCall(MatSeqAIJGetArray(c, &avals));
7718       for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7719       PetscCall(MatSeqAIJRestoreArray(c, &avals));
7720     }
7721   }
7722   if (symmetrize) {
7723     PetscBool isset, issym;
7724     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7725     if (!isset || !issym) {
7726       Mat matTrans;
7727       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7728       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7729       PetscCall(MatDestroy(&matTrans));
7730     }
7731     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7732   } else {
7733     PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7734   }
7735   if (scale) {
7736     /* scale c for all diagonal values = 1 or -1 */
7737     Vec diag;
7738     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7739     PetscCall(MatGetDiagonal(Gmat, diag));
7740     PetscCall(VecReciprocal(diag));
7741     PetscCall(VecSqrtAbs(diag));
7742     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7743     PetscCall(VecDestroy(&diag));
7744   }
7745   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7746   *a_Gmat = Gmat;
7747   PetscFunctionReturn(0);
7748 }
7749 
7750 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) {
7751   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7752   Mat                tGmat;
7753   MPI_Comm           comm;
7754   const PetscScalar *vals;
7755   const PetscInt    *idx;
7756   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7757   MatScalar         *AA; // this is checked in graph
7758   PetscBool          isseqaij;
7759   Mat                a, b, c;
7760   MatType            jtype;
7761 
7762   PetscFunctionBegin;
7763   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7764   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7765   PetscCall(MatGetType(Gmat, &jtype));
7766   PetscCall(MatCreate(comm, &tGmat));
7767   PetscCall(MatSetType(tGmat, jtype));
7768 
7769   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7770                Also, if the matrix is symmetric, can we skip this
7771                operation? It can be very expensive on large matrices. */
7772 
7773   // global sizes
7774   PetscCall(MatGetSize(Gmat, &MM, &NN));
7775   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7776   nloc = Iend - Istart;
7777   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7778   if (isseqaij) {
7779     a = Gmat;
7780     b = NULL;
7781   } else {
7782     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7783     a             = d->A;
7784     b             = d->B;
7785     garray        = d->garray;
7786   }
7787   /* Determine upper bound on non-zeros needed in new filtered matrix */
7788   for (PetscInt row = 0; row < nloc; row++) {
7789     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7790     d_nnz[row] = ncols;
7791     if (ncols > maxcols) maxcols = ncols;
7792     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7793   }
7794   if (b) {
7795     for (PetscInt row = 0; row < nloc; row++) {
7796       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7797       o_nnz[row] = ncols;
7798       if (ncols > maxcols) maxcols = ncols;
7799       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7800     }
7801   }
7802   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7803   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7804   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7805   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7806   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7807   PetscCall(PetscFree2(d_nnz, o_nnz));
7808   //
7809   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7810   nnz0 = nnz1 = 0;
7811   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7812     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7813       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7814       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7815         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7816         if (PetscRealPart(sv) > vfilter) {
7817           nnz1++;
7818           PetscInt cid = idx[jj] + Istart; //diag
7819           if (c != a) cid = garray[idx[jj]];
7820           AA[ncol_row] = vals[jj];
7821           AJ[ncol_row] = cid;
7822           ncol_row++;
7823         }
7824       }
7825       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7826       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7827     }
7828   }
7829   PetscCall(PetscFree2(AA, AJ));
7830   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7831   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7832   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7833 
7834   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7835 
7836   *filteredG = tGmat;
7837   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7838   PetscFunctionReturn(0);
7839 }
7840 
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
/* NOTE: both redefinitions below assume the enclosing function returns void and has a
   PetscErrorCode *_ierr parameter (the Fortran calling convention used by the stub that
   follows); the error code is reported back to Fortran through *_ierr instead of a
   C return value. */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the name-mangling scheme of the Fortran compiler in use:
   all-caps, no trailing underscore, or (default) the trailing-underscore form. */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran-callable stub implementing MatSetValues() for a MATMPIAIJ
  matrix, invoked directly from Fortran for speed (bypassing the generic interface layer).

  All arguments arrive by reference (Fortran convention); errors are reported through
  *_ierr via the PetscCall()/SETERRQ() redefinitions above, since this function is void.

  Logic: rows owned by this process are inserted immediately into the diagonal (A) or
  off-diagonal (B) sequential blocks using the MatSetValues_SeqAIJ_{A,B}_Private()
  macros; rows owned elsewhere are placed in the stash for exchange at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) {
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* First call fixes the insert mode; mixing ADD_VALUES and INSERT_VALUES is an error */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    /* Ownership ranges: rows [rstart,rend) and columns [cstart,cend) are local to this process */
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat                    A     = aij->A;
    Mat_SeqAIJ            *a     = (Mat_SeqAIJ *)A->data;
    PetscInt              *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar             *aa;
    PetscBool              ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat                    B                 = aij->B;
    Mat_SeqAIJ            *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt              *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar             *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state referenced by name inside the MatSetValues_SeqAIJ_*_Private() macros
       (row pointers, binary-search bounds, last-column caches) */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: prime the search state for both the A and B blocks of this row */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major or column-major depending on the matrix's roworiented option */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column in the diagonal block: use local column numbering */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Column in the off-diagonal block: translate global column to B's compressed numbering */
            if (mat->was_assembled) {
              if (!aij->colmap) { PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* Column not present in the assembled pattern: disassemble B and revert to global indexing */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col      = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                /* NOTE(review): ap2 is computed from ba before ba is refreshed to b->a two lines
                   below; verify this ordering against MatSetValues_MPIAIJ() — ap2 may point into
                   the pre-disassembly array. */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Off-process row: buffer values in the stash for communication at assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
7984 
/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
/* (The void-function variants above stored errors in *_ierr; the standard PETSc macros are NOT restored.) */
#undef PetscCall
#undef SETERRQ
7990