xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a1cb98fac0cdf0eb4d3e8a0c8b58f3fe8f800bc6)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
/*
  MatGetRowIJ_MPIAIJ - Produces compressed-row (ia,ja) index information for an MPIAIJ matrix.

  The parallel matrix is first merged into a single sequential matrix B containing the locally
  owned rows (MatMPIAIJGetLocalMat()) and the IJ info is taken from B.  B is composed on A
  under the key "MatGetRowIJ_MPIAIJ" so that MatRestoreRowIJ_MPIAIJ() can retrieve it; the
  MatDestroy() below only drops this function's reference, the composition keeps B alive
  until the matching restore call removes it.
*/
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  /* keep B reachable for MatRestoreRowIJ_MPIAIJ() */
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}
20 
21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
22 {
23   Mat B;
24 
25   PetscFunctionBegin;
26   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
27   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
28   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
29   PetscFunctionReturn(0);
30 }
31 
32 /*MC
33    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
34 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
36    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
37   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
38   for communicators controlling multiple processes.  It is recommended that you call both of
39   the above preallocation routines for simplicity.
40 
41    Options Database Keys:
42 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
43 
44   Developer Note:
    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`.
    This type also automatically switches over to use inodes when enough exist.
47 
48   Level: beginner
49 
50 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
51 M*/
52 
53 /*MC
54    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
55 
56    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
57    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
58    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
59   for communicators controlling multiple processes.  It is recommended that you call both of
60   the above preallocation routines for simplicity.
61 
62    Options Database Keys:
63 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
64 
65   Level: beginner
66 
67 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
68 M*/
69 
70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
71 {
72   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
73 
74   PetscFunctionBegin;
75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
76   A->boundtocpu = flg;
77 #endif
78   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
79   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
80 
81   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
82    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
83    * to differ from the parent matrix. */
84   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
85   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
86 
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
91 {
92   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
93 
94   PetscFunctionBegin;
95   if (mat->A) {
96     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
97     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
98   }
99   PetscFunctionReturn(0);
100 }
101 
102 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
103 {
104   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
105   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
106   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
107   const PetscInt  *ia, *ib;
108   const MatScalar *aa, *bb, *aav, *bav;
109   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
110   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
111 
112   PetscFunctionBegin;
113   *keptrows = NULL;
114 
115   ia = a->i;
116   ib = b->i;
117   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
118   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
119   for (i = 0; i < m; i++) {
120     na = ia[i + 1] - ia[i];
121     nb = ib[i + 1] - ib[i];
122     if (!na && !nb) {
123       cnt++;
124       goto ok1;
125     }
126     aa = aav + ia[i];
127     for (j = 0; j < na; j++) {
128       if (aa[j] != 0.0) goto ok1;
129     }
130     bb = bav + ib[i];
131     for (j = 0; j < nb; j++) {
132       if (bb[j] != 0.0) goto ok1;
133     }
134     cnt++;
135   ok1:;
136   }
137   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
138   if (!n0rows) {
139     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
140     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
141     PetscFunctionReturn(0);
142   }
143   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
144   cnt = 0;
145   for (i = 0; i < m; i++) {
146     na = ia[i + 1] - ia[i];
147     nb = ib[i + 1] - ib[i];
148     if (!na && !nb) continue;
149     aa = aav + ia[i];
150     for (j = 0; j < na; j++) {
151       if (aa[j] != 0.0) {
152         rows[cnt++] = rstart + i;
153         goto ok2;
154       }
155     }
156     bb = bav + ib[i];
157     for (j = 0; j < nb; j++) {
158       if (bb[j] != 0.0) {
159         rows[cnt++] = rstart + i;
160         goto ok2;
161       }
162     }
163   ok2:;
164   }
165   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
166   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
167   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
168   PetscFunctionReturn(0);
169 }
170 
171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
172 {
173   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
174   PetscBool   cong;
175 
176   PetscFunctionBegin;
177   PetscCall(MatHasCongruentLayouts(Y, &cong));
178   if (Y->assembled && cong) {
179     PetscCall(MatDiagonalSet(aij->A, D, is));
180   } else {
181     PetscCall(MatDiagonalSet_Default(Y, D, is));
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
187 {
188   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
189   PetscInt    i, rstart, nrows, *rows;
190 
191   PetscFunctionBegin;
192   *zrows = NULL;
193   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
194   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
195   for (i = 0; i < nrows; i++) rows[i] += rstart;
196   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
197   PetscFunctionReturn(0);
198 }
199 
/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (1/2/infinity norm, or
  sum/mean of the real or imaginary parts) over all rows of the parallel matrix.

  Each process accumulates the contributions of its diagonal (A) and off-diagonal (B)
  blocks into a dense work array of length n (the *global* column count — not scalable in n),
  then the arrays are combined across processes with MPI max (NORM_INFINITY) or sum
  (all other reduction types).

  Column indices of the A block are local and are shifted by A->cmap->rstart; column
  indices of the B block are translated to global numbering through aij->garray.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore pairs with an otherwise unused pointer: presumably done so the host copies
     of the values (accessed directly below through a_aij->a / b_aij->a) are up to date —
     TODO confirm against MatSeqAIJGetArrayRead() semantics */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; square root is taken after the global reduction */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* means divide the global sums by the global row count */
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
245 
246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
247 {
248   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
249   IS              sis, gis;
250   const PetscInt *isis, *igis;
251   PetscInt        n, *iis, nsis, ngis, rstart, i;
252 
253   PetscFunctionBegin;
254   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
255   PetscCall(MatFindNonzeroRows(a->B, &gis));
256   PetscCall(ISGetSize(gis, &ngis));
257   PetscCall(ISGetSize(sis, &nsis));
258   PetscCall(ISGetIndices(sis, &isis));
259   PetscCall(ISGetIndices(gis, &igis));
260 
261   PetscCall(PetscMalloc1(ngis + nsis, &iis));
262   PetscCall(PetscArraycpy(iis, igis, ngis));
263   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
264   n = ngis + nsis;
265   PetscCall(PetscSortRemoveDupsInt(&n, iis));
266   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
267   for (i = 0; i < n; i++) iis[i] += rstart;
268   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
269 
270   PetscCall(ISRestoreIndices(sis, &isis));
271   PetscCall(ISRestoreIndices(gis, &igis));
272   PetscCall(ISDestroy(&sis));
273   PetscCall(ISDestroy(&gis));
274   PetscFunctionReturn(0);
275 }
276 
277 /*
278   Local utility routine that creates a mapping from the global column
279 number to the local number in the off-diagonal part of the local
280 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each process
has an order-N integer array) but access is fast.
283 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of local off-diagonal columns */

  PetscFunctionBegin;
  /* garray (local off-diagonal column -> global column) must exist once entries were assembled into B */
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash table mapping global column + 1 -> local column + 1; both are stored shifted by one
     (lookups subtract 1 again, see MatSetValues_MPIAIJ / MatGetValues_MPIAIJ) */
  PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES));
#else
  /* dense array of global length: colmap[global column] = local column + 1, 0 = not present */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}
300 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts or adds one (row,col) value into the diagonal
  block A of the MPIAIJ matrix.  Only usable inside MatSetValues_MPIAIJ(), which declares
  every working variable the macro references (rp1, ap1, low1, high1, nrow1, rmax1,
  lastcol1, _i, t, N, nonew, ignorezeroentries, a, aa, ai, aj, aimax, ailen, am, A).

  The position is located with a short binary search (down to a window of 5) followed by a
  linear scan.  If (row,col) is not stored yet, insertion honors the nonew/ignorezeroentries
  options, reallocates the row via MatSeqXAIJReallocateAIJ if needed, and shifts the later
  entries of the row up by one.  orow/ocol are the original global indices, used only for
  the error message.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
346 
/*
  MatSetValues_SeqAIJ_B_Private - Twin of MatSetValues_SeqAIJ_A_Private for the off-diagonal
  block B.  Only usable inside MatSetValues_MPIAIJ(), which declares the working variables
  (rp2, ap2, low2, high2, nrow2, rmax2, lastcol2, _i, t, N, nonew, ignorezeroentries, b, ba,
  bi, bj, bimax, bilen, bm, B).

  Identical search/insert logic as the A variant; the only semantic difference is that zero
  values are skipped here whenever ignorezeroentries is set (no diagonal exception, since B
  holds no diagonal entries).
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
391 
/*
  MatSetValuesRow_MPIAIJ - Replaces all stored values of one locally owned row.

  row is a *global* row index; v supplies one value per stored nonzero of the row, ordered
  as the row appears globally: off-diagonal entries left of the diagonal block, then the
  diagonal block, then off-diagonal entries right of it.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  /* l = number of stored B entries with global column before the diagonal block
     (relies on B's columns being sorted per row and on diag == column-range start
     for square matrices) */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}
429 
/*
  MatSetValues_MPIAIJ - Inserts or adds a logically dense m-by-n block of values.

  Locally owned rows are processed immediately: each value goes into the diagonal block A
  (global column in [cstart,cend)) or into the off-diagonal block B (any other column),
  through the MatSetValues_SeqAIJ_{A,B}_Private macros, which operate on the working
  variables declared below.  Rows owned by other processes are stored in the stash and
  communicated during MatAssemblyBegin/End, unless stashing was disabled.

  Negative row or column indices are silently skipped (standard MatSetValues() contract).
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state the macros use */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* locally owned column: diagonal block, local column numbering */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication at assembly time */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}
538 
539 /*
540     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
541     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
542     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
543 */
544 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
545 {
546   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
547   Mat         A      = aij->A; /* diagonal part of the matrix */
548   Mat         B      = aij->B; /* offdiagonal part of the matrix */
549   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
550   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
551   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
552   PetscInt   *ailen = a->ilen, *aj = a->j;
553   PetscInt   *bilen = b->ilen, *bj = b->j;
554   PetscInt    am          = aij->A->rmap->n, j;
555   PetscInt    diag_so_far = 0, dnz;
556   PetscInt    offd_so_far = 0, onz;
557 
558   PetscFunctionBegin;
559   /* Iterate over all rows of the matrix */
560   for (j = 0; j < am; j++) {
561     dnz = onz = 0;
562     /*  Iterate over all non-zero columns of the current row */
563     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
564       /* If column is in the diagonal */
565       if (mat_j[col] >= cstart && mat_j[col] < cend) {
566         aj[diag_so_far++] = mat_j[col] - cstart;
567         dnz++;
568       } else { /* off-diagonal entries */
569         bj[offd_so_far++] = mat_j[col];
570         onz++;
571       }
572     }
573     ailen[j] = dnz;
574     bilen[j] = onz;
575   }
576   PetscFunctionReturn(0);
577 }
578 
579 /*
580     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
581     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
582     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
583     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
584     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
585 */
586 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
587 {
588   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
589   Mat          A    = aij->A; /* diagonal part of the matrix */
590   Mat          B    = aij->B; /* offdiagonal part of the matrix */
591   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
592   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
593   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
594   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
595   PetscInt    *ailen = a->ilen, *aj = a->j;
596   PetscInt    *bilen = b->ilen, *bj = b->j;
597   PetscInt     am          = aij->A->rmap->n, j;
598   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
599   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
600   PetscScalar *aa = a->a, *ba = b->a;
601 
602   PetscFunctionBegin;
603   /* Iterate over all rows of the matrix */
604   for (j = 0; j < am; j++) {
605     dnz_row = onz_row = 0;
606     rowstart_offd     = full_offd_i[j];
607     rowstart_diag     = full_diag_i[j];
608     /*  Iterate over all non-zero columns of the current row */
609     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
610       /* If column is in the diagonal */
611       if (mat_j[col] >= cstart && mat_j[col] < cend) {
612         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
613         aa[rowstart_diag + dnz_row] = mat_a[col];
614         dnz_row++;
615       } else { /* off-diagonal entries */
616         bj[rowstart_offd + onz_row] = mat_j[col];
617         ba[rowstart_offd + onz_row] = mat_a[col];
618         onz_row++;
619       }
620     }
621     ailen[j] = dnz_row;
622     bilen[j] = onz_row;
623   }
624   PetscFunctionReturn(0);
625 }
626 
/*
  MatGetValues_MPIAIJ - Retrieves an m-by-n block of values; only locally owned rows are
  supported (off-process rows raise PETSC_ERR_SUP).

  Columns in [cstart,cend) are read from the diagonal block A with local indices; other
  columns are translated to local indices of the off-diagonal block B through aij->colmap
  (built on demand), and columns absent from B are returned as 0.0.  Negative row or column
  indices are skipped, leaving the corresponding v entries untouched.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* colmap entries are stored +1; col < 0 or a garray mismatch means idxn[j] is not stored in B */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
661 
/*
  MatAssemblyBegin_MPIAIJ - Starts communicating stashed off-process entries to their
  owning processes.  Nothing to do when stashing is disabled (donotstash) or the user
  promised no off-process entries (nooffprocentries).
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}
675 
/*
  MatAssemblyEnd_MPIAIJ - Finishes assembly: receives stashed off-process entries, inserts
  them, assembles the diagonal (A) and off-diagonal (B) blocks, and (re)builds the parallel
  data structures.

  Outline:
    1. Drain the stash; each received message is split into runs with identical row index
       and inserted with one MatSetValues_MPIAIJ() call per run.
    2. Assemble A.
    3. If any process is disassembled (B using global column ids), all processes must
       disassemble so the communication structures can be rebuilt consistently.
    4. On the first final assembly, set up the matrix-vector multiply machinery.
    5. Assemble B, drop cached row work arrays and the cached diagonal vector.
    6. Update the collective nonzerostate via a sum-reduction.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row buffers from MatGetRow() are invalid after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* the cached diagonal vector is stale once values may have changed */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
754 
755 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
756 {
757   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
758 
759   PetscFunctionBegin;
760   PetscCall(MatZeroEntries(l->A));
761   PetscCall(MatZeroEntries(l->B));
762   PetscFunctionReturn(0);
763 }
764 
/*
   MatZeroRows_MPIAIJ - Zeros the (globally numbered) rows listed in rows[],
   optionally placing diag on the diagonal of each zeroed row and fixing the
   right-hand side b so that the solution keeps the values given in x.

   Collective on A (contains an MPIU_Allreduce for the nonzero state).
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i for every locally owned zeroed row i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the block nonzero states so we can detect structural changes below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: diagonal entries live in the A block, so delegate directly */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save the nonew flags; they are temporarily forced to 0 (allow new nonzeros)
       when the caller did not request that the pattern be kept */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert diag at the global (row,row) position, skipping rows beyond the column range */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the saved nonew flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
838 
/*
   MatZeroRowsColumns_MPIAIJ - Zeros the listed global rows AND the matching
   columns, optionally placing diag on the diagonal and adjusting b so the
   solution keeps the values in x.

   The row list may reference rows owned by other ranks; a PetscSF reduction
   flags locally owned rows. Columns are zeroed by scattering a 0/1 mask of
   zeroed rows into the off-process column space and clearing every entry of
   the B block whose column is masked.

   Collective on A.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* after the reduce, lrows[r] >= 0 exactly when local row r was requested by some rank */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over local rows and scatter it to the ghost (off-process column) layout */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* fetch the ghost values of x needed to correct b for the zeroed columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: only rows with nonzeros are stored; ridx maps back to true row numbers */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* entry sits in a zeroed column: move its contribution to the rhs, then clear it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
956 
957 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
958 {
959   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
960   PetscInt    nt;
961   VecScatter  Mvctx = a->Mvctx;
962 
963   PetscFunctionBegin;
964   PetscCall(VecGetLocalSize(xx, &nt));
965   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
966   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
967   PetscUseTypeMethod(a->A, mult, xx, yy);
968   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
969   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
974 {
975   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
976 
977   PetscFunctionBegin;
978   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
983 {
984   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
985   VecScatter  Mvctx = a->Mvctx;
986 
987   PetscFunctionBegin;
988   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
989   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
990   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
991   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
992   PetscFunctionReturn(0);
993 }
994 
995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
996 {
997   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
998 
999   PetscFunctionBegin;
1000   /* do nondiagonal part */
1001   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1002   /* do local part */
1003   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1004   /* add partial results together */
1005   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1006   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1007   PetscFunctionReturn(0);
1008 }
1009 
/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to
   within tol, setting *f collectively on all ranks.

   First a cheap collective test of the diagonal blocks; only if that passes
   (and there is more than one rank) are the off-diagonal parts gathered with
   MatCreateSubMatrices() and compared.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* all ranks must agree; MPI_LAND makes *f false if any local test failed */
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global row indices outside this rank's ownership range.
     NOTE(review): the allocation is sized with N but the fill loop below runs
     up to M; these only agree when M == N — confirm this routine is reached
     only for square matrices */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* A(Me, Notme) must equal B(Notme, Me)^T for the whole matrices to be transposes */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1051 
1052 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1053 {
1054   PetscFunctionBegin;
1055   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1060 {
1061   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1062 
1063   PetscFunctionBegin;
1064   /* do nondiagonal part */
1065   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1066   /* do local part */
1067   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1068   /* add partial results together */
1069   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1070   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 /*
1075   This only works correctly for square matrices where the subblock A->A is the
1076    diagonal block
1077 */
1078 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1079 {
1080   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1081 
1082   PetscFunctionBegin;
1083   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1084   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1085   PetscCall(MatGetDiagonal(a->A, v));
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1090 {
1091   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1092 
1093   PetscFunctionBegin;
1094   PetscCall(MatScale(a->A, aa));
1095   PetscCall(MatScale(a->B, aa));
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* first-pass (local) COO permutation/mapping arrays */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* second-pass (received, off-process) COO permutation/mapping arrays */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  /* communication buffers (allocated together, so freed together) and the
     permutation applied before sending */
  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}
1122 
1123 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1124 {
1125   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1126 
1127   PetscFunctionBegin;
1128 #if defined(PETSC_USE_LOG)
1129   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
1130 #endif
1131   PetscCall(MatStashDestroy_Private(&mat->stash));
1132   PetscCall(VecDestroy(&aij->diag));
1133   PetscCall(MatDestroy(&aij->A));
1134   PetscCall(MatDestroy(&aij->B));
1135 #if defined(PETSC_USE_CTABLE)
1136   PetscCall(PetscTableDestroy(&aij->colmap));
1137 #else
1138   PetscCall(PetscFree(aij->colmap));
1139 #endif
1140   PetscCall(PetscFree(aij->garray));
1141   PetscCall(VecDestroy(&aij->lvec));
1142   PetscCall(VecScatterDestroy(&aij->Mvctx));
1143   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
1144   PetscCall(PetscFree(aij->ld));
1145 
1146   /* Free COO */
1147   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1148 
1149   PetscCall(PetscFree(mat->data));
1150 
1151   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1152   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
1153 
1154   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1155   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
1157   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
1158   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
1159   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
1160   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
1161   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
1164 #if defined(PETSC_HAVE_CUDA)
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
1166 #endif
1167 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
1169 #endif
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
1171 #if defined(PETSC_HAVE_ELEMENTAL)
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
1173 #endif
1174 #if defined(PETSC_HAVE_SCALAPACK)
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
1176 #endif
1177 #if defined(PETSC_HAVE_HYPRE)
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
1179   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
1180 #endif
1181   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1182   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
1183   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
1184   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
1185   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
1187 #if defined(PETSC_HAVE_MKL_SPARSE)
1188   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
1189 #endif
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1192   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1199 {
1200   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1201   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1202   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1203   const PetscInt    *garray = aij->garray;
1204   const PetscScalar *aa, *ba;
1205   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1206   PetscInt          *rowlens;
1207   PetscInt          *colidxs;
1208   PetscScalar       *matvals;
1209 
1210   PetscFunctionBegin;
1211   PetscCall(PetscViewerSetUp(viewer));
1212 
1213   M  = mat->rmap->N;
1214   N  = mat->cmap->N;
1215   m  = mat->rmap->n;
1216   rs = mat->rmap->rstart;
1217   cs = mat->cmap->rstart;
1218   nz = A->nz + B->nz;
1219 
1220   /* write matrix header */
1221   header[0] = MAT_FILE_CLASSID;
1222   header[1] = M;
1223   header[2] = N;
1224   header[3] = nz;
1225   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1226   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1227 
1228   /* fill in and store row lengths  */
1229   PetscCall(PetscMalloc1(m, &rowlens));
1230   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1231   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1232   PetscCall(PetscFree(rowlens));
1233 
1234   /* fill in and store column indices */
1235   PetscCall(PetscMalloc1(nz, &colidxs));
1236   for (cnt = 0, i = 0; i < m; i++) {
1237     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       colidxs[cnt++] = garray[B->j[jb]];
1240     }
1241     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1242     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1243   }
1244   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1245   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1246   PetscCall(PetscFree(colidxs));
1247 
1248   /* fill in and store nonzero values */
1249   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1250   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1251   PetscCall(PetscMalloc1(nz, &matvals));
1252   for (cnt = 0, i = 0; i < m; i++) {
1253     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1254       if (garray[B->j[jb]] > cs) break;
1255       matvals[cnt++] = ba[jb];
1256     }
1257     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1258     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1259   }
1260   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1261   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1262   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1263   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1264   PetscCall(PetscFree(matvals));
1265 
1266   /* write block size option to the viewer's .info file */
1267   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #include <petscdraw.h>
/*
   MatView_MPIAIJ_ASCIIorDraworSocket - Views a parallel AIJ matrix on an
   ASCII, draw, binary, or socket viewer. Info-style ASCII formats and binary
   output are handled with early returns; everything else falls through to
   gathering the entire matrix onto rank 0 and viewing it there.

   Collective on mat.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local row/nz/memory statistics, printed synchronized so output is ordered by rank */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch appears unreachable — when iascii is true the
       first branch of this if/else chain is taken instead; confirm intent.
       The uniprocessor ASCII case currently falls through to the gather path
       below. */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/cols; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1397 
1398 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1399 {
1400   PetscBool iascii, isdraw, issocket, isbinary;
1401 
1402   PetscFunctionBegin;
1403   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1404   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1405   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1406   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1407   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1408   PetscFunctionReturn(0);
1409 }
1410 
/*
   MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

   Only "local" sweeps are supported in parallel: each process relaxes with its
   diagonal block mat->A, while the off-diagonal block mat->B couples in the
   most recent ghost values of xx (a block-Jacobi outer iteration with SOR
   inner sweeps).  The Eisenstat variant (SOR_EISENSTAT) is also handled.
   A truly parallel, globally-ordered SOR is not supported and errors out.

   Input Parameters:
+  matin  - the MPIAIJ matrix
.  bb     - right-hand side
.  omega  - relaxation factor
.  flag   - MatSORType bit flags selecting the sweep(s)
.  fshift - diagonal shift
.  its    - number of outer (parallel block) iterations
-  lits   - number of local sweeps per outer iteration

   Output Parameter:
.  xx - approximate solution, updated in place
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector holding bb - B*x (off-process coupling folded into the rhs) */
  PetscBool   hasop;

  PetscFunctionBegin;
  /* SOR_APPLY_UPPER is delegated entirely to the diagonal block's kernel */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever more than one outer iteration runs, the initial guess
     is nonzero ((~flag) & SOR_ZERO_INITIAL_GUESS, i.e. the bit is NOT set), or
     the Eisenstat trick is requested */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* with a zero initial guess the first iteration needs no ghost update since B*x = 0 */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather the ghost entries of xx needed by the off-diagonal block */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward half-sweep with zero initial guess produces the first factor application */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily build and cache the diagonal, reused across calls */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    /* use a specialized diagonal-block multiply when the implementation provides one */
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx  (Eisenstat rhs combination) */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    /* add the off-process coupling B * (ghost xx) */
    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero/small-pivot error detected by the local kernel */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1507 
/*
   MatPermute_MPIAIJ - forms B = P_r * A * P_c for row/column permutations given
   as index sets rowp/colp (each entry says which old row/column lands at that
   position).  Uses PetscSF reductions to invert the permutations in parallel,
   precomputes exact diagonal/off-diagonal preallocation for the result, then
   fills it with MatSetValues row by row.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is sized for both row and column passes; rdest/cdest hold inverted permutations */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  /* pushing each position's global index back through the SF yields the inverse map */
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  /* aA/aB are the diagonal and off-diagonal sequential blocks; gcols maps aB's
     compressed local columns to global column numbers */
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  /* broadcast the inverted column map to the owners of each ghost column */
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count destination nnz per row (diagonal vs off-diagonal block of the result) */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the per-row counts to the processes that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never set non-NULL in this function, so this destroy
     appears to be dead code — confirm against history before removing */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}
1613 
1614 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1615 {
1616   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1617 
1618   PetscFunctionBegin;
1619   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1620   if (ghosts) *ghosts = aij->garray;
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1625 {
1626   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1627   Mat            A = mat->A, B = mat->B;
1628   PetscLogDouble isend[5], irecv[5];
1629 
1630   PetscFunctionBegin;
1631   info->block_size = 1.0;
1632   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1633 
1634   isend[0] = info->nz_used;
1635   isend[1] = info->nz_allocated;
1636   isend[2] = info->nz_unneeded;
1637   isend[3] = info->memory;
1638   isend[4] = info->mallocs;
1639 
1640   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1641 
1642   isend[0] += info->nz_used;
1643   isend[1] += info->nz_allocated;
1644   isend[2] += info->nz_unneeded;
1645   isend[3] += info->memory;
1646   isend[4] += info->mallocs;
1647   if (flag == MAT_LOCAL) {
1648     info->nz_used      = isend[0];
1649     info->nz_allocated = isend[1];
1650     info->nz_unneeded  = isend[2];
1651     info->memory       = isend[3];
1652     info->mallocs      = isend[4];
1653   } else if (flag == MAT_GLOBAL_MAX) {
1654     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1655 
1656     info->nz_used      = irecv[0];
1657     info->nz_allocated = irecv[1];
1658     info->nz_unneeded  = irecv[2];
1659     info->memory       = irecv[3];
1660     info->mallocs      = irecv[4];
1661   } else if (flag == MAT_GLOBAL_SUM) {
1662     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   }
1670   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1671   info->fill_ratio_needed = 0;
1672   info->factor_mallocs    = 0;
1673   PetscFunctionReturn(0);
1674 }
1675 
/*
   MatSetOption_MPIAIJ - dispatches a matrix option.  Most options are simply
   forwarded to both sequential blocks (diagonal a->A and off-diagonal a->B);
   a few are recorded on the parallel object itself, and the symmetry flags are
   handled by the generic MatSetOption() before this routine is called.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options that only make sense once storage exists: forward to both blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* remember the orientation for the parallel MatSetValues path too */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip the stash/communication of entries destined for other processes */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}
1729 
/*
   MatGetRow_MPIAIJ - returns one (local) row of the parallel matrix with
   columns in increasing global order, by merging the row of the diagonal
   block A (global columns cstart..cend) with the row of the off-diagonal
   block B (whose local columns map to global through mat->garray).  The
   returned arrays point into per-matrix scratch buffers; the caller must
   pair this with MatRestoreRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one outstanding MatGetRow() per matrix is allowed */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    /* scratch buffers live on the matrix and are reused by later calls */
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for; note that when v is
     requested the B columns (pcB) are still needed to interleave by global column */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all A entries, then the remaining B entries to the right */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* values not requested: find the split point here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  /* hand the block rows back immediately; the merged copies live in the scratch buffers */
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1813 
1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1815 {
1816   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1817 
1818   PetscFunctionBegin;
1819   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1820   aij->getrowactive = PETSC_FALSE;
1821   PetscFunctionReturn(0);
1822 }
1823 
/*
   MatNorm_MPIAIJ - computes the Frobenius, 1- (max column sum) or infinity-
   (max row sum) norm of a parallel AIJ matrix by combining contributions from
   the diagonal (A) and off-diagonal (B) sequential blocks, then reducing
   across the communicator.  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  /* single-process case: everything lives in the diagonal block */
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column (tmp has global length), reduce, take max */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* diagonal block columns are local: shift by cstart to global numbering */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* off-diagonal block columns map to global through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are wholly local: sum |a_ij| over both blocks per row, take local then global max */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}
1901 
/*
   MatTranspose_MPIAIJ - forms the transpose of a parallel AIJ matrix.

   The diagonal block is transposed locally and cheaply (its transpose stays on
   the same process), while entries of the off-diagonal block are shipped to
   their new owners through MatSetValues/MatAssembly.  For MAT_INITIAL_MATRIX
   (or in-place with *matout == A) exact preallocation for the result is first
   computed with a PetscSF reduction of the off-diagonal column counts.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; /* column counts of A = row counts of A^T's diagonal block */
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    /* sum contributions from all processes onto the owning process of each column */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* result has transposed layout: local sizes (n x m), global (N x M) */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* reusing a matrix: any new nonzero would mean the reuse contract was violated */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate B's compressed local columns to global numbering up front */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* insert one COLUMN of B^T at a time: row i of B becomes column `row` of the result */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: fold the new matrix into A's header */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
1995 
/*
   MatDiagonalScale_MPIAIJ - computes mat = diag(ll) * mat * diag(rr).

   The left scaling is purely local (rows are owned); the right scaling of the
   off-diagonal block needs the ghost entries of rr, which are scattered while
   the local scalings proceed so communication overlaps computation.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter is in flight */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale  the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}
2025 
2026 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2027 {
2028   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2029 
2030   PetscFunctionBegin;
2031   PetscCall(MatSetUnfactored(a->A));
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2036 {
2037   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2038   Mat         a, b, c, d;
2039   PetscBool   flg;
2040 
2041   PetscFunctionBegin;
2042   a = matA->A;
2043   b = matA->B;
2044   c = matB->A;
2045   d = matB->B;
2046 
2047   PetscCall(MatEqual(a, c, &flg));
2048   if (flg) PetscCall(MatEqual(b, d, &flg));
2049   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2054 {
2055   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2056   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2057 
2058   PetscFunctionBegin;
2059   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2060   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2061     /* because of the column compression in the off-processor part of the matrix a->B,
2062        the number of columns in a->B and b->B may be different, hence we cannot call
2063        the MatCopy() directly on the two parts. If need be, we can provide a more
2064        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2065        then copying the submatrices */
2066     PetscCall(MatCopy_Basic(A, B, str));
2067   } else {
2068     PetscCall(MatCopy(a->A, b->A, str));
2069     PetscCall(MatCopy(a->B, b->B, str));
2070   }
2071   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2072   PetscFunctionReturn(0);
2073 }
2074 
/* MatSetUp_MPIAIJ - default setup: preallocate both blocks with PETSc's default per-row estimates */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
2081 
2082 /*
2083    Computes the number of nonzeros per row needed for preallocation when X and Y
2084    have different nonzero structure.
2085 */
2086 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2087 {
2088   PetscInt i, j, k, nzx, nzy;
2089 
2090   PetscFunctionBegin;
2091   /* Set the number of nonzeros in the new matrix */
2092   for (i = 0; i < m; i++) {
2093     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2094     nzx    = xi[i + 1] - xi[i];
2095     nzy    = yi[i + 1] - yi[i];
2096     nnz[i] = 0;
2097     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2098       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2099       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2100       nnz[i]++;
2101     }
2102     for (; k < nzy; k++) nnz[i]++;
2103   }
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2109 {
2110   PetscInt    m = Y->rmap->N;
2111   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2112   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2113 
2114   PetscFunctionBegin;
2115   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2116   PetscFunctionReturn(0);
2117 }
2118 
/*
   MatAXPY_MPIAIJ - computes Y = a*X + Y.  With SAME_NONZERO_PATTERN this is a
   cheap block-wise operation; with SUBSET_NONZERO_PATTERN the generic kernel is
   used; otherwise a new matrix with the union pattern is preallocated, filled,
   and merged back into Y's header.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* patterns differ: build a fresh matrix with exact union preallocation */
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* yy->A/yy->B are sequential blocks, so rmap->N here is the local row count */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* replace Y's guts with B's while keeping Y's object identity for callers */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}
2149 
2150 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2151 
2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2153 {
2154   PetscFunctionBegin;
2155   if (PetscDefined(USE_COMPLEX)) {
2156     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2157 
2158     PetscCall(MatConjugate_SeqAIJ(aij->A));
2159     PetscCall(MatConjugate_SeqAIJ(aij->B));
2160   }
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2165 {
2166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2167 
2168   PetscFunctionBegin;
2169   PetscCall(MatRealPart(a->A));
2170   PetscCall(MatRealPart(a->B));
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2175 {
2176   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2177 
2178   PetscFunctionBegin;
2179   PetscCall(MatImaginaryPart(a->A));
2180   PetscCall(MatImaginaryPart(a->B));
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2185 {
2186   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2187   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2188   PetscScalar       *va, *vv;
2189   Vec                vB, vA;
2190   const PetscScalar *vb;
2191 
2192   PetscFunctionBegin;
2193   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2194   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2195 
2196   PetscCall(VecGetArrayWrite(vA, &va));
2197   if (idx) {
2198     for (i = 0; i < m; i++) {
2199       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2200     }
2201   }
2202 
2203   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2204   PetscCall(PetscMalloc1(m, &idxb));
2205   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2206 
2207   PetscCall(VecGetArrayWrite(v, &vv));
2208   PetscCall(VecGetArrayRead(vB, &vb));
2209   for (i = 0; i < m; i++) {
2210     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2211       vv[i] = vb[i];
2212       if (idx) idx[i] = a->garray[idxb[i]];
2213     } else {
2214       vv[i] = va[i];
2215       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2216     }
2217   }
2218   PetscCall(VecRestoreArrayWrite(vA, &vv));
2219   PetscCall(VecRestoreArrayWrite(vA, &va));
2220   PetscCall(VecRestoreArrayRead(vB, &vb));
2221   PetscCall(PetscFree(idxb));
2222   PetscCall(VecDestroy(&vA));
2223   PetscCall(VecDestroy(&vB));
2224   PetscFunctionReturn(0);
2225 }
2226 
/*
  MatGetRowMinAbs_MPIAIJ - for each local row, find the entry of smallest absolute
  value, combining the diagonal block (mat->A) and the off-diagonal block (mat->B).
  Unstored columns of the off-diagonal part are implicit zeros, so any row that is
  not dense in B has an off-diagonal minimum |value| of 0.0.

  Output Parameters:
  v   - vector of per-row minimum absolute values
  idx - optional array receiving the global column index of the minimum; ties are
        broken toward the smaller global column index
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* global column numbers of B's compressed columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* all columns are local: the diagonal block alone answers the query, computed
       directly into v's array via a wrapper Vec */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no locally owned columns: rows are treated as empty (value 0.0, index -1).
       NOTE(review): this ignores any stored off-diagonal entries; per the comment
       above this branch is meant for the "other processes have no entry" case */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i; /* CSR row pointers of B */
  bj = b->j; /* CSR column indices of B (compressed, local to B) */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0: the off-diagonal min |value| is 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        /* NOTE(review): these tests compare the global column `col` with the running
           position j (shifted by n once past the diagonal block), i.e. they assume a
           hole-free B would cover columns 0..cstart-1, cend..N-1 in order — confirm */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend; /* diagonal block owns columns 0..cend-1, so the first hole follows it */
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's columns */
        }
      }
    }

    /* scan this row's stored off-diagonal entries for a smaller magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results row by row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to the diagonal block */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2342 
/*
  MatGetRowMin_MPIAIJ - for each local row, find the entry of smallest (real part)
  value, combining the diagonal block (mat->A) and the off-diagonal block (mat->B).
  Unstored columns of the off-diagonal part are implicit zeros, so any row that is
  not dense in B has an off-diagonal minimum of at most 0.0.

  Output Parameters:
  v   - vector of per-row minimum values
  idx - optional array receiving the global column index of the minimum; ties are
        broken toward the smaller global column index
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* global column numbers of B's compressed columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* all columns are local: the diagonal block alone answers the query, computed
       directly into v's array via a wrapper Vec */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no locally owned columns: the minimum over an empty row is PETSC_MAX_REAL,
       index -1.  NOTE(review): stored off-diagonal entries, if any, are ignored
       here; per the comment above this branch targets processes with no entries */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i; /* CSR row pointers of B */
  bj = b->j; /* CSR column indices of B (compressed, local to B) */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0: the off-diagonal row minimum is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        /* NOTE(review): these tests compare the global column `col` with the running
           position j (shifted by n once past the diagonal block), i.e. they assume a
           hole-free B would cover columns 0..cstart-1, cend..N-1 in order — confirm */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend; /* diagonal block owns columns 0..cend-1, so the first hole follows it */
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's columns */
        }
      }
    }

    /* scan this row's stored off-diagonal entries for a smaller value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results row by row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to the diagonal block */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2458 
/*
  MatGetRowMax_MPIAIJ - for each local row, find the entry of largest (real part)
  value, combining the diagonal block (mat->A) and the off-diagonal block (mat->B).
  Unstored columns of the off-diagonal part are implicit zeros, so any row that is
  not dense in B has an off-diagonal maximum of at least 0.0.

  Output Parameters:
  v   - vector of per-row maximum values
  idx - optional array receiving the global column index of the maximum; ties are
        broken toward the smaller global column index
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* global column numbers of B's compressed columns */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* all columns are local: the diagonal block alone answers the query, computed
       directly into v's array via a wrapper Vec */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no locally owned columns: the maximum over an empty row is PETSC_MIN_REAL,
       index -1.  NOTE(review): stored off-diagonal entries, if any, are ignored
       here; per the comment above this branch targets processes with no entries */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i; /* CSR row pointers of B */
  bj = b->j; /* CSR column indices of B (compressed, local to B) */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        /* NOTE(review): these tests compare the global column `col` with the running
           position j (shifted by n once past the diagonal block), i.e. they assume a
           hole-free B would cover columns 0..cstart-1, cend..N-1 in order — confirm */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend; /* diagonal block owns columns 0..cend-1, so the first hole follows it */
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's columns */
        }
      }
    }

    /* scan this row's stored off-diagonal entries for a larger value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results row by row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to the diagonal block */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2574 
2575 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2576 {
2577   Mat *dummy;
2578 
2579   PetscFunctionBegin;
2580   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2581   *newmat = *dummy;
2582   PetscCall(PetscFree(dummy));
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2587 {
2588   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2589 
2590   PetscFunctionBegin;
2591   PetscCall(MatInvertBlockDiagonal(a->A, values));
2592   A->factorerrortype = a->A->factorerrortype;
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2597 {
2598   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2599 
2600   PetscFunctionBegin;
2601   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2602   PetscCall(MatSetRandom(aij->A, rctx));
2603   if (x->assembled) {
2604     PetscCall(MatSetRandom(aij->B, rctx));
2605   } else {
2606     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2607   }
2608   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2609   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2614 {
2615   PetscFunctionBegin;
2616   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2617   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 /*@
2622    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2623 
2624    Not collective
2625 
2626    Input Parameter:
2627 .    A - the matrix
2628 
2629    Output Parameter:
2630 .    nz - the number of nonzeros
2631 
2632  Level: advanced
2633 
2634 .seealso: `MATMPIAIJ`, `Mat`
2635 @*/
2636 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2637 {
2638   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2639   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2640 
2641   PetscFunctionBegin;
2642   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 /*@
2647    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2648 
2649    Collective on A
2650 
2651    Input Parameters:
2652 +    A - the matrix
2653 -    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2654 
2655  Level: advanced
2656 
2657 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the matrix provides one;
     silently do nothing for other matrix types */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}
2664 
2665 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2666 {
2667   PetscBool sc = PETSC_FALSE, flg;
2668 
2669   PetscFunctionBegin;
2670   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2671   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2672   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2673   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2674   PetscOptionsHeadEnd();
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2679 {
2680   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2681   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2682 
2683   PetscFunctionBegin;
2684   if (!Y->preallocated) {
2685     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2686   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2687     PetscInt nonew = aij->nonew;
2688     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2689     aij->nonew = nonew;
2690   }
2691   PetscCall(MatShift_Basic(Y, a));
2692   PetscFunctionReturn(0);
2693 }
2694 
2695 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2696 {
2697   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2698 
2699   PetscFunctionBegin;
2700   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2701   PetscCall(MatMissingDiagonal(a->A, missing, d));
2702   if (d) {
2703     PetscInt rstart;
2704     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2705     *d += rstart;
2706   }
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2711 {
2712   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2713 
2714   PetscFunctionBegin;
2715   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 /* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ: one slot per MatOperation, in the order declared
   in struct _MatOps (the slot index appears in the interleaved comments); NULL
   marks an operation this type does not implement directly. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL};
2871 
2872 /* ----------------------------------------------------------------------------------------*/
2873 
2874 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2875 {
2876   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2877 
2878   PetscFunctionBegin;
2879   PetscCall(MatStoreValues(aij->A));
2880   PetscCall(MatStoreValues(aij->B));
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2885 {
2886   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2887 
2888   PetscFunctionBegin;
2889   PetscCall(MatRetrieveValues(aij->A));
2890   PetscCall(MatRetrieveValues(aij->B));
2891   PetscFunctionReturn(0);
2892 }
2893 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - implementation behind MatMPIAIJSetPreallocation()
  for MATMPIAIJ.  Sets up the layouts, discards structures derived from the old
  off-diagonal block (colmap, garray, lvec, Mvctx), recreates the off-diagonal
  block b->B, and preallocates both sequential blocks.

  Input Parameters:
  d_nz/d_nnz - nonzero counts (uniform / per-row) for the diagonal block b->A
  o_nz/o_nnz - nonzero counts (uniform / per-row) for the off-diagonal block b->B
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* drop cached global-to-local column data tied to the previous off-diagonal block */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on a single process there is no off-diagonal part, so give it zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* the diagonal block is created only once; later calls just repreallocate it */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2935 
2936 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2937 {
2938   Mat_MPIAIJ *b;
2939 
2940   PetscFunctionBegin;
2941   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2942   PetscCall(PetscLayoutSetUp(B->rmap));
2943   PetscCall(PetscLayoutSetUp(B->cmap));
2944   b = (Mat_MPIAIJ *)B->data;
2945 
2946 #if defined(PETSC_USE_CTABLE)
2947   PetscCall(PetscTableDestroy(&b->colmap));
2948 #else
2949   PetscCall(PetscFree(b->colmap));
2950 #endif
2951   PetscCall(PetscFree(b->garray));
2952   PetscCall(VecDestroy(&b->lvec));
2953   PetscCall(VecScatterDestroy(&b->Mvctx));
2954 
2955   PetscCall(MatResetPreallocation(b->A));
2956   PetscCall(MatResetPreallocation(b->B));
2957   B->preallocated  = PETSC_TRUE;
2958   B->was_assembled = PETSC_FALSE;
2959   B->assembled     = PETSC_FALSE;
2960   PetscFunctionReturn(0);
2961 }
2962 
/*
  MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same layout and
  state as matin; cpvalues controls whether numerical values are copied.

  Layouts are shared by reference; the column map, garray and the two
  sequential blocks (A, B) are deep-copied; lvec/Mvctx are duplicated only
  when they exist (the input may be preallocated but not yet assembled).
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch state is intentionally not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  /* share the row/column layouts by reference instead of rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    /* len + 1 so the allocation is valid even when len == 0 */
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3018 
3019 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3020 {
3021   PetscBool isbinary, ishdf5;
3022 
3023   PetscFunctionBegin;
3024   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3025   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3026   /* force binary viewer to load .info file if it has not yet done so */
3027   PetscCall(PetscViewerSetUp(viewer));
3028   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3029   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3030   if (isbinary) {
3031     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3032   } else if (ishdf5) {
3033 #if defined(PETSC_HAVE_HDF5)
3034     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3035 #else
3036     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3037 #endif
3038   } else {
3039     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3040   }
3041   PetscFunctionReturn(0);
3042 }
3043 
/*
  MatLoad_MPIAIJ_Binary - Fills mat from a PETSc binary viewer.

  File layout: 4-entry header (classid, M, N, nz), then per-row nonzero
  counts, then all column indices, then all values. Each process reads the
  slice corresponding to its local rows via PetscViewerBinaryReadAll().
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* nz < 0 marks a special on-disk storage format that this reader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  /* prefix-sum the row lengths in place to obtain CSR row offsets */
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the global sum of local nonzeros must match the header count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}
3093 
3094 /* Not scalable because of ISAllGather() unless getting all columns. */
3095 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3096 {
3097   IS          iscol_local;
3098   PetscBool   isstride;
3099   PetscMPIInt lisstride = 0, gisstride;
3100 
3101   PetscFunctionBegin;
3102   /* check if we are grabbing all columns*/
3103   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3104 
3105   if (isstride) {
3106     PetscInt start, len, mstart, mlen;
3107     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3108     PetscCall(ISGetLocalSize(iscol, &len));
3109     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3110     if (mstart == start && mlen - mstart == len) lisstride = 1;
3111   }
3112 
3113   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3114   if (gisstride) {
3115     PetscInt N;
3116     PetscCall(MatGetSize(mat, NULL, &N));
3117     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3118     PetscCall(ISSetIdentity(iscol_local));
3119     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3120   } else {
3121     PetscInt cbs;
3122     PetscCall(ISGetBlockSize(iscol, &cbs));
3123     PetscCall(ISAllGather(iscol, &iscol_local));
3124     PetscCall(ISSetBlockSize(iscol_local, cbs));
3125   }
3126 
3127   *isseq = iscol_local;
3128   PetscFunctionReturn(0);
3129 }
3130 
3131 /*
3132  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3133  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3134 
3135  Input Parameters:
3136    mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
3139    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3140            i.e., mat->cstart <= iscol[i] < mat->cend
3141  Output Parameter:
3142    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3143    iscol_o - sequential column index set for retrieving mat->B
3144    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3145  */
/* See the block comment above for the contract: splits (isrow, iscol) into
   sequential index sets for the diagonal (A) and off-diagonal (B) blocks,
   using a vector scatter to discover which off-process columns are selected. */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of the local iscol sizes */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* mark selected columns in x and record their submatrix column index in cmap */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local column indices of the diagonal block; takes ownership of idx */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: shift global row indices to local numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries scattered in with value > -1 were selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* ownership of cmap1 passes to the caller (freed with PetscFree) */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3242 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat; they were composed on it
       by the MAT_INITIAL_MATRIX branch below */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    /* skip the off-diagonal update when no off-process columns are selected */
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; takes ownership of Asub and destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* keep only the iscol_o entries whose global column survived the condensation;
         both subgarray and garray are sorted, so a merge-style walk suffices */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request;
       composing transfers a reference, so the local handles are destroyed */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3335 
/*
  MatCreateSubMatrix_MPIAIJ - Extracts the parallel submatrix mat[isrow, iscol].

  Dispatches to a scalable implementation when isrow (and possibly iscol) has
  the same processor distribution as mat, otherwise falls back to the
  nonscalable path that gathers iscol onto every process.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* on reuse, the composed objects on *newmat record which path created it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the fast paths apply only if every process agrees (logical AND across ranks) */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash iscol_local on the submatrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}
3435 
3436 /*@C
3437      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3438          and "off-diagonal" part of the matrix in CSR format.
3439 
3440    Collective
3441 
3442    Input Parameters:
3443 +  comm - MPI communicator
3444 .  A - "diagonal" portion of matrix
3445 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3446 -  garray - global index of B columns
3447 
3448    Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3451 
3452    Notes:
3453    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3454 
3455    A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3456 
3457 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3458 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local diagonal-block widths */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* translate B's local column indices to global indices, in place, via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* hand the shared arrays over to Bnew: B must not free them when destroyed below */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3529 
3530 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3531 
3532 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3533 {
3534   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3535   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3536   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3537   Mat             M, Msub, B = a->B;
3538   MatScalar      *aa;
3539   Mat_SeqAIJ     *aij;
3540   PetscInt       *garray = a->garray, *colsub, Ncols;
3541   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3542   IS              iscol_sub, iscmap;
3543   const PetscInt *is_idx, *cmap;
3544   PetscBool       allcolumns = PETSC_FALSE;
3545   MPI_Comm        comm;
3546 
3547   PetscFunctionBegin;
3548   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3549   if (call == MAT_REUSE_MATRIX) {
3550     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3551     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3552     PetscCall(ISGetLocalSize(iscol_sub, &count));
3553 
3554     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3555     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3556 
3557     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3558     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3559 
3560     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3561 
3562   } else { /* call == MAT_INITIAL_MATRIX) */
3563     PetscBool flg;
3564 
3565     PetscCall(ISGetLocalSize(iscol, &n));
3566     PetscCall(ISGetSize(iscol, &Ncols));
3567 
3568     /* (1) iscol -> nonscalable iscol_local */
3569     /* Check for special case: each processor gets entire matrix columns */
3570     PetscCall(ISIdentity(iscol_local, &flg));
3571     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3572     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3573     if (allcolumns) {
3574       iscol_sub = iscol_local;
3575       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3576       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3577 
3578     } else {
3579       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3580       PetscInt *idx, *cmap1, k;
3581       PetscCall(PetscMalloc1(Ncols, &idx));
3582       PetscCall(PetscMalloc1(Ncols, &cmap1));
3583       PetscCall(ISGetIndices(iscol_local, &is_idx));
3584       count = 0;
3585       k     = 0;
3586       for (i = 0; i < Ncols; i++) {
3587         j = is_idx[i];
3588         if (j >= cstart && j < cend) {
3589           /* diagonal part of mat */
3590           idx[count]     = j;
3591           cmap1[count++] = i; /* column index in submat */
3592         } else if (Bn) {
3593           /* off-diagonal part of mat */
3594           if (j == garray[k]) {
3595             idx[count]     = j;
3596             cmap1[count++] = i; /* column index in submat */
3597           } else if (j > garray[k]) {
3598             while (j > garray[k] && k < Bn - 1) k++;
3599             if (j == garray[k]) {
3600               idx[count]     = j;
3601               cmap1[count++] = i; /* column index in submat */
3602             }
3603           }
3604         }
3605       }
3606       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3607 
3608       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3609       PetscCall(ISGetBlockSize(iscol, &cbs));
3610       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3611 
3612       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3613     }
3614 
3615     /* (3) Create sequential Msub */
3616     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3617   }
3618 
3619   PetscCall(ISGetLocalSize(iscol_sub, &count));
3620   aij = (Mat_SeqAIJ *)(Msub)->data;
3621   ii  = aij->i;
3622   PetscCall(ISGetIndices(iscmap, &cmap));
3623 
3624   /*
3625       m - number of local rows
3626       Ncols - number of columns (same on all processors)
3627       rstart - first row in new global matrix generated
3628   */
3629   PetscCall(MatGetSize(Msub, &m, NULL));
3630 
3631   if (call == MAT_INITIAL_MATRIX) {
3632     /* (4) Create parallel newmat */
3633     PetscMPIInt rank, size;
3634     PetscInt    csize;
3635 
3636     PetscCallMPI(MPI_Comm_size(comm, &size));
3637     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3638 
3639     /*
3640         Determine the number of non-zeros in the diagonal and off-diagonal
3641         portions of the matrix in order to do correct preallocation
3642     */
3643 
3644     /* first get start and end of "diagonal" columns */
3645     PetscCall(ISGetLocalSize(iscol, &csize));
3646     if (csize == PETSC_DECIDE) {
3647       PetscCall(ISGetSize(isrow, &mglobal));
3648       if (mglobal == Ncols) { /* square matrix */
3649         nlocal = m;
3650       } else {
3651         nlocal = Ncols / size + ((Ncols % size) > rank);
3652       }
3653     } else {
3654       nlocal = csize;
3655     }
3656     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3657     rstart = rend - nlocal;
3658     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3659 
3660     /* next, compute all the lengths */
3661     jj = aij->j;
3662     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3663     olens = dlens + m;
3664     for (i = 0; i < m; i++) {
3665       jend = ii[i + 1] - ii[i];
3666       olen = 0;
3667       dlen = 0;
3668       for (j = 0; j < jend; j++) {
3669         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3670         else dlen++;
3671         jj++;
3672       }
3673       olens[i] = olen;
3674       dlens[i] = dlen;
3675     }
3676 
3677     PetscCall(ISGetBlockSize(isrow, &bs));
3678     PetscCall(ISGetBlockSize(iscol, &cbs));
3679 
3680     PetscCall(MatCreate(comm, &M));
3681     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3682     PetscCall(MatSetBlockSizes(M, bs, cbs));
3683     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3684     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3685     PetscCall(PetscFree(dlens));
3686 
3687   } else { /* call == MAT_REUSE_MATRIX */
3688     M = *newmat;
3689     PetscCall(MatGetLocalSize(M, &i, NULL));
3690     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3691     PetscCall(MatZeroEntries(M));
3692     /*
3693          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3694        rather than the slower MatSetValues().
3695     */
3696     M->was_assembled = PETSC_TRUE;
3697     M->assembled     = PETSC_FALSE;
3698   }
3699 
3700   /* (5) Set values of Msub to *newmat */
3701   PetscCall(PetscMalloc1(count, &colsub));
3702   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3703 
3704   jj = aij->j;
3705   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3706   for (i = 0; i < m; i++) {
3707     row = rstart + i;
3708     nz  = ii[i + 1] - ii[i];
3709     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3710     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3711     jj += nz;
3712     aa += nz;
3713   }
3714   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3715   PetscCall(ISRestoreIndices(iscmap, &cmap));
3716 
3717   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3718   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3719 
3720   PetscCall(PetscFree(colsub));
3721 
3722   /* save Msub, iscol_sub and iscmap used in processor for next request */
3723   if (call == MAT_INITIAL_MATRIX) {
3724     *newmat = M;
3725     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3726     PetscCall(MatDestroy(&Msub));
3727 
3728     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3729     PetscCall(ISDestroy(&iscol_sub));
3730 
3731     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3732     PetscCall(ISDestroy(&iscmap));
3733 
3734     if (iscol_local) {
3735       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3736       PetscCall(ISDestroy(&iscol_local));
3737     }
3738   }
3739   PetscFunctionReturn(0);
3740 }
3741 
/*
    Not great since it makes two copies of the submatrix: first a local SeqAIJ
  matrix on each process, and then the end result obtained by concatenating
  those local matrices. Writing it directly would be much like
  MatCreateSubMatrices_MPIAIJ().

  This requires a sequential iscol containing all of the indices.
*/
/*
   Extracts the parallel submatrix mat[isrow, iscol] by first building a sequential
   submatrix on every process and then scattering it into a new parallel matrix.

   Collective on mat.

   isrow/iscol - parallel/sequential index sets selecting rows/columns; iscol must
                 contain all requested column indices on every process (see the note above)
   csize       - local column size of the result, or PETSC_DECIDE
   call        - MAT_INITIAL_MATRIX creates *newmat; MAT_REUSE_MATRIX refills a matrix
                 previously produced by this routine (same pattern required)
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the fast all-columns path may only be taken when EVERY rank requests all columns */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the intermediate sequential matrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible; the first n%size ranks get one extra */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's [rstart,rend) diagonal-column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens aliases the upper half of the dlens allocation; only dlens is freed */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert row i of the sequential submatrix as global row rstart+i; jj/aa walk the CSR arrays */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3875 
/*
   Type-specific implementation of MatMPIAIJSetPreallocationCSR(): preallocates B from
   local-row CSR arrays (Ii, J, v), inserts the values, assembles, and records in Aij->ld
   the per-row count of off-diagonal entries that precede the diagonal block (needed later
   by MatUpdateMPIAIJWithArray()/MatUpdateMPIAIJWithArrays()).

   Ii - row pointers for the local rows (Ii[0] must be 0); J - global column indices;
   v  - optional values (may be NULL). The arrays are copied; B does not keep references.
*/
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;      /* number of local rows */
  cstart = B->cmap->rstart; /* first column of this rank's diagonal block */
  cend   = B->cmap->rend;   /* one past the last column of the diagonal block */
  rstart = B->rmap->rstart; /* first global row owned by this rank */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* debug-only sanity checks; the last-entry bound check assumes column indices
       within each row are sorted ascending — NOTE(review): confirm callers guarantee this */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* split each row's count into diagonal-block vs off-diagonal entries for exact preallocation */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz); /* nnz_max is not used below; presumably a leftover — NOTE(review) */
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart; /* global row index */
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* all entries are locally owned, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  /* relies on sorted column indices within each row: entries < cstart come first — NOTE(review): confirm */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    J += nnz; /* advance to the next row; J is destructively walked from here on */
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3945 
3946 /*@
3947    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3948    (the default parallel PETSc format).
3949 
3950    Collective
3951 
3952    Input Parameters:
3953 +  B - the matrix
3954 .  i - the indices into j for the start of each local row (starts with zero)
3955 .  j - the column indices for each local row (starts with zero)
3956 -  v - optional values in the matrix
3957 
3958    Level: developer
3959 
3960    Notes:
3961        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3962      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3963      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3964 
3965        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3966 
3967        The format which is used for the sparse matrix input, is equivalent to a
3968     row-major ordering.. i.e for the following matrix, the input data expected is
3969     as shown
3970 
3971 $        1 0 0
3972 $        2 0 3     P0
3973 $       -------
3974 $        4 5 6     P1
3975 $
3976 $     Process0 [P0]: rows_owned=[0,1]
3977 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3978 $        j =  {0,0,2}  [size = 3]
3979 $        v =  {1,2,3}  [size = 3]
3980 $
3981 $     Process1 [P1]: rows_owned=[2]
3982 $        i =  {0,3}    [size = nrow+1  = 1+1]
3983 $        j =  {0,1,2}  [size = 3]
3984 $        v =  {4,5,6}  [size = 3]
3985 
3986 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3987           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3988 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     silently a no-op if B's type does not provide "MatMPIAIJSetPreallocationCSR_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}
3995 
3996 /*@C
3997    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
3998    (the default parallel PETSc format).  For good matrix assembly performance
3999    the user should preallocate the matrix storage by setting the parameters
4000    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4001    performance can be increased by more than a factor of 50.
4002 
4003    Collective
4004 
4005    Input Parameters:
4006 +  B - the matrix
4007 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4008            (same value is used for all local rows)
4009 .  d_nnz - array containing the number of nonzeros in the various rows of the
4010            DIAGONAL portion of the local submatrix (possibly different for each row)
4011            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
4012            The size of this array is equal to the number of local rows, i.e 'm'.
4013            For matrices that will be factored, you must leave room for (and set)
4014            the diagonal entry even if it is zero.
4015 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4016            submatrix (same value is used for all local rows).
4017 -  o_nnz - array containing the number of nonzeros in the various rows of the
4018            OFF-DIAGONAL portion of the local submatrix (possibly different for
4019            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
4020            structure. The size of this array is equal to the number
4021            of local rows, i.e 'm'.
4022 
4023    If the *_nnz parameter is given then the *_nz parameter is ignored
4024 
4025    The `MATAIJ` format, also called compressed row storage (CSR)), is fully compatible with standard Fortran 77
4026    storage.  The stored row and column indices begin with zero.
4027    See [Sparse Matrices](sec_matsparse) for details.
4028 
4029    The parallel matrix is partitioned such that the first m0 rows belong to
4030    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4031    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4032 
4033    The DIAGONAL portion of the local submatrix of a processor can be defined
4034    as the submatrix which is obtained by extraction the part corresponding to
4035    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4036    first row that belongs to the processor, r2 is the last row belonging to
4037    the this processor, and c1-c2 is range of indices of the local part of a
4038    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4039    common case of a square matrix, the row and column ranges are the same and
4040    the DIAGONAL part is also square. The remaining portion of the local
4041    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4042 
4043    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4044 
4045    You can call MatGetInfo() to get information on how effective the preallocation was;
4046    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4047    You can also run with the option -info and look for messages with the string
4048    malloc in them to see if additional memory allocation was needed.
4049 
4050    Example usage:
4051 
4052    Consider the following 8x8 matrix with 34 non-zero values, that is
4053    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4054    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4055    as follows:
4056 
4057 .vb
4058             1  2  0  |  0  3  0  |  0  4
4059     Proc0   0  5  6  |  7  0  0  |  8  0
4060             9  0 10  | 11  0  0  | 12  0
4061     -------------------------------------
4062            13  0 14  | 15 16 17  |  0  0
4063     Proc1   0 18  0  | 19 20 21  |  0  0
4064             0  0  0  | 22 23  0  | 24  0
4065     -------------------------------------
4066     Proc2  25 26 27  |  0  0 28  | 29  0
4067            30  0  0  | 31 32 33  |  0 34
4068 .ve
4069 
4070    This can be represented as a collection of submatrices as:
4071 
4072 .vb
4073       A B C
4074       D E F
4075       G H I
4076 .ve
4077 
4078    Where the submatrices A,B,C are owned by proc0, D,E,F are
4079    owned by proc1, G,H,I are owned by proc2.
4080 
4081    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4082    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4083    The 'M','N' parameters are 8,8, and have the same values on all procs.
4084 
4085    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4086    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4087    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4088    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.
4091 
4092    When d_nz, o_nz parameters are specified, d_nz storage elements are
4093    allocated for every row of the local diagonal submatrix, and o_nz
4094    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4097    In this case, the values of d_nz,o_nz are:
4098 .vb
4099      proc0 : dnz = 2, o_nz = 2
4100      proc1 : dnz = 3, o_nz = 2
4101      proc2 : dnz = 1, o_nz = 4
4102 .ve
4103    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4104    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4106    34 values.
4107 
4108    When d_nnz, o_nnz parameters are specified, the storage is specified
4109    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4110    In the above case the values for d_nnz,o_nnz are:
4111 .vb
4112      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4113      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4114      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4115 .ve
4116    Here the space allocated is sum of all the above values i.e 34, and
4117    hence pre-allocation is perfect.
4118 
4119    Level: intermediate
4120 
4121 .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4122           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4123 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Dispatch to the type-specific implementation; silently a no-op if B's type
     does not provide "MatMPIAIJSetPreallocation_C" (e.g. B is not an MPIAIJ-based type) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
4132 
4133 /*@
4134      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4135          CSR format for the local rows.
4136 
4137    Collective
4138 
4139    Input Parameters:
4140 +  comm - MPI communicator
4141 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4142 .  n - This value should be the same as the local size used in creating the
4143        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4144        calculated if N is given) For square matrices n is almost always m.
4145 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4146 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4147 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4148 .   j - column indices
4149 -   a - optional matrix values
4150 
4151    Output Parameter:
4152 .   mat - the matrix
4153 
4154    Level: intermediate
4155 
4156    Notes:
4157        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4158      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4159      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4160 
4161        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4162 
4163        The format which is used for the sparse matrix input, is equivalent to a
4164     row-major ordering.. i.e for the following matrix, the input data expected is
4165     as shown
4166 
4167        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4168 
4169 $        1 0 0
4170 $        2 0 3     P0
4171 $       -------
4172 $        4 5 6     P1
4173 $
4174 $     Process0 [P0]: rows_owned=[0,1]
4175 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4176 $        j =  {0,0,2}  [size = 3]
4177 $        v =  {1,2,3}  [size = 3]
4178 $
4179 $     Process1 [P1]: rows_owned=[2]
4180 $        i =  {0,3}    [size = nrow+1  = 1+1]
4181 $        j =  {0,1,2}  [size = 3]
4182 $        v =  {4,5,6}  [size = 3]
4183 
.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4185           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4186 @*/
4187 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4188 {
4189   PetscFunctionBegin;
4190   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4191   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4192   PetscCall(MatCreate(comm, mat));
4193   PetscCall(MatSetSizes(*mat, m, n, M, N));
4194   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4195   PetscCall(MatSetType(*mat, MATMPIAIJ));
4196   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4197   PetscFunctionReturn(0);
4198 }
4199 
4200 /*@
4201      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4202          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`
4203 
4204      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4205 
4206    Collective
4207 
4208    Input Parameters:
4209 +  mat - the matrix
4210 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4211 .  n - This value should be the same as the local size used in creating the
4212        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4213        calculated if N is given) For square matrices n is almost always m.
4214 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4215 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4216 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4217 .  J - column indices
4218 -  v - matrix values
4219 
4220    Level: intermediate
4221 
4222 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4223           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4224 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;
  /* ld[i] = number of off-diagonal entries of local row i that precede the diagonal block;
     built by MatMPIAIJSetPreallocationCSR_MPIAIJ() — assumes mat was created by that path (ld may be NULL otherwise) — NOTE(review) */
  PetscInt       *ld  = Aij->ld;

  PetscFunctionBegin;
  /* Note: the J, M, and N arguments are not referenced below; the nonzero pattern
     (Ii, J) must be identical to the one used when mat was created */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];   /* total entries of row i */
    Iii = Ii[i];               /* offset of row i within v */
    ldi = ld[i];               /* off-diagonal entries before the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries inside the diagonal block */
    /* row i of v is laid out as [ldi off-diag | md diag | nnz-ldi-md off-diag] */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values were written directly into the local blocks, so skip off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4268 
4269 /*@
4270      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4271 
4272    Collective
4273 
4274    Input Parameters:
4275 +  mat - the matrix
4276 -  v - matrix values, stored by row
4277 
4278    Level: intermediate
4279 
4280    Note:
4281    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4282 
4283 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4285 @*/
4286 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4287 {
4288   PetscInt        nnz, i, m;
4289   PetscBool       nooffprocentries;
4290   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4291   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4292   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4293   PetscScalar    *ad, *ao;
4294   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4295   PetscInt        ldi, Iii, md;
4296   PetscInt       *ld = Aij->ld;
4297 
4298   PetscFunctionBegin;
4299   m = mat->rmap->n;
4300 
4301   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4302   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4303   Iii = 0;
4304   for (i = 0; i < m; i++) {
4305     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4306     ldi = ld[i];
4307     md  = Adi[i + 1] - Adi[i];
4308     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4309     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4310     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4311     ad += md;
4312     ao += nnz - md;
4313     Iii += nnz;
4314   }
4315   nooffprocentries      = mat->nooffprocentries;
4316   mat->nooffprocentries = PETSC_TRUE;
4317   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4318   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4319   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4320   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4321   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4322   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4323   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4324   mat->nooffprocentries = nooffprocentries;
4325   PetscFunctionReturn(0);
4326 }
4327 
4328 /*@C
4329    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4330    (the default parallel PETSc format).  For good matrix assembly performance
4331    the user should preallocate the matrix storage by setting the parameters
4332    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4333    performance can be increased by more than a factor of 50.
4334 
4335    Collective
4336 
4337    Input Parameters:
4338 +  comm - MPI communicator
4339 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4340            This value should be the same as the local size used in creating the
4341            y vector for the matrix-vector product y = Ax.
4342 .  n - This value should be the same as the local size used in creating the
4343        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4344        calculated if N is given) For square matrices n is almost always m.
4345 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4346 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4347 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4348            (same value is used for all local rows)
4349 .  d_nnz - array containing the number of nonzeros in the various rows of the
4350            DIAGONAL portion of the local submatrix (possibly different for each row)
4351            or NULL, if d_nz is used to specify the nonzero structure.
4352            The size of this array is equal to the number of local rows, i.e 'm'.
4353 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4354            submatrix (same value is used for all local rows).
4355 -  o_nnz - array containing the number of nonzeros in the various rows of the
4356            OFF-DIAGONAL portion of the local submatrix (possibly different for
4357            each row) or NULL, if o_nz is used to specify the nonzero
4358            structure. The size of this array is equal to the number
4359            of local rows, i.e 'm'.
4360 
4361    Output Parameter:
4362 .  A - the matrix
4363 
4364    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4365    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4366    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4367 
4368    Notes:
4369    If the *_nnz parameter is given then the *_nz parameter is ignored
4370 
4371    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4372    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4373    storage requirements for this matrix.
4374 
4375    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4376    processor than it must be used on all processors that share the object for
4377    that argument.
4378 
4379    The user MUST specify either the local or global matrix dimensions
4380    (possibly both).
4381 
4382    The parallel matrix is partitioned across processors such that the
4383    first m0 rows belong to process 0, the next m1 rows belong to
4384    process 1, the next m2 rows belong to process 2 etc.. where
4385    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4386    values corresponding to [m x N] submatrix.
4387 
4388    The columns are logically partitioned with the n0 columns belonging
4389    to 0th partition, the next n1 columns belonging to the next
4390    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4391 
4392    The DIAGONAL portion of the local submatrix on any given processor
4393    is the submatrix corresponding to the rows and columns m,n
4394    corresponding to the given processor. i.e diagonal matrix on
4395    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4396    etc. The remaining portion of the local submatrix [m x (N-n)]
4397    constitute the OFF-DIAGONAL portion. The example below better
4398    illustrates this concept.
4399 
4400    For a square global matrix we define each processor's diagonal portion
4401    to be its local rows and the corresponding columns (a square submatrix);
4402    each processor's off-diagonal portion encompasses the remainder of the
4403    local matrix (a rectangular submatrix).
4404 
4405    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4406 
4407    When calling this routine with a single process communicator, a matrix of
4408    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4409    type of communicator, use the construction mechanism
4410 .vb
4411      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4412 .ve
4413 
4414 $     MatCreate(...,&A);
4415 $     MatSetType(A,MATMPIAIJ);
4416 $     MatSetSizes(A, m,n,M,N);
4417 $     MatMPIAIJSetPreallocation(A,...);
4418 
4419    By default, this format uses inodes (identical nodes) when possible.
4420    We search for consecutive rows with the same nonzero structure, thereby
4421    reusing matrix information to achieve increased efficiency.
4422 
4423    Options Database Keys:
4424 +  -mat_no_inode  - Do not use inodes
4425 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4426 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4427         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4428         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4429 
4430    Example usage:
4431 
4432    Consider the following 8x8 matrix with 34 non-zero values, that is
4433    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4434    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4435    as follows
4436 
4437 .vb
4438             1  2  0  |  0  3  0  |  0  4
4439     Proc0   0  5  6  |  7  0  0  |  8  0
4440             9  0 10  | 11  0  0  | 12  0
4441     -------------------------------------
4442            13  0 14  | 15 16 17  |  0  0
4443     Proc1   0 18  0  | 19 20 21  |  0  0
4444             0  0  0  | 22 23  0  | 24  0
4445     -------------------------------------
4446     Proc2  25 26 27  |  0  0 28  | 29  0
4447            30  0  0  | 31 32 33  |  0 34
4448 .ve
4449 
4450    This can be represented as a collection of submatrices as
4451 
4452 .vb
4453       A B C
4454       D E F
4455       G H I
4456 .ve
4457 
4458    Where the submatrices A,B,C are owned by proc0, D,E,F are
4459    owned by proc1, G,H,I are owned by proc2.
4460 
4461    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4462    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4463    The 'M','N' parameters are 8,8, and have the same values on all procs.
4464 
4465    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4466    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4467    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4471 
4472    When d_nz, o_nz parameters are specified, d_nz storage elements are
4473    allocated for every row of the local diagonal submatrix, and o_nz
4474    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4476    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4477    In this case, the values of d_nz,o_nz are
4478 .vb
4479      proc0 : dnz = 2, o_nz = 2
4480      proc1 : dnz = 3, o_nz = 2
4481      proc2 : dnz = 1, o_nz = 4
4482 .ve
4483    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4484    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4486    34 values.
4487 
4488    When d_nnz, o_nnz parameters are specified, the storage is specified
4489    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4490    In the above case the values for d_nnz,o_nnz are
4491 .vb
4492      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4493      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4494      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4495 .ve
4496    Here the space allocated is sum of all the above values i.e 34, and
4497    hence pre-allocation is perfect.
4498 
4499    Level: intermediate
4500 
4501 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4502           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4503 @*/
4504 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4505 {
4506   PetscMPIInt size;
4507 
4508   PetscFunctionBegin;
4509   PetscCall(MatCreate(comm, A));
4510   PetscCall(MatSetSizes(*A, m, n, M, N));
4511   PetscCallMPI(MPI_Comm_size(comm, &size));
4512   if (size > 1) {
4513     PetscCall(MatSetType(*A, MATMPIAIJ));
4514     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4515   } else {
4516     PetscCall(MatSetType(*A, MATSEQAIJ));
4517     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4518   }
4519   PetscFunctionReturn(0);
4520 }
4521 
4522 /*@C
4523   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4524 
4525   Not collective
4526 
4527   Input Parameter:
4528 . A - The `MATMPIAIJ` matrix
4529 
4530   Output Parameters:
4531 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4532 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4533 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4534 
4535   Note:
4536   The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4538   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4539   local column numbers to global column numbers in the original matrix.
4540 
4541   Level: intermediate
4542 
4543 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4544 @*/
4545 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4546 {
4547   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4548   PetscBool   flg;
4549 
4550   PetscFunctionBegin;
4551   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4552   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4553   if (Ad) *Ad = a->A;
4554   if (Ao) *Ao = a->B;
4555   if (colmap) *colmap = a->garray;
4556   PetscFunctionReturn(0);
4557 }
4558 
/* Builds a parallel matrix on comm by stacking the rows of each rank's sequential
   matrix inmat (rank order gives row order).  n is the local column count (or
   PETSC_DECIDE).  With MAT_INITIAL_MATRIX the layout and preallocation are
   computed first; with MAT_REUSE_MATRIX only the numerical values are reinserted
   into the existing *outmat. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* first global row owned by this rank: exclusive prefix sum of the local row counts */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only the call matching the (seq or mpi) type set above takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: insert each local row of inmat at its shifted global position */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4610 
4611 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4612 {
4613   PetscMPIInt        rank;
4614   PetscInt           m, N, i, rstart, nnz;
4615   size_t             len;
4616   const PetscInt    *indx;
4617   PetscViewer        out;
4618   char              *name;
4619   Mat                B;
4620   const PetscScalar *values;
4621 
4622   PetscFunctionBegin;
4623   PetscCall(MatGetLocalSize(A, &m, NULL));
4624   PetscCall(MatGetSize(A, NULL, &N));
4625   /* Should this be the type of the diagonal block of A? */
4626   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4627   PetscCall(MatSetSizes(B, m, N, m, N));
4628   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4629   PetscCall(MatSetType(B, MATSEQAIJ));
4630   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4631   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4632   for (i = 0; i < m; i++) {
4633     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4634     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4635     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4636   }
4637   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4638   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4639 
4640   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4641   PetscCall(PetscStrlen(outfile, &len));
4642   PetscCall(PetscMalloc1(len + 6, &name));
4643   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4644   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4645   PetscCall(PetscFree(name));
4646   PetscCall(MatView(B, out));
4647   PetscCall(PetscViewerDestroy(&out));
4648   PetscCall(MatDestroy(&B));
4649   PetscFunctionReturn(0);
4650 }
4651 
4652 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4653 {
4654   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4655 
4656   PetscFunctionBegin;
4657   if (!merge) PetscFunctionReturn(0);
4658   PetscCall(PetscFree(merge->id_r));
4659   PetscCall(PetscFree(merge->len_s));
4660   PetscCall(PetscFree(merge->len_r));
4661   PetscCall(PetscFree(merge->bi));
4662   PetscCall(PetscFree(merge->bj));
4663   PetscCall(PetscFree(merge->buf_ri[0]));
4664   PetscCall(PetscFree(merge->buf_ri));
4665   PetscCall(PetscFree(merge->buf_rj[0]));
4666   PetscCall(PetscFree(merge->buf_rj));
4667   PetscCall(PetscFree(merge->coi));
4668   PetscCall(PetscFree(merge->coj));
4669   PetscCall(PetscFree(merge->owners_co));
4670   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4671   PetscCall(PetscFree(merge));
4672   PetscFunctionReturn(0);
4673 }
4674 
4675 #include <../src/mat/utils/freespace.h>
4676 #include <petscbt.h>
4677 
/* Numeric phase companion to MatCreateMPIAIJSumSeqAIJSymbolic(): fills the values
   of mpimat by summing, row by row, the entries of every rank's seqmat.  The
   values of off-process rows of seqmat are shipped to their owning ranks, which
   merge them into their local rows using the i/j structures (merge->bi/bj and
   buf_ri/buf_rj) saved by the symbolic phase. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge support structure attached to mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* values of the rows owned by [proc] are contiguous in aa starting at ai[owners[proc]] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i)); /* scratch for the accumulated values of one merged row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row number */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge-walk: bj_i contains all columns of aj in the same order, so advance j until a match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* abuf_r[0] holds the contiguous storage for all received value messages */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
4798 
/* Symbolic phase for summing the per-rank sequential AIJ matrices seqmat into one
   parallel MATMPIAIJ (see MatCreateMPIAIJSumSeqAIJ()).  Every rank's seqmat has the
   full global dimensions M x N.  This routine determines row ownership, ships the
   ij-structure of off-process rows to their owners, merges local and received column
   lists into the parallel nonzero pattern, creates the (not yet assembled) matrix
   B_mpi, and attaches a Mat_Merge_SeqsToMPI container so the numeric phase
   (MatCreateMPIAIJSumSeqAIJNumeric()) can reuse the communication and ij data. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc + 1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only nonempty rows are encoded in the i-structure message */
      nrows = 0;
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      len_si[proc] = 2 * (nrows + 1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* merge, per local row, the local column list with those received from other ranks */
  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0;
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  /* gather the merged column indices (CSR j array) out of the free-space chunks */
  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
5044 
5045 /*@C
5046       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5047                  matrices from each processor
5048 
5049     Collective
5050 
5051    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix on each process
5054 .    m - number of local rows (or `PETSC_DECIDE`)
5055 .    n - number of local columns (or `PETSC_DECIDE`)
5056 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5057 
5058    Output Parameter:
5059 .    mpimat - the parallel matrix generated
5060 
5061     Level: advanced
5062 
5063    Note:
5064      The dimensions of the sequential matrix in each processor MUST be the same.
5065      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5066      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5067 @*/
5068 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5069 {
5070   PetscMPIInt size;
5071 
5072   PetscFunctionBegin;
5073   PetscCallMPI(MPI_Comm_size(comm, &size));
5074   if (size == 1) {
5075     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5076     if (scall == MAT_INITIAL_MATRIX) {
5077       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5078     } else {
5079       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5080     }
5081     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5082     PetscFunctionReturn(0);
5083   }
5084   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5085   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5086   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5087   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5088   PetscFunctionReturn(0);
5089 }
5090 
5091 /*@
5092      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5093           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5094           with `MatGetSize()`
5095 
5096     Not Collective
5097 
   Input Parameter:
.    A - the matrix
5101 
5102    Output Parameter:
5103 .    A_loc - the local sequential matrix generated
5104 
5105     Level: developer
5106 
5107    Notes:
5108      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5109 
5110      Destroy the matrix with `MatDestroy()`
5111 
5112 .seealso: `MatMPIAIJGetLocalMat()`
5113 @*/
5114 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5115 {
5116   PetscBool mpi;
5117 
5118   PetscFunctionBegin;
5119   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5120   if (mpi) {
5121     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5122   } else {
5123     *A_loc = A;
5124     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5125   }
5126   PetscFunctionReturn(0);
5127 }
5128 
5129 /*@
5130      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5131           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5132           with `MatGetSize()`
5133 
5134     Not Collective
5135 
5136    Input Parameters:
5137 +    A - the matrix
5138 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5139 
5140    Output Parameter:
5141 .    A_loc - the local sequential matrix generated
5142 
5143     Level: developer
5144 
5145    Notes:
5146      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5147 
5148      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5149      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5150      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5151      modify the values of the returned A_loc.
5152 
5153 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5154 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* garray: local off-diagonal column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Prefix match so derived types whose name begins with "mpiaij" are accepted as well */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Sequential communicator: the diagonal block already IS the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  /* CSR row pointers and column indices of the diagonal (a) and off-diagonal (b) blocks */
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  /* aav/bav keep the original array pointers for the restore calls; aa/ba are advanced below */
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds diag + off-diag entries */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* Merge each row so that global column indices come out in ascending order:
       off-diag entries left of the diagonal block, then the diagonal block, then the rest */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal entries whose global column precedes the diagonal block */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local column indices by cstart to get global ones) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal entries (global column past the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Pattern is unchanged, so only the numerical values are re-merged, in the same order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal entries whose global column precedes the diagonal block */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal entries */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5259 
5260 /*@
5261      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5262           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5263 
5264     Not Collective
5265 
5266    Input Parameters:
5267 +    A - the matrix
5268 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5269 
5270    Output Parameters:
5271 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5272 -    A_loc - the local sequential matrix generated
5273 
5274     Level: developer
5275 
5276    Note:
5277      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering)
5278 
5279 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5280 @*/
5281 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5282 {
5283   Mat             Ao, Ad;
5284   const PetscInt *cmap;
5285   PetscMPIInt     size;
5286   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5287 
5288   PetscFunctionBegin;
5289   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5290   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5291   if (size == 1) {
5292     if (scall == MAT_INITIAL_MATRIX) {
5293       PetscCall(PetscObjectReference((PetscObject)Ad));
5294       *A_loc = Ad;
5295     } else if (scall == MAT_REUSE_MATRIX) {
5296       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5297     }
5298     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5299     PetscFunctionReturn(0);
5300   }
5301   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5302   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5303   if (f) {
5304     PetscCall((*f)(A, scall, glob, A_loc));
5305   } else {
5306     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5307     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5308     Mat_SeqAIJ        *c;
5309     PetscInt          *ai = a->i, *aj = a->j;
5310     PetscInt          *bi = b->i, *bj = b->j;
5311     PetscInt          *ci, *cj;
5312     const PetscScalar *aa, *ba;
5313     PetscScalar       *ca;
5314     PetscInt           i, j, am, dn, on;
5315 
5316     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5317     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5318     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5319     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5320     if (scall == MAT_INITIAL_MATRIX) {
5321       PetscInt k;
5322       PetscCall(PetscMalloc1(1 + am, &ci));
5323       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5324       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5325       ci[0] = 0;
5326       for (i = 0, k = 0; i < am; i++) {
5327         const PetscInt ncols_o = bi[i + 1] - bi[i];
5328         const PetscInt ncols_d = ai[i + 1] - ai[i];
5329         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5330         /* diagonal portion of A */
5331         for (j = 0; j < ncols_d; j++, k++) {
5332           cj[k] = *aj++;
5333           ca[k] = *aa++;
5334         }
5335         /* off-diagonal portion of A */
5336         for (j = 0; j < ncols_o; j++, k++) {
5337           cj[k] = dn + *bj++;
5338           ca[k] = *ba++;
5339         }
5340       }
5341       /* put together the new matrix */
5342       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5343       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5344       /* Since these are PETSc arrays, change flags to free them as necessary. */
5345       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5346       c->free_a  = PETSC_TRUE;
5347       c->free_ij = PETSC_TRUE;
5348       c->nonew   = 0;
5349       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5350     } else if (scall == MAT_REUSE_MATRIX) {
5351       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5352       for (i = 0; i < am; i++) {
5353         const PetscInt ncols_d = ai[i + 1] - ai[i];
5354         const PetscInt ncols_o = bi[i + 1] - bi[i];
5355         /* diagonal portion of A */
5356         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5357         /* off-diagonal portion of A */
5358         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5359       }
5360       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5361     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5362     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5363     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5364     if (glob) {
5365       PetscInt cst, *gidx;
5366 
5367       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5368       PetscCall(PetscMalloc1(dn + on, &gidx));
5369       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5370       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5371       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5372     }
5373   }
5374   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5375   PetscFunctionReturn(0);
5376 }
5377 
5378 /*@C
5379      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5380 
5381     Not Collective
5382 
5383    Input Parameters:
5384 +    A - the matrix
5385 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5386 -    row, col - index sets of rows and columns to extract (or NULL)
5387 
5388    Output Parameter:
5389 .    A_loc - the local sequential matrix generated
5390 
5391     Level: developer
5392 
5393 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5394 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the owned columns plus the nonzero off-diagonal columns,
       merged in ascending global order (assumes a->garray is sorted — the usual invariant) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    /* off-diagonal columns that precede the owned range */
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    /* owned columns */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    /* off-diagonal columns past the owned range */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    /* the IS takes ownership of idx (PETSC_OWN_POINTER) */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() with MAT_REUSE_MATRIX expects an array of existing matrices */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* only destroy the index sets this routine created; caller-supplied ones are untouched */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5447 
5448 /*
5449  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5450  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5451  * on a global size.
5452  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots (locally owned rows of P)
   * nrows is the number of leaves (requested rows)
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for each requested row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns each requested row has */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per owned row: counts and running offsets, stored pairwise as (diag, off-diag) */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* Total number of leaf entries for the diagonal and off-diagonal parts,
   * plus the row-by-row nonzero counts (pnnz) used for preallocation */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build a second pair of SF graphs at entry (not row) granularity:
   * one leaf per nonzero, targeting that nonzero's position in the root's CSR data */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (temporarily modifies pd->j in place; undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* Same in-place trick for the off-diagonal column indices (restored via the inverse map below) */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Restore po->j to local numbering; every index must map back or P was corrupted */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5621 
5622 /*
5623  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5624  * This supports MPIAIJ and MAIJ
5625  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; several off-diagonal columns of A
     * collapse to the same row of P when dof > 1 (the MAIJ case) */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp, a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' maps to the same key as the previous step */
        mapping[i] = count - 1;
      }
    }
    /* map: off-diagonal column of A -> local row of P_oth; the IS owns 'mapping' */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* hash-map keys come out unordered; sort so the extracted rows are ascending */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but the user wants to recreate it */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}
5699 
5700 /*@C
5701   MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A
5702 
5703   Collective on A
5704 
5705   Input Parameters:
5706 + A - the first matrix in `MATMPIAIJ` format
5707 . B - the second matrix in `MATMPIAIJ` format
5708 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5709 
5710   Output Parameters:
5711 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5712 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5713 - B_seq - the sequential matrix generated
5714 
5715   Level: developer
5716 
5717 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* the rows of B must line up with the columns of A for A*B to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row set: global columns where A has nonzeros, merged in ascending
       order (off-diag before the owned block, owned block, off-diag after) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    /* the IS takes ownership of idx; all columns of B are extracted */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() with MAT_REUSE_MATRIX expects an array of existing matrices */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* hand the index sets back to the caller for reuse, or destroy them if not requested */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(0);
}
5770 
5771 /*
5772     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5773     of the OFF-DIAGONAL portion of local A
5774 
5775     Collective on Mat
5776 
5777    Input Parameters:
5778 +    A,B - the matrices in mpiaij format
5779 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5780 
5781    Output Parameter:
5782 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5783 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5784 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5785 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5786 
5787     Developer Note:
5788     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5790 
5791     Level: developer
5792 
5793 */
5794 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5795 {
5796   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5797   Mat_SeqAIJ        *b_oth;
5798   VecScatter         ctx;
5799   MPI_Comm           comm;
5800   const PetscMPIInt *rprocs, *sprocs;
5801   const PetscInt    *srow, *rstarts, *sstarts;
5802   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5803   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5804   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5805   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5806   PetscMPIInt        size, tag, rank, nreqs;
5807 
5808   PetscFunctionBegin;
5809   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5810   PetscCallMPI(MPI_Comm_size(comm, &size));
5811 
5812   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5813     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5814   }
5815   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5816   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5817 
5818   if (size == 1) {
5819     startsj_s = NULL;
5820     bufa_ptr  = NULL;
5821     *B_oth    = NULL;
5822     PetscFunctionReturn(0);
5823   }
5824 
5825   ctx = a->Mvctx;
5826   tag = ((PetscObject)ctx)->tag;
5827 
5828   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5829   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5830   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5831   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5832   PetscCall(PetscMalloc1(nreqs, &reqs));
5833   rwaits = reqs;
5834   swaits = reqs + nrecvs;
5835 
5836   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5837   if (scall == MAT_INITIAL_MATRIX) {
5838     /* i-array */
5839     /*---------*/
5840     /*  post receives */
5841     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5842     for (i = 0; i < nrecvs; i++) {
5843       rowlen = rvalues + rstarts[i] * rbs;
5844       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5845       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5846     }
5847 
5848     /* pack the outgoing message */
5849     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5850 
5851     sstartsj[0] = 0;
5852     rstartsj[0] = 0;
5853     len         = 0; /* total length of j or a array to be sent */
5854     if (nsends) {
5855       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5856       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5857     }
5858     for (i = 0; i < nsends; i++) {
5859       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5860       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5861       for (j = 0; j < nrows; j++) {
5862         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5863         for (l = 0; l < sbs; l++) {
5864           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5865 
5866           rowlen[j * sbs + l] = ncols;
5867 
5868           len += ncols;
5869           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5870         }
5871         k++;
5872       }
5873       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5874 
5875       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5876     }
5877     /* recvs and sends of i-array are completed */
5878     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5879     PetscCall(PetscFree(svalues));
5880 
5881     /* allocate buffers for sending j and a arrays */
5882     PetscCall(PetscMalloc1(len + 1, &bufj));
5883     PetscCall(PetscMalloc1(len + 1, &bufa));
5884 
5885     /* create i-array of B_oth */
5886     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5887 
5888     b_othi[0] = 0;
5889     len       = 0; /* total length of j or a array to be received */
5890     k         = 0;
5891     for (i = 0; i < nrecvs; i++) {
5892       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5893       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5894       for (j = 0; j < nrows; j++) {
5895         b_othi[k + 1] = b_othi[k] + rowlen[j];
5896         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5897         k++;
5898       }
5899       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5900     }
5901     PetscCall(PetscFree(rvalues));
5902 
5903     /* allocate space for j and a arrays of B_oth */
5904     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5905     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5906 
5907     /* j-array */
5908     /*---------*/
5909     /*  post receives of j-array */
5910     for (i = 0; i < nrecvs; i++) {
5911       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5912       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5913     }
5914 
5915     /* pack the outgoing message j-array */
5916     if (nsends) k = sstarts[0];
5917     for (i = 0; i < nsends; i++) {
5918       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5919       bufJ  = bufj + sstartsj[i];
5920       for (j = 0; j < nrows; j++) {
5921         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5922         for (ll = 0; ll < sbs; ll++) {
5923           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5924           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5925           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5926         }
5927       }
5928       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5929     }
5930 
5931     /* recvs and sends of j-array are completed */
5932     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5933   } else if (scall == MAT_REUSE_MATRIX) {
5934     sstartsj = *startsj_s;
5935     rstartsj = *startsj_r;
5936     bufa     = *bufa_ptr;
5937     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5938     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5939   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5940 
5941   /* a-array */
5942   /*---------*/
5943   /*  post receives of a-array */
5944   for (i = 0; i < nrecvs; i++) {
5945     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5946     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5947   }
5948 
5949   /* pack the outgoing message a-array */
5950   if (nsends) k = sstarts[0];
5951   for (i = 0; i < nsends; i++) {
5952     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5953     bufA  = bufa + sstartsj[i];
5954     for (j = 0; j < nrows; j++) {
5955       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5956       for (ll = 0; ll < sbs; ll++) {
5957         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5958         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5959         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5960       }
5961     }
5962     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5963   }
5964   /* recvs and sends of a-array are completed */
5965   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5966   PetscCall(PetscFree(reqs));
5967 
5968   if (scall == MAT_INITIAL_MATRIX) {
5969     /* put together the new matrix */
5970     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5971 
5972     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5973     /* Since these are PETSc arrays, change flags to free them as necessary. */
5974     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5975     b_oth->free_a  = PETSC_TRUE;
5976     b_oth->free_ij = PETSC_TRUE;
5977     b_oth->nonew   = 0;
5978 
5979     PetscCall(PetscFree(bufj));
5980     if (!startsj_s || !bufa_ptr) {
5981       PetscCall(PetscFree2(sstartsj, rstartsj));
5982       PetscCall(PetscFree(bufa_ptr));
5983     } else {
5984       *startsj_s = sstartsj;
5985       *startsj_r = rstartsj;
5986       *bufa_ptr  = bufa;
5987     }
5988   } else if (scall == MAT_REUSE_MATRIX) {
5989     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5990   }
5991 
5992   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
5993   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
5994   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5995   PetscFunctionReturn(0);
5996 }
5997 
5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5999 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6000 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6001 #if defined(PETSC_HAVE_MKL_SPARSE)
6002 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6003 #endif
6004 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6005 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6006 #if defined(PETSC_HAVE_ELEMENTAL)
6007 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6008 #endif
6009 #if defined(PETSC_HAVE_SCALAPACK)
6010 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6011 #endif
6012 #if defined(PETSC_HAVE_HYPRE)
6013 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6014 #endif
6015 #if defined(PETSC_HAVE_CUDA)
6016 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6017 #endif
6018 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6019 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6020 #endif
6021 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6022 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6023 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6024 
6025 /*
6026     Computes (B'*A')' since computing B*A directly is untenable
6027 
6028                n                       p                          p
6029         [             ]       [             ]         [                 ]
6030       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6031         [             ]       [             ]         [                 ]
6032 
6033 */
6034 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6035 {
6036   Mat At, Bt, Ct;
6037 
6038   PetscFunctionBegin;
6039   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6040   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6041   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6042   PetscCall(MatDestroy(&At));
6043   PetscCall(MatDestroy(&Bt));
6044   PetscCall(MatTransposeSetPrecursor(Ct, C));
6045   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6046   PetscCall(MatDestroy(&Ct));
6047   PetscFunctionReturn(0);
6048 }
6049 
6050 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6051 {
6052   PetscBool cisdense;
6053 
6054   PetscFunctionBegin;
6055   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6056   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6057   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6058   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
6059   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6060   PetscCall(MatSetUp(C));
6061 
6062   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6063   PetscFunctionReturn(0);
6064 }
6065 
6066 /* ----------------------------------------------------------------*/
6067 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6068 {
6069   Mat_Product *product = C->product;
6070   Mat          A = product->A, B = product->B;
6071 
6072   PetscFunctionBegin;
6073   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6074     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6075 
6076   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6077   C->ops->productsymbolic = MatProductSymbolic_AB;
6078   PetscFunctionReturn(0);
6079 }
6080 
6081 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6082 {
6083   Mat_Product *product = C->product;
6084 
6085   PetscFunctionBegin;
6086   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6087   PetscFunctionReturn(0);
6088 }
6089 
6090 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6091 
6092   Input Parameters:
6093 
6094     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6095     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6096 
6097     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6098 
6099     For Set1, j1[] contains column indices of the nonzeros.
6100     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6102     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6103 
6104     Similar for Set2.
6105 
6106     This routine merges the two sets of nonzeros row by row and removes repeats.
6107 
6108   Output Parameters: (memory is allocated by the caller)
6109 
6110     i[],j[]: the CSR of the merged matrix, which has m rows.
6111     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6112     imap2[]: similar to imap1[], but for Set2.
6113     Note we order nonzeros row-by-row and from left to right.
6114 */
/* Merge the two sorted per-row nonzero sets into the CSR (i[],j[]) of the merged matrix, removing
   repeats and recording where each unique nonzero of either set lands (imap1[]/imap2[]); see the
   contract described in the comment above. */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* r: row index of mat; m: number of local rows */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r]; /* [b1,e1): entries of row r in j1[]; [b2,e2): entries of row r in j2[] */
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Skip the repeats, jumping to the next unique nonzero of Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Likewise for Set2 */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Set1's entry comes first in column order */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Set2's entry comes first */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* End of row r in the merged CSR */
  }
  PetscFunctionReturn(0);
}
6172 
6173 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6174 
6175   Input Parameters:
6176     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6177     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6178       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6179 
6180       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6181       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6182 
6183   Output Parameters:
6184     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6185     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6186       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6187       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6188 
6189     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6190       Atot: number of entries belonging to the diagonal block.
6191       Annz: number of unique nonzeros belonging to the diagonal block.
6192       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6193         repeats (i.e., same 'i,j' pair).
6194       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6195         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6196 
6199 
6200     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6201 
6202     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6203 */
6204 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6205 {
6206   PetscInt    cstart, cend, rstart, rend, row, col;
6207   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6208   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6209   PetscCount  k, m, p, q, r, s, mid;
6210   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6211 
6212   PetscFunctionBegin;
6213   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6214   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6215   m = rend - rstart;
6216 
6217   for (k = 0; k < n; k++) {
6218     if (i[k] >= 0) break;
6219   } /* Skip negative rows */
6220 
6221   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6222      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6223   */
6224   while (k < n) {
6225     row = i[k];
6226     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6227     for (s = k; s < n; s++)
6228       if (i[s] != row) break;
6229     for (p = k; p < s; p++) {
6230       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6231       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6232     }
6233     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6234     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6235     rowBegin[row - rstart] = k;
6236     rowMid[row - rstart]   = mid;
6237     rowEnd[row - rstart]   = s;
6238 
6239     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6240     Atot += mid - k;
6241     Btot += s - mid;
6242 
6243     /* Count unique nonzeros of this diag/offdiag row */
6244     for (p = k; p < mid;) {
6245       col = j[p];
6246       do {
6247         j[p] += PETSC_MAX_INT;
6248         p++;
6249       } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
6250       Annz++;
6251     }
6252 
6253     for (p = mid; p < s;) {
6254       col = j[p];
6255       do {
6256         p++;
6257       } while (p < s && j[p] == col);
6258       Bnnz++;
6259     }
6260     k = s;
6261   }
6262 
6263   /* Allocation according to Atot, Btot, Annz, Bnnz */
6264   PetscCall(PetscMalloc1(Atot, &Aperm));
6265   PetscCall(PetscMalloc1(Btot, &Bperm));
6266   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6267   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6268 
6269   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6270   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6271   for (r = 0; r < m; r++) {
6272     k   = rowBegin[r];
6273     mid = rowMid[r];
6274     s   = rowEnd[r];
6275     PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
6276     PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
6277     Atot += mid - k;
6278     Btot += s - mid;
6279 
6280     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6281     for (p = k; p < mid;) {
6282       col = j[p];
6283       q   = p;
6284       do {
6285         p++;
6286       } while (p < mid && j[p] == col);
6287       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6288       Annz++;
6289     }
6290 
6291     for (p = mid; p < s;) {
6292       col = j[p];
6293       q   = p;
6294       do {
6295         p++;
6296       } while (p < s && j[p] == col);
6297       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6298       Bnnz++;
6299     }
6300   }
6301   /* Output */
6302   *Aperm_ = Aperm;
6303   *Annz_  = Annz;
6304   *Atot_  = Atot;
6305   *Ajmap_ = Ajmap;
6306   *Bperm_ = Bperm;
6307   *Bnnz_  = Bnnz;
6308   *Btot_  = Btot;
6309   *Bjmap_ = Bjmap;
6310   PetscFunctionReturn(0);
6311 }
6312 
6313 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6314 
6315   Input Parameters:
6316     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6317     nnz:  number of unique nonzeros in the merged matrix
6318     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6320 
6321   Output Parameter: (memory is allocated by the caller)
6322     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6323 
6324   Example:
6325     nnz1 = 4
6326     nnz  = 6
6327     imap = [1,3,4,5]
6328     jmap = [0,3,5,6,7]
6329    then,
6330     jmap_new = [0,0,3,3,5,6,7]
6331 */
6332 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6333 {
6334   PetscCount k, p;
6335 
6336   PetscFunctionBegin;
6337   jmap_new[0] = 0;
6338   p           = nnz;                /* p loops over jmap_new[] backwards */
6339   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6340     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6341   }
6342   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6343   PetscFunctionReturn(0);
6344 }
6345 
6346 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6347 {
6348   MPI_Comm    comm;
6349   PetscMPIInt rank, size;
6350   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6351   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6352   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6353 
6354   PetscFunctionBegin;
6355   PetscCall(PetscFree(mpiaij->garray));
6356   PetscCall(VecDestroy(&mpiaij->lvec));
6357 #if defined(PETSC_USE_CTABLE)
6358   PetscCall(PetscTableDestroy(&mpiaij->colmap));
6359 #else
6360   PetscCall(PetscFree(mpiaij->colmap));
6361 #endif
6362   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6363   mat->assembled     = PETSC_FALSE;
6364   mat->was_assembled = PETSC_FALSE;
6365   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6366 
6367   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6368   PetscCallMPI(MPI_Comm_size(comm, &size));
6369   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6370   PetscCall(PetscLayoutSetUp(mat->rmap));
6371   PetscCall(PetscLayoutSetUp(mat->cmap));
6372   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6373   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6374   PetscCall(MatGetLocalSize(mat, &m, &n));
6375   PetscCall(MatGetSize(mat, &M, &N));
6376 
6377   /* ---------------------------------------------------------------------------*/
6378   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6379   /* entries come first, then local rows, then remote rows.                     */
6380   /* ---------------------------------------------------------------------------*/
6381   PetscCount n1 = coo_n, *perm1;
6382   PetscInt  *i1 = coo_i, *j1 = coo_j;
6383 
6384   PetscCall(PetscMalloc1(n1, &perm1));
6385   for (k = 0; k < n1; k++) perm1[k] = k;
6386 
6387   /* Manipulate indices so that entries with negative row or col indices will have smallest
6388      row indices, local entries will have greater but negative row indices, and remote entries
6389      will have positive row indices.
6390   */
6391   for (k = 0; k < n1; k++) {
6392     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6393     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6394     else {
6395       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6396       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6397     }
6398   }
6399 
6400   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6401   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6402   for (k = 0; k < n1; k++) {
6403     if (i1[k] > PETSC_MIN_INT) break;
6404   }                                                                               /* Advance k to the first entry we need to take care of */
6405   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6406   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6407 
6408   /* ---------------------------------------------------------------------------*/
6409   /*           Split local rows into diag/offdiag portions                      */
6410   /* ---------------------------------------------------------------------------*/
6411   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6412   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6413   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6414 
6415   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6416   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6417   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6418 
6419   /* ---------------------------------------------------------------------------*/
6420   /*           Send remote rows to their owner                                  */
6421   /* ---------------------------------------------------------------------------*/
6422   /* Find which rows should be sent to which remote ranks*/
6423   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6424   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6425   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6426   const PetscInt *ranges;
6427   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6428 
6429   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6430   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6431   for (k = rem; k < n1;) {
6432     PetscMPIInt owner;
6433     PetscInt    firstRow, lastRow;
6434 
6435     /* Locate a row range */
6436     firstRow = i1[k]; /* first row of this owner */
6437     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6438     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6439 
6440     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6441     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6442 
6443     /* All entries in [k,p) belong to this remote owner */
6444     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6445       PetscMPIInt *sendto2;
6446       PetscInt    *nentries2;
6447       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6448 
6449       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6450       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6451       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6452       PetscCall(PetscFree2(sendto, nentries2));
6453       sendto   = sendto2;
6454       nentries = nentries2;
6455       maxNsend = maxNsend2;
6456     }
6457     sendto[nsend]   = owner;
6458     nentries[nsend] = p - k;
6459     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6460     nsend++;
6461     k = p;
6462   }
6463 
6464   /* Build 1st SF to know offsets on remote to send data */
6465   PetscSF      sf1;
6466   PetscInt     nroots = 1, nroots2 = 0;
6467   PetscInt     nleaves = nsend, nleaves2 = 0;
6468   PetscInt    *offsets;
6469   PetscSFNode *iremote;
6470 
6471   PetscCall(PetscSFCreate(comm, &sf1));
6472   PetscCall(PetscMalloc1(nsend, &iremote));
6473   PetscCall(PetscMalloc1(nsend, &offsets));
6474   for (k = 0; k < nsend; k++) {
6475     iremote[k].rank  = sendto[k];
6476     iremote[k].index = 0;
6477     nleaves2 += nentries[k];
6478     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6479   }
6480   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6481   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6482   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6483   PetscCall(PetscSFDestroy(&sf1));
6484   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6485 
6486   /* Build 2nd SF to send remote COOs to their owner */
6487   PetscSF sf2;
6488   nroots  = nroots2;
6489   nleaves = nleaves2;
6490   PetscCall(PetscSFCreate(comm, &sf2));
6491   PetscCall(PetscSFSetFromOptions(sf2));
6492   PetscCall(PetscMalloc1(nleaves, &iremote));
6493   p = 0;
6494   for (k = 0; k < nsend; k++) {
6495     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6496     for (q = 0; q < nentries[k]; q++, p++) {
6497       iremote[p].rank  = sendto[k];
6498       iremote[p].index = offsets[k] + q;
6499     }
6500   }
6501   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6502 
6503   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6504   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6505 
6506   /* Send the remote COOs to their owner */
6507   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6508   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6509   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6510   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6511   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6512   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6513   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6514 
6515   PetscCall(PetscFree(offsets));
6516   PetscCall(PetscFree2(sendto, nentries));
6517 
6518   /* ---------------------------------------------------------------*/
6519   /* Sort received COOs by row along with the permutation array     */
6520   /* ---------------------------------------------------------------*/
6521   for (k = 0; k < n2; k++) perm2[k] = k;
6522   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6523 
6524   /* ---------------------------------------------------------------*/
6525   /* Split received COOs into diag/offdiag portions                 */
6526   /* ---------------------------------------------------------------*/
6527   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6528   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6529   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6530 
6531   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6532   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6533 
6534   /* --------------------------------------------------------------------------*/
6535   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6536   /* --------------------------------------------------------------------------*/
6537   PetscInt *Ai, *Bi;
6538   PetscInt *Aj, *Bj;
6539 
6540   PetscCall(PetscMalloc1(m + 1, &Ai));
6541   PetscCall(PetscMalloc1(m + 1, &Bi));
6542   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6543   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6544 
6545   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6546   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6547   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6548   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6549   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6550 
6551   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6552   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6553 
6554   /* --------------------------------------------------------------------------*/
6555   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6556   /* expect nonzeros in A/B most likely have local contributing entries        */
6557   /* --------------------------------------------------------------------------*/
6558   PetscInt    Annz = Ai[m];
6559   PetscInt    Bnnz = Bi[m];
6560   PetscCount *Ajmap1_new, *Bjmap1_new;
6561 
6562   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6563   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6564 
6565   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6566   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6567 
6568   PetscCall(PetscFree(Aimap1));
6569   PetscCall(PetscFree(Ajmap1));
6570   PetscCall(PetscFree(Bimap1));
6571   PetscCall(PetscFree(Bjmap1));
6572   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6573   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6574   PetscCall(PetscFree(perm1));
6575   PetscCall(PetscFree3(i2, j2, perm2));
6576 
6577   Ajmap1 = Ajmap1_new;
6578   Bjmap1 = Bjmap1_new;
6579 
6580   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6581   if (Annz < Annz1 + Annz2) {
6582     PetscInt *Aj_new;
6583     PetscCall(PetscMalloc1(Annz, &Aj_new));
6584     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6585     PetscCall(PetscFree(Aj));
6586     Aj = Aj_new;
6587   }
6588 
6589   if (Bnnz < Bnnz1 + Bnnz2) {
6590     PetscInt *Bj_new;
6591     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6592     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6593     PetscCall(PetscFree(Bj));
6594     Bj = Bj_new;
6595   }
6596 
6597   /* --------------------------------------------------------------------------------*/
6598   /* Create new submatrices for on-process and off-process coupling                  */
6599   /* --------------------------------------------------------------------------------*/
6600   PetscScalar *Aa, *Ba;
6601   MatType      rtype;
6602   Mat_SeqAIJ  *a, *b;
6603   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6604   PetscCall(PetscCalloc1(Bnnz, &Ba));
6605   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6606   if (cstart) {
6607     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6608   }
6609   PetscCall(MatDestroy(&mpiaij->A));
6610   PetscCall(MatDestroy(&mpiaij->B));
6611   PetscCall(MatGetRootType_Private(mat, &rtype));
6612   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6613   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6614   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6615 
6616   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6617   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6618   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6619   a->free_a = b->free_a = PETSC_TRUE;
6620   a->free_ij = b->free_ij = PETSC_TRUE;
6621 
6622   /* conversion must happen AFTER multiply setup */
6623   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6624   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6625   PetscCall(VecDestroy(&mpiaij->lvec));
6626   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6627 
6628   mpiaij->coo_n   = coo_n;
6629   mpiaij->coo_sf  = sf2;
6630   mpiaij->sendlen = nleaves;
6631   mpiaij->recvlen = nroots;
6632 
6633   mpiaij->Annz = Annz;
6634   mpiaij->Bnnz = Bnnz;
6635 
6636   mpiaij->Annz2 = Annz2;
6637   mpiaij->Bnnz2 = Bnnz2;
6638 
6639   mpiaij->Atot1 = Atot1;
6640   mpiaij->Atot2 = Atot2;
6641   mpiaij->Btot1 = Btot1;
6642   mpiaij->Btot2 = Btot2;
6643 
6644   mpiaij->Ajmap1 = Ajmap1;
6645   mpiaij->Aperm1 = Aperm1;
6646 
6647   mpiaij->Bjmap1 = Bjmap1;
6648   mpiaij->Bperm1 = Bperm1;
6649 
6650   mpiaij->Aimap2 = Aimap2;
6651   mpiaij->Ajmap2 = Ajmap2;
6652   mpiaij->Aperm2 = Aperm2;
6653 
6654   mpiaij->Bimap2 = Bimap2;
6655   mpiaij->Bjmap2 = Bjmap2;
6656   mpiaij->Bperm2 = Bperm2;
6657 
6658   mpiaij->Cperm1 = Cperm1;
6659 
6660   /* Allocate in preallocation. If not used, it has zero cost on host */
6661   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6662   PetscFunctionReturn(0);
6663 }
6664 
6665 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6666 {
6667   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6668   Mat               A = mpiaij->A, B = mpiaij->B;
6669   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6670   PetscScalar      *Aa, *Ba;
6671   PetscScalar      *sendbuf = mpiaij->sendbuf;
6672   PetscScalar      *recvbuf = mpiaij->recvbuf;
6673   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6674   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6675   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6676   const PetscCount *Cperm1 = mpiaij->Cperm1;
6677 
6678   PetscFunctionBegin;
6679   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6680   PetscCall(MatSeqAIJGetArray(B, &Ba));
6681 
6682   /* Pack entries to be sent to remote */
6683   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6684 
6685   /* Send remote entries to their owner and overlap the communication with local computation */
6686   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6687   /* Add local entries to A and B */
6688   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6689     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stablility */
6690     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6691     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6692   }
6693   for (PetscCount i = 0; i < Bnnz; i++) {
6694     PetscScalar sum = 0.0;
6695     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6696     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6697   }
6698   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6699 
6700   /* Add received remote entries to A and B */
6701   for (PetscCount i = 0; i < Annz2; i++) {
6702     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6703   }
6704   for (PetscCount i = 0; i < Bnnz2; i++) {
6705     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6706   }
6707   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6708   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6709   PetscFunctionReturn(0);
6710 }
6711 
6712 /* ----------------------------------------------------------------*/
6713 
6714 /*MC
6715    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6716 
6717    Options Database Keys:
6718 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6719 
6720    Level: beginner
6721 
6722    Notes:
6723     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
6724     in this case the values associated with the rows and columns one passes in are set to zero
6725     in the matrix
6726 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6729 
6730 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6731 M*/
6732 
/* MatCreate_MPIAIJ - type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ
   context, installs the shared operations table, and registers the composed
   methods (preallocation, format conversions, products, COO assembly) that
   interface routines look up by string key via PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  /* Install the MPIAIJ function table (MatOps_Values is defined earlier in this file) */
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific implementations; package-dependent conversions are
     compiled in only when the corresponding package is configured. */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}
6812 
6813 /*@C
6814      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6815          and "off-diagonal" part of the matrix in CSR format.
6816 
6817    Collective
6818 
6819    Input Parameters:
6820 +  comm - MPI communicator
6821 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6822 .  n - This value should be the same as the local size used in creating the
6823        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6824        calculated if N is given) For square matrices n is almost always m.
6825 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
6826 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
6827 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6828 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6829 .   a - matrix values
6830 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6831 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6832 -   oa - matrix values
6833 
6834    Output Parameter:
6835 .   mat - the matrix
6836 
6837    Level: advanced
6838 
6839    Notes:
6840        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6841        must free the arrays once the matrix has been destroyed and not before.
6842 
6843        The i and j indices are 0 based
6844 
6845        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6846 
6847        This sets local rows and cannot be used to set off-processor values.
6848 
6849        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6850        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6851        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6852        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6853        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6854        communication if it is known that only local entries will be set.
6855 
6856 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6857           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6858 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* CSR row pointers must start at 0, and the local row count must be explicit */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The two sequential blocks created below provide the storage directly,
     so the usual preallocation step is skipped */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user-provided arrays without copying; the caller retains ownership
     and must not free them until after the matrix has been destroyed */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Only local entries exist by construction, so assemble with stashing disabled,
     then restore the option and lock the nonzero pattern */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
6887 
/* Context for "backend" MPIAIJ matrix products (see MatProductSymbolic_MPIAIJBACKEND
   and MatProductNumeric_MPIAIJBACKEND below): holds the intermediate sequential
   products and the COO machinery used to assemble their values into the result. */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* NOTE(review): presumably requests using the merged Bloc for AB products -- confirm against the options handling */
  PetscBool P_oth_bind; /* NOTE(review): presumably controls CPU binding of P_oth -- confirm against the options handling */
} MatMatMPIAIJBACKEND;
6918 
6919 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6920 {
6921   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6922   PetscInt             i;
6923 
6924   PetscFunctionBegin;
6925   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6926   PetscCall(PetscFree(mmdata->bufa));
6927   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6928   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6929   PetscCall(MatDestroy(&mmdata->P_oth));
6930   PetscCall(MatDestroy(&mmdata->Bloc));
6931   PetscCall(PetscSFDestroy(&mmdata->sf));
6932   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6933   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6934   PetscCall(PetscFree(mmdata->own[0]));
6935   PetscCall(PetscFree(mmdata->own));
6936   PetscCall(PetscFree(mmdata->off[0]));
6937   PetscCall(PetscFree(mmdata->off));
6938   PetscCall(PetscFree(mmdata));
6939   PetscFunctionReturn(0);
6940 }
6941 
6942 /* Copy selected n entries with indices in idx[] of A to v[].
6943    If idx is NULL, copy the whole data array of A to v[]
6944  */
6945 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6946 {
6947   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6948 
6949   PetscFunctionBegin;
6950   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6951   if (f) {
6952     PetscCall((*f)(A, n, idx, v));
6953   } else {
6954     const PetscScalar *vv;
6955 
6956     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6957     if (n && idx) {
6958       PetscScalar    *w  = v;
6959       const PetscInt *oi = idx;
6960       PetscInt        j;
6961 
6962       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6963     } else {
6964       PetscCall(PetscArraycpy(v, vv, n));
6965     }
6966     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
6967   }
6968   PetscFunctionReturn(0);
6969 }
6970 
/* Numeric phase of a backend MPIAIJ matrix product: refresh the intermediate
   products, collect their values in COO order into coo_v/coo_w, and insert
   them into C with MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into the on-process (coo_v) and off-process (coo_w) value buffers */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym is only good for the first numeric call right after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  /* Run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    /* off[]/own[] store consecutive base pointers into one index allocation,
       so the difference of adjacent entries is the count for product i */
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products contribute no entries of C directly */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      /* split this product's values into off-process (coo_w) and on-process (coo_v) parts */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries: copy the whole value array in place */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: received values are appended after the on-process ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}
7015 
7016 /* Support for Pt * A, A * P, or Pt * A * P */
7017 #define MAX_NUMBER_INTERMEDIATE 4
/*
   MatProductSymbolic_MPIAIJBACKEND - symbolic phase for backend (device-capable) MPIAIJ
   matrix products C = A*P, A^t*B, or P^t*A*P.

   The parallel product is decomposed into at most MAX_NUMBER_INTERMEDIATE local (sequential)
   products mp[0..cp-1]. For each intermediate product, rmapt[]/cmapt[] record how its local
   row/column indices map to global indices of C (see the type-0/1/2 legend below), with
   rmapa[]/cmapa[] holding the lookup tables for type-2 maps. The (i,j) coordinates of all
   nonzeros (including those to be received from remote processes, communicated via an SF)
   are assembled and passed to MatSetPreallocationCOO(), so the numeric phase only needs to
   copy values (MatSetValuesCOO).
*/
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* If A is symmetric, A^t*B can be computed as the cheaper A*B */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* Determine C's sizes and whether product values must be scattered to other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* sequential run: everything is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* Build the chain of intermediate (sequential) products; cp counts them */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP: /* P_loc^t * A_diag * P_loc + (P_loc^t * (A_off * P_oth)) */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      /* A_off * P_oth is only a temporary (mptmp) operand for the following AtB product */
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  /* stash intermediate products for the numeric phase */
  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF is still needed so the numeric phase can call the SF API unconditionally */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}
7510 
/*
   MatProductSetFromOptions_MPIAIJBACKEND - select the backend symbolic product routine
   for supported product types (AB, AtB, PtAP), unless the user requested a CPU fallback.

   Without device support the backend path is always taken ('match' starts PETSC_TRUE).
   With device support, the backend path is used only when A and B have matching types,
   neither is bound to the CPU, and no *_backend_cpu option was set. If no backend
   routine is selected, the generic MPIAIJ product setup is used instead.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* only consider the device path when both operands are off-CPU and of the same type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}
7581 
7582 /*
7583    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7584 
7585    n - the number of block indices in cc[]
7586    cc - the block indices (must be large enough to contain the indices)
7587 */
7588 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7589 {
7590   PetscInt        cnt = -1, nidx, j;
7591   const PetscInt *idx;
7592 
7593   PetscFunctionBegin;
7594   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7595   if (nidx) {
7596     cnt     = 0;
7597     cc[cnt] = idx[0] / bs;
7598     for (j = 1; j < nidx; j++) {
7599       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7600     }
7601   }
7602   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7603   *n = cnt + 1;
7604   PetscFunctionReturn(0);
7605 }
7606 
7607 /*
7608     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7609 
7610     ncollapsed - the number of block indices
7611     collapsed - the block indices (must be large enough to contain the indices)
7612 */
/*
   MatCollapseRows - union of the block column indices of the bs consecutive rows
   [start, start+bs), i.e. the block-row sparsity pattern.

   w0/w1/w2 are caller-provided workspaces; the result may alias any of them (or an
   array produced by PetscMergeIntArray — NOTE(review): the ownership/allocation
   behavior of PetscMergeIntArray determines whether *collapsed needs freeing; confirm
   against its documentation before changing this routine).

   Input Parameters:
+  Amat  - the matrix
.  start - first (point) row of the block row
.  bs    - block size (number of rows merged)
-  w0, w1, w2 - integer work arrays, each large enough for a merged row

   Output Parameters:
+  ncollapsed - the number of block indices
-  collapsed  - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* seed with the first row, then merge in each subsequent row of the block */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap so 'cprev' always holds the accumulated union */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(0);
}
7630 
7631 /*
7632    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7633 */
7634 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7635 {
7636   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7637   Mat                tGmat;
7638   MPI_Comm           comm;
7639   const PetscScalar *vals;
7640   const PetscInt    *idx;
7641   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7642   MatScalar         *AA; // this is checked in graph
7643   PetscBool          isseqaij;
7644   Mat                a, b, c;
7645   MatType            jtype;
7646 
7647   PetscFunctionBegin;
7648   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7649   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7650   PetscCall(MatGetType(Gmat, &jtype));
7651   PetscCall(MatCreate(comm, &tGmat));
7652   PetscCall(MatSetType(tGmat, jtype));
7653 
7654   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7655                Also, if the matrix is symmetric, can we skip this
7656                operation? It can be very expensive on large matrices. */
7657 
7658   // global sizes
7659   PetscCall(MatGetSize(Gmat, &MM, &NN));
7660   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7661   nloc = Iend - Istart;
7662   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7663   if (isseqaij) {
7664     a = Gmat;
7665     b = NULL;
7666   } else {
7667     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7668     a             = d->A;
7669     b             = d->B;
7670     garray        = d->garray;
7671   }
7672   /* Determine upper bound on non-zeros needed in new filtered matrix */
7673   for (PetscInt row = 0; row < nloc; row++) {
7674     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7675     d_nnz[row] = ncols;
7676     if (ncols > maxcols) maxcols = ncols;
7677     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7678   }
7679   if (b) {
7680     for (PetscInt row = 0; row < nloc; row++) {
7681       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7682       o_nnz[row] = ncols;
7683       if (ncols > maxcols) maxcols = ncols;
7684       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7685     }
7686   }
7687   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7688   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7689   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7690   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7691   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7692   PetscCall(PetscFree2(d_nnz, o_nnz));
7693   //
7694   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7695   nnz0 = nnz1 = 0;
7696   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7697     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7698       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7699       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7700         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7701         if (PetscRealPart(sv) > vfilter) {
7702           nnz1++;
7703           PetscInt cid = idx[jj] + Istart; //diag
7704           if (c != a) cid = garray[idx[jj]];
7705           AA[ncol_row] = vals[jj];
7706           AJ[ncol_row] = cid;
7707           ncol_row++;
7708         }
7709       }
7710       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7711       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7712     }
7713   }
7714   PetscCall(PetscFree2(AA, AJ));
7715   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7716   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7717   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7718 
7719   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7720 
7721   *filteredG = tGmat;
7722   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7723   PetscFunctionReturn(0);
7724 }
7725 
/*
 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

 Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
. scale - scale with diagonal
- filter - filter out entries with absolute value below this threshold (ignored if negative)

 Output Parameter:
 . a_Gmat - output scalar graph >= 0

 */
7738 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7739 {
7740   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7741   MPI_Comm  comm;
7742   Mat       Gmat;
7743   PetscBool ismpiaij, isseqaij;
7744   Mat       a, b, c;
7745   MatType   jtype;
7746 
7747   PetscFunctionBegin;
7748   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7749   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7750   PetscCall(MatGetSize(Amat, &MM, &NN));
7751   PetscCall(MatGetBlockSize(Amat, &bs));
7752   nloc = (Iend - Istart) / bs;
7753 
7754   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7755   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7756   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7757 
7758   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7759   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7760      implementation */
7761   if (bs > 1) {
7762     PetscCall(MatGetType(Amat, &jtype));
7763     PetscCall(MatCreate(comm, &Gmat));
7764     PetscCall(MatSetType(Gmat, jtype));
7765     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7766     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7767     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7768       PetscInt  *d_nnz, *o_nnz;
7769       MatScalar *aa, val, AA[4096];
7770       PetscInt  *aj, *ai, AJ[4096], nc;
7771       if (isseqaij) {
7772         a = Amat;
7773         b = NULL;
7774       } else {
7775         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7776         a             = d->A;
7777         b             = d->B;
7778       }
7779       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7780       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7781       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7782         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
7783         const PetscInt *cols;
7784         for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
7785           PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
7786           nnz[brow / bs] = jj / bs;
7787           if (jj % bs) ok = 0;
7788           if (cols) j0 = cols[0];
7789           else j0 = -1;
7790           PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
7791           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7792           for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
7793             PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
7794             if (jj % bs) ok = 0;
7795             if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
7796             if (nnz[brow / bs] != jj / bs) ok = 0;
7797             PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
7798           }
7799           if (!ok) {
7800             PetscCall(PetscFree2(d_nnz, o_nnz));
7801             goto old_bs;
7802           }
7803         }
7804         PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax);
7805       }
7806       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7807       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7808       PetscCall(PetscFree2(d_nnz, o_nnz));
7809       // diag
7810       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7811         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7812         ai               = aseq->i;
7813         n                = ai[brow + 1] - ai[brow];
7814         aj               = aseq->j + ai[brow];
7815         for (int k = 0; k < n; k += bs) {        // block columns
7816           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7817           val        = 0;
7818           for (int ii = 0; ii < bs; ii++) { // rows in block
7819             aa = aseq->a + ai[brow + ii] + k;
7820             for (int jj = 0; jj < bs; jj++) {         // columns in block
7821               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7822             }
7823           }
7824           AA[k / bs] = val;
7825         }
7826         grow = Istart / bs + brow / bs;
7827         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7828       }
7829       // off-diag
7830       if (ismpiaij) {
7831         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7832         const PetscScalar *vals;
7833         const PetscInt    *cols, *garray = aij->garray;
7834         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7835         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7836           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7837           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7838             AA[k / bs] = 0;
7839             AJ[cidx]   = garray[cols[k]] / bs;
7840           }
7841           nc = ncols / bs;
7842           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7843           for (int ii = 0; ii < bs; ii++) { // rows in block
7844             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7845             for (int k = 0; k < ncols; k += bs) {
7846               for (int jj = 0; jj < bs; jj++) { // cols in block
7847                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7848               }
7849             }
7850             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7851           }
7852           grow = Istart / bs + brow / bs;
7853           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7854         }
7855       }
7856       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7857       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7858     } else {
7859       const PetscScalar *vals;
7860       const PetscInt    *idx;
7861       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7862     old_bs:
7863       /*
7864        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7865        */
7866       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7867       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7868       if (isseqaij) {
7869         PetscInt max_d_nnz;
7870         /*
7871          Determine exact preallocation count for (sequential) scalar matrix
7872          */
7873         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7874         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7875         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7876         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7877         PetscCall(PetscFree3(w0, w1, w2));
7878       } else if (ismpiaij) {
7879         Mat             Daij, Oaij;
7880         const PetscInt *garray;
7881         PetscInt        max_d_nnz;
7882         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7883         /*
7884          Determine exact preallocation count for diagonal block portion of scalar matrix
7885          */
7886         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7887         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7888         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7889         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7890         PetscCall(PetscFree3(w0, w1, w2));
7891         /*
7892          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
7893          */
7894         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7895           o_nnz[jj] = 0;
7896           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7897             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7898             o_nnz[jj] += ncols;
7899             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7900           }
7901           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7902         }
7903       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7904       /* get scalar copy (norms) of matrix */
7905       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7906       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7907       PetscCall(PetscFree2(d_nnz, o_nnz));
7908       for (Ii = Istart; Ii < Iend; Ii++) {
7909         PetscInt dest_row = Ii / bs;
7910         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7911         for (jj = 0; jj < ncols; jj++) {
7912           PetscInt    dest_col = idx[jj] / bs;
7913           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7914           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7915         }
7916         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7917       }
7918       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7919       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7920     }
7921   } else {
7922     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7923     else {
7924       Gmat = Amat;
7925       PetscCall(PetscObjectReference((PetscObject)Gmat));
7926     }
7927     if (isseqaij) {
7928       a = Gmat;
7929       b = NULL;
7930     } else {
7931       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7932       a             = d->A;
7933       b             = d->B;
7934     }
7935     if (filter >= 0 || scale) {
7936       /* take absolute value of each entry */
7937       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7938         MatInfo      info;
7939         PetscScalar *avals;
7940         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7941         PetscCall(MatSeqAIJGetArray(c, &avals));
7942         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7943         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7944       }
7945     }
7946   }
7947   if (symmetrize) {
7948     PetscBool isset, issym;
7949     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7950     if (!isset || !issym) {
7951       Mat matTrans;
7952       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7953       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7954       PetscCall(MatDestroy(&matTrans));
7955     }
7956     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7957   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
7958   if (scale) {
7959     /* scale c for all diagonal values = 1 or -1 */
7960     Vec diag;
7961     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
7962     PetscCall(MatGetDiagonal(Gmat, diag));
7963     PetscCall(VecReciprocal(diag));
7964     PetscCall(VecSqrtAbs(diag));
7965     PetscCall(MatDiagonalScale(Gmat, diag, diag));
7966     PetscCall(VecDestroy(&diag));
7967   }
7968   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
7969 
7970   if (filter >= 0) {
7971     Mat Fmat = NULL; /* some silly compiler needs this */
7972 
7973     PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
7974     PetscCall(MatDestroy(&Gmat));
7975     Gmat = Fmat;
7976   }
7977   *a_Gmat = Gmat;
7978   PetscFunctionReturn(0);
7979 }
7980 
7981 /*
7982     Special version for direct calls from Fortran
7983 */
7984 #include <petsc/private/fortranimpl.h>
7985 
7986 /* Change these macros so can be used in void function */
7987 /* Identical to PetscCallVoid, except it assigns to *_ierr */
7988 #undef PetscCall
7989 #define PetscCall(...) \
7990   do { \
7991     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
7992     if (PetscUnlikely(ierr_msv_mpiaij)) { \
7993       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
7994       return; \
7995     } \
7996   } while (0)
7997 
7998 #undef SETERRQ
7999 #define SETERRQ(comm, ierr, ...) \
8000   do { \
8001     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8002     return; \
8003   } while (0)
8004 
8005 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8006   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8007 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8008   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8009 #else
8010 #endif
/* Fortran direct-call version of MatSetValues for MPIAIJ matrices. Errors are reported
   through *_ierr using the PetscCall/SETERRQ macros redefined above (this is a void function). */
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    /* The MatSetValues_SeqAIJ_{A,B}_Private macros below expand in place and rely on
       these exact variable names being in scope */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Row is owned locally: set up search state for the diagonal (1) and off-diagonal (2) parts */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m]; /* column-major input (Fortran default) */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column falls in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Column falls in the off-diagonal block */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col));
              col--; /* colmap stores index+1 so 0 can mean "absent" */
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* New off-diagonal column after assembly: disassemble so it can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Off-process row: stash values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8125 
8126 /* Undefining these here since they were redefined from their original definition above! No
8127  * other PETSc functions should be defined past this point, as it is impossible to recover the
8128  * original definitions */
8129 #undef PetscCall
8130 #undef SETERRQ
8131