xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e874ec00d637a86419bb2cc912cf88b33e5547ef)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
/*
  MatGetRowIJ_MPIAIJ - Returns the CSR (i,j) description of the locally owned rows,
  with the diagonal and off-diagonal blocks merged into one sequential matrix.

  The merged matrix B is stashed on A (composed under the key "MatGetRowIJ_MPIAIJ")
  so MatRestoreRowIJ_MPIAIJ() can find it again; PetscObjectCompose() takes a
  reference, so the MatDestroy() below only drops this function's local reference.
*/
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}
20 
/*
  MatRestoreRowIJ_MPIAIJ - Releases the (i,j) arrays obtained with MatGetRowIJ_MPIAIJ().

  Retrieves the merged local matrix previously composed on A under the key
  "MatGetRowIJ_MPIAIJ", restores its row description, then composes NULL to drop
  the remaining reference held by A.
*/
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(0);
}
31 
32 /*MC
33    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
34 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
36    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
37   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
38   for communicators controlling multiple processes.  It is recommended that you call both of
39   the above preallocation routines for simplicity.
40 
41    Options Database Keys:
42 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
43 
44   Developer Note:
    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`;
    this type also automatically switches over to use inodes when enough exist.
47 
48   Level: beginner
49 
50 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
51 M*/
52 
53 /*MC
54    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
55 
56    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
57    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
58    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
59   for communicators controlling multiple processes.  It is recommended that you call both of
60   the above preallocation routines for simplicity.
61 
62    Options Database Keys:
63 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
64 
65   Level: beginner
66 
67 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
68 M*/
69 
70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
71 {
72   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
73 
74   PetscFunctionBegin;
75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
76   A->boundtocpu = flg;
77 #endif
78   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
79   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
80 
81   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
82    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
83    * to differ from the parent matrix. */
84   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
85   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
86 
87   PetscFunctionReturn(0);
88 }
89 
90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
91 {
92   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
93 
94   PetscFunctionBegin;
95   if (mat->A) {
96     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
97     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
98   }
99   PetscFunctionReturn(0);
100 }
101 
/*
  MatFindNonzeroRows_MPIAIJ - Builds an index set with the global numbers of all
  locally owned rows that contain at least one numerically nonzero entry in either
  the diagonal (A) or off-diagonal (B) block.

  Leaves *keptrows NULL when no process has a zero row (nothing to filter out).
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* First pass: count the locally owned rows that are entirely zero (cnt) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) { /* structurally empty row */
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* nonzero in diagonal block: row is kept */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1; /* nonzero in off-diagonal block: row is kept */
    }
    cnt++; /* every stored value in this row is numerically zero */
  ok1:;
  }
  /* n0rows = total number of zero rows across the whole communicator */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* no zero rows anywhere: leave *keptrows NULL and return */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  /* Second pass: collect the global indices of the rows that do have a nonzero */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i; /* local -> global row number */
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* the IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}
170 
171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
172 {
173   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
174   PetscBool   cong;
175 
176   PetscFunctionBegin;
177   PetscCall(MatHasCongruentLayouts(Y, &cong));
178   if (Y->assembled && cong) {
179     PetscCall(MatDiagonalSet(aij->A, D, is));
180   } else {
181     PetscCall(MatDiagonalSet_Default(Y, D, is));
182   }
183   PetscFunctionReturn(0);
184 }
185 
186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
187 {
188   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
189   PetscInt    i, rstart, nrows, *rows;
190 
191   PetscFunctionBegin;
192   *zrows = NULL;
193   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
194   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
195   for (i = 0; i < nrows; i++) rows[i] += rstart;
196   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
197   PetscFunctionReturn(0);
198 }
199 
/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (1/2/infinity norm,
  or sum/mean of real or imaginary parts) over the whole parallel matrix. The output
  array reductions[] has global length n and holds the same values on every rank
  after the concluding Allreduce.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work)); /* one zero-initialized slot per global column */
  /* NOTE(review): these paired get/restore calls appear to be done purely for their
     side effect, since the loops below read a_aij->a / b_aij->a directly -- presumably
     to ensure a valid host copy of the values; confirm against MatSeqAIJGetArrayRead() */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* Accumulate local contributions: diagonal-block columns are shifted by
     cmap->rstart to global numbering, off-diagonal columns go through garray. */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* Combine across ranks: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* Post-processing: square root for the 2-norm, divide by the global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
245 
246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
247 {
248   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
249   IS              sis, gis;
250   const PetscInt *isis, *igis;
251   PetscInt        n, *iis, nsis, ngis, rstart, i;
252 
253   PetscFunctionBegin;
254   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
255   PetscCall(MatFindNonzeroRows(a->B, &gis));
256   PetscCall(ISGetSize(gis, &ngis));
257   PetscCall(ISGetSize(sis, &nsis));
258   PetscCall(ISGetIndices(sis, &isis));
259   PetscCall(ISGetIndices(gis, &igis));
260 
261   PetscCall(PetscMalloc1(ngis + nsis, &iis));
262   PetscCall(PetscArraycpy(iis, igis, ngis));
263   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
264   n = ngis + nsis;
265   PetscCall(PetscSortRemoveDupsInt(&n, iis));
266   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
267   for (i = 0; i < n; i++) iis[i] += rstart;
268   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
269 
270   PetscCall(ISRestoreIndices(sis, &isis));
271   PetscCall(ISRestoreIndices(gis, &igis));
272   PetscCall(ISDestroy(&sis));
273   PetscCall(ISDestroy(&gis));
274   PetscFunctionReturn(0);
275 }
276 
277 /*
278   Local utility routine that creates a mapping from the global column
279 number to the local number in the off-diagonal part of the local
280 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
stores an order-N integer array) but access is fast.
283 */
/*
  MatCreateColmap_MPIAIJ_Private - Builds aij->colmap, the inverse of aij->garray:
  given a global column number, it yields the local column index in the off-diagonal
  block B. With PETSC_USE_CTABLE the map is a hash table (scalable); otherwise it is
  a dense array of length cmap->N. In both variants values are stored shifted by +1
  so that a lookup result of 0 means "column not present in B".
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: keys are also shifted by +1 */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense variant: zero-initialized so unset entries read back as 0 ("absent") */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}
300 
/*
  MatSetValues_SeqAIJ_A_Private - Inserts (or, for ADD_VALUES, adds) a single value
  at local (row, col) of the diagonal block A. It narrows the search window in the
  current row (rp1/ap1, nrow1 entries) by bisection, then scans linearly; when a
  genuinely new nonzero must be stored it grows the row via MatSeqXAIJReallocateAIJ()
  and shifts later entries up. Relies on the caller (MatSetValues_MPIAIJ()) having
  set up low1/high1/lastcol1/nonew/ignorezeroentries and friends; orow/ocol are the
  global indices, used only in the error message.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
346 
/*
  MatSetValues_SeqAIJ_B_Private - Off-diagonal-block twin of
  MatSetValues_SeqAIJ_A_Private(): inserts (or adds) a single value at (row, col)
  of block B using the rp2/ap2/nrow2/low2/high2/lastcol2 state set up by the caller.
  Unlike the A variant, zero values are dropped whenever ignorezeroentries is set
  (there is no row != col exemption, since B never holds diagonal entries).
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
391 
/*
  MatSetValuesRow_MPIAIJ - Overwrites all stored values of one locally owned row.
  v[] must list the new values in global column order: entries left of the diagonal
  block first, then the diagonal block, then entries right of the diagonal block.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL)); /* diag = first owned global row; for square A this marks the diagonal-block columns */
  row = row - diag;                                /* global -> local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    /* garray maps the off-diagonal block's local column indices back to global ones */
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    /* first l values of v belong left of the diagonal block */
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}
429 
/*
  MatSetValues_MPIAIJ - Inserts or adds an m x n logically dense block of values at
  global rows im[] / columns in[]. Locally owned rows are routed to the diagonal (A)
  or off-diagonal (B) sequential block via the MatSetValues_SeqAIJ_{A,B}_Private
  macros; rows owned by other ranks are stashed for communication at assembly time.
  Negative row or column indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* this rank owns the row */
      row      = im[i] - rstart;
      /* (re)initialize the per-row search state consumed by the insertion macros */
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B uses compacted local column ids: translate via colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new nonzero location in B but disassembly is not permitted */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* before first assembly B is indexed by global columns */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash it for MatAssemblyBegin/End */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}
538 
539 /*
540     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
541     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
542     No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
543 */
544 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
545 {
546   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
547   Mat         A      = aij->A; /* diagonal part of the matrix */
548   Mat         B      = aij->B; /* offdiagonal part of the matrix */
549   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
550   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
551   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
552   PetscInt   *ailen = a->ilen, *aj = a->j;
553   PetscInt   *bilen = b->ilen, *bj = b->j;
554   PetscInt    am          = aij->A->rmap->n, j;
555   PetscInt    diag_so_far = 0, dnz;
556   PetscInt    offd_so_far = 0, onz;
557 
558   PetscFunctionBegin;
559   /* Iterate over all rows of the matrix */
560   for (j = 0; j < am; j++) {
561     dnz = onz = 0;
562     /*  Iterate over all non-zero columns of the current row */
563     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
564       /* If column is in the diagonal */
565       if (mat_j[col] >= cstart && mat_j[col] < cend) {
566         aj[diag_so_far++] = mat_j[col] - cstart;
567         dnz++;
568       } else { /* off-diagonal entries */
569         bj[offd_so_far++] = mat_j[col];
570         onz++;
571       }
572     }
573     ailen[j] = dnz;
574     bilen[j] = onz;
575   }
576   PetscFunctionReturn(0);
577 }
578 
579 /*
580     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
581     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
582     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
583     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
584     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
585 */
586 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
587 {
588   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
589   Mat          A    = aij->A; /* diagonal part of the matrix */
590   Mat          B    = aij->B; /* offdiagonal part of the matrix */
591   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
592   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
593   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
594   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
595   PetscInt    *ailen = a->ilen, *aj = a->j;
596   PetscInt    *bilen = b->ilen, *bj = b->j;
597   PetscInt     am          = aij->A->rmap->n, j;
598   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
599   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
600   PetscScalar *aa = a->a, *ba = b->a;
601 
602   PetscFunctionBegin;
603   /* Iterate over all rows of the matrix */
604   for (j = 0; j < am; j++) {
605     dnz_row = onz_row = 0;
606     rowstart_offd     = full_offd_i[j];
607     rowstart_diag     = full_diag_i[j];
608     /*  Iterate over all non-zero columns of the current row */
609     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
610       /* If column is in the diagonal */
611       if (mat_j[col] >= cstart && mat_j[col] < cend) {
612         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
613         aa[rowstart_diag + dnz_row] = mat_a[col];
614         dnz_row++;
615       } else { /* off-diagonal entries */
616         bj[rowstart_offd + onz_row] = mat_j[col];
617         ba[rowstart_offd + onz_row] = mat_a[col];
618         onz_row++;
619       }
620     }
621     ailen[j] = dnz_row;
622     bilen[j] = onz_row;
623   }
624   PetscFunctionReturn(0);
625 }
626 
/*
  MatGetValues_MPIAIJ - Retrieves the m x n block of entries at global rows idxm[]
  and columns idxn[] into v (row-major). Only locally owned rows may be queried;
  negative indices are skipped, and entries outside the stored nonzero pattern of
  the off-diagonal block come back as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column in the diagonal block: direct local lookup */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* column in the off-diagonal block: translate through colmap (values are stored +1, 0 = absent) */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against a stale colmap entry pointing at a different global column */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
661 
/*
  MatAssemblyBegin_MPIAIJ - Starts shipping the stashed off-process entries to their
  owning ranks; returns immediately when stashing was disabled (nothing was stored).
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  /* nothing can be in the stash when off-process entries are disallowed or not stashed */
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}
675 
/*
  MatAssemblyEnd_MPIAIJ - Completes assembly: drains the communicated stash into the
  local blocks, assembles A and B, performs a collective check for disassembly, sets
  up the scatter machinery on first final assembly, and invalidates cached row
  scratch space and the cached diagonal.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive and insert all entries other ranks stashed for our rows */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* collective: MPI_LAND of was_assembled tells whether every rank is still assembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* row-access scratch space from MatGetRow() is stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal vector is stale as well */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
754 
755 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
756 {
757   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
758 
759   PetscFunctionBegin;
760   PetscCall(MatZeroEntries(l->A));
761   PetscCall(MatZeroEntries(l->B));
762   PetscFunctionReturn(0);
763 }
764 
/* Zeros the given global rows of the parallel matrix and places diag on their
   diagonal. When x and b are both provided (and layouts are congruent) the
   right-hand side is fixed so b[row] = diag * x[row] for each zeroed local row.
   Collective on A. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB; /* nonzero states of the two blocks before zeroing */
  PetscInt        *lrows;  /* locally owned rows to zero, local numbering */
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember pre-zeroing nonzero states so a pattern change can be detected below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: diagonal entries live in the diagonal block, so let it place diag */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' insertion policies, restored below */
    PetscBool   nnzA, nnzB; /* keepnonzeropattern flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily permit new nonzeros so the diagonal can be inserted */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert diag at (row,row) in global numbering, skipping rows beyond the column space */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rectangular matrix: no matching column */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA; /* restore saved insertion policies */
    aijB->nonew = nnwB;
  } else {
    /* diag == 0: simply zero the rows of both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++; /* any rank changed its pattern -> bump the collective state */
  PetscFunctionReturn(0);
}
838 
/* Zeros the given global rows AND the corresponding columns of the parallel matrix,
   placing diag on the diagonal of the zeroed rows. When x and b are provided the
   right-hand side is corrected for the eliminated columns (b -= A(:,zeroed)*x).
   Collective on A. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* local row count; reused later as a per-row entry count */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "row not requested" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* NOTE(review): only roots targeted by some leaf are written by the reduction, so they
     become nonnegative while untouched roots keep the -1 sentinel; the actual leaf values
     (global row indices) only matter through their nonnegativity under MPI_LOR -- confirm */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1; /* mark zeroed rows with 1 in the mask vector */
  PetscCall(VecRestoreArray(xmask, &bb));
  /* scatter the mask so every rank learns which of its ghost columns are eliminated */
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring in the ghost values of x needed for the rhs correction */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row index to true local row */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i]; /* entries in this compressed row */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this ghost column was eliminated */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}
956 
957 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
958 {
959   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
960   PetscInt    nt;
961   VecScatter  Mvctx = a->Mvctx;
962 
963   PetscFunctionBegin;
964   PetscCall(VecGetLocalSize(xx, &nt));
965   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
966   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
967   PetscUseTypeMethod(a->A, mult, xx, yy);
968   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
969   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
974 {
975   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
976 
977   PetscFunctionBegin;
978   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
983 {
984   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
985   VecScatter  Mvctx = a->Mvctx;
986 
987   PetscFunctionBegin;
988   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
989   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
990   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
991   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
992   PetscFunctionReturn(0);
993 }
994 
995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
996 {
997   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
998 
999   PetscFunctionBegin;
1000   /* do nondiagonal part */
1001   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1002   /* do local part */
1003   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1004   /* add partial results together */
1005   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1006   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1007   PetscFunctionReturn(0);
1008 }
1009 
/* Tests whether Bmat equals Amat^T to tolerance tol. A cheap collective check on
   the diagonal blocks runs first; only when it passes is the expensive
   off-diagonal comparison (built from MatCreateSubMatrices) performed. */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  /* all ranks must agree for the diagonal test to be conclusive */
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(0); /* sequential: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* NOTE(review): the allocation is sized with N while the second fill loop runs up to M,
     writing first + (M - last) entries; consistent only when M == N (square) -- confirm
     callers guarantee this */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;               /* global rows before our ownership range */
  for (i = last; i < M; i++) notme[i - last + first] = i; /* global rows after it */
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  /* B's submatrix with the index sets swapped must equal Aoff^T */
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}
1051 
/* A matrix is symmetric (to tolerance tol) exactly when it is its own transpose,
   so reuse the transpose test with Bmat = Amat = A. */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(0);
}
1058 
1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1060 {
1061   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1062 
1063   PetscFunctionBegin;
1064   /* do nondiagonal part */
1065   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1066   /* do local part */
1067   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1068   /* add partial results together */
1069   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1070   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 /*
1075   This only works correctly for square matrices where the subblock A->A is the
1076    diagonal block
1077 */
1078 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1079 {
1080   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1081 
1082   PetscFunctionBegin;
1083   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1084   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1085   PetscCall(MatGetDiagonal(a->A, v));
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1090 {
1091   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1092 
1093   PetscFunctionBegin;
1094   PetscCall(MatScale(a->A, aa));
1095   PetscCall(MatScale(a->B, aa));
1096   PetscFunctionReturn(0);
1097 }
1098 
/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  /* star forest used to move off-process COO entries to their owners */
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* first-pass permutations and jmaps for the diagonal (A*) and off-diagonal (B*) blocks */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* second-pass imaps, permutations, and jmaps */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  /* communication buffers (allocated together) and the send permutation */
  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}
1122 
1123 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1124 {
1125   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1126 
1127   PetscFunctionBegin;
1128 #if defined(PETSC_USE_LOG)
1129   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
1130 #endif
1131   PetscCall(MatStashDestroy_Private(&mat->stash));
1132   PetscCall(VecDestroy(&aij->diag));
1133   PetscCall(MatDestroy(&aij->A));
1134   PetscCall(MatDestroy(&aij->B));
1135 #if defined(PETSC_USE_CTABLE)
1136   PetscCall(PetscHMapIDestroy(&aij->colmap));
1137 #else
1138   PetscCall(PetscFree(aij->colmap));
1139 #endif
1140   PetscCall(PetscFree(aij->garray));
1141   PetscCall(VecDestroy(&aij->lvec));
1142   PetscCall(VecScatterDestroy(&aij->Mvctx));
1143   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
1144   PetscCall(PetscFree(aij->ld));
1145 
1146   /* Free COO */
1147   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
1148 
1149   PetscCall(PetscFree(mat->data));
1150 
1151   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1152   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
1153 
1154   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
1155   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
1156   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
1157   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
1158   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
1159   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
1160   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
1161   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
1162   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
1163   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
1164 #if defined(PETSC_HAVE_CUDA)
1165   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
1166 #endif
1167 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1168   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
1169 #endif
1170   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
1171 #if defined(PETSC_HAVE_ELEMENTAL)
1172   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
1173 #endif
1174 #if defined(PETSC_HAVE_SCALAPACK)
1175   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
1176 #endif
1177 #if defined(PETSC_HAVE_HYPRE)
1178   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
1179   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
1180 #endif
1181   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1182   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
1183   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
1184   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
1185   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
1186   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
1187 #if defined(PETSC_HAVE_MKL_SPARSE)
1188   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
1189 #endif
1190   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
1191   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
1192   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
1193   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
1194   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1199 {
1200   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1201   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1202   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1203   const PetscInt    *garray = aij->garray;
1204   const PetscScalar *aa, *ba;
1205   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1206   PetscInt          *rowlens;
1207   PetscInt          *colidxs;
1208   PetscScalar       *matvals;
1209 
1210   PetscFunctionBegin;
1211   PetscCall(PetscViewerSetUp(viewer));
1212 
1213   M  = mat->rmap->N;
1214   N  = mat->cmap->N;
1215   m  = mat->rmap->n;
1216   rs = mat->rmap->rstart;
1217   cs = mat->cmap->rstart;
1218   nz = A->nz + B->nz;
1219 
1220   /* write matrix header */
1221   header[0] = MAT_FILE_CLASSID;
1222   header[1] = M;
1223   header[2] = N;
1224   header[3] = nz;
1225   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1226   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1227 
1228   /* fill in and store row lengths  */
1229   PetscCall(PetscMalloc1(m, &rowlens));
1230   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1231   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1232   PetscCall(PetscFree(rowlens));
1233 
1234   /* fill in and store column indices */
1235   PetscCall(PetscMalloc1(nz, &colidxs));
1236   for (cnt = 0, i = 0; i < m; i++) {
1237     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1238       if (garray[B->j[jb]] > cs) break;
1239       colidxs[cnt++] = garray[B->j[jb]];
1240     }
1241     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1242     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1243   }
1244   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1245   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1246   PetscCall(PetscFree(colidxs));
1247 
1248   /* fill in and store nonzero values */
1249   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1250   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1251   PetscCall(PetscMalloc1(nz, &matvals));
1252   for (cnt = 0, i = 0; i < m; i++) {
1253     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1254       if (garray[B->j[jb]] > cs) break;
1255       matvals[cnt++] = ba[jb];
1256     }
1257     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1258     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1259   }
1260   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1261   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1262   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1263   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1264   PetscCall(PetscFree(matvals));
1265 
1266   /* write block size option to the viewer's .info file */
1267   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #include <petscdraw.h>
/* Views an MPIAIJ matrix on ASCII, draw, socket, or binary viewers. Special
   ASCII formats (load balance, info, info-detail, factor-info) and the binary
   path are handled early; anything that falls through gathers the whole matrix
   onto rank 0 and views it sequentially. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* gather each rank's local nonzero count and report min/avg/max */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank local sizes, nonzero counts, and inode usage, printed synchronized */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* sequential: the diagonal block is the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch is unreachable -- when iascii is true the first
       branch of this chain is taken instead; confirm whether it can be removed */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests every row/column; all other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
1397 
1398 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1399 {
1400   PetscBool iascii, isdraw, issocket, isbinary;
1401 
1402   PetscFunctionBegin;
1403   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1404   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1405   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1406   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1407   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1408   PetscFunctionReturn(0);
1409 }
1410 
/*
  MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

  Only "local" sweep variants (and Eisenstat) are supported: each process
  relaxes with its diagonal block A, while the off-diagonal block B couples
  in the most recent off-process values of xx (communicated via Mvctx).
  True parallel SOR is not implemented and raises PETSC_ERR_SUP.

  Input Parameters:
    matin  - the matrix
    bb     - right-hand side
    omega  - relaxation factor
    flag   - MatSORType bit flags selecting the sweep type
    fshift - diagonal shift
    its    - number of outer (global) iterations
    lits   - number of local iterations passed to the block's SOR
    xx     - solution vector (also an input unless SOR_ZERO_INITIAL_GUESS)
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector for the updated rhs bb - B*x; allocated only when needed */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* purely local operation: delegate directly to the diagonal block */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever at least one off-process rhs update happens:
     more than one iteration, a nonzero initial guess (note that
     `~flag & SOR_ZERO_INITIAL_GUESS` == `!(flag & SOR_ZERO_INITIAL_GUESS)`
     for a single-bit mask), or the Eisenstat trick */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      /* gather the off-process entries of xx into lvec */
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* backward half-sweep with zero initial guess */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* diagonal cached lazily; needed for the D*x term below */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*x */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any zero-pivot/underflow report from the diagonal block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}
1507 
1508 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1509 {
1510   Mat             aA, aB, Aperm;
1511   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1512   PetscScalar    *aa, *ba;
1513   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1514   PetscSF         rowsf, sf;
1515   IS              parcolp = NULL;
1516   PetscBool       done;
1517 
1518   PetscFunctionBegin;
1519   PetscCall(MatGetLocalSize(A, &m, &n));
1520   PetscCall(ISGetIndices(rowp, &rwant));
1521   PetscCall(ISGetIndices(colp, &cwant));
1522   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1523 
1524   /* Invert row permutation to find out where my rows should go */
1525   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1526   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1527   PetscCall(PetscSFSetFromOptions(rowsf));
1528   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1529   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1530   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1531 
1532   /* Invert column permutation to find out where my columns should go */
1533   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1534   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1535   PetscCall(PetscSFSetFromOptions(sf));
1536   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1537   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1538   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1539   PetscCall(PetscSFDestroy(&sf));
1540 
1541   PetscCall(ISRestoreIndices(rowp, &rwant));
1542   PetscCall(ISRestoreIndices(colp, &cwant));
1543   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1544 
1545   /* Find out where my gcols should go */
1546   PetscCall(MatGetSize(aB, NULL, &ng));
1547   PetscCall(PetscMalloc1(ng, &gcdest));
1548   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1549   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1550   PetscCall(PetscSFSetFromOptions(sf));
1551   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1552   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1553   PetscCall(PetscSFDestroy(&sf));
1554 
1555   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1556   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1557   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1558   for (i = 0; i < m; i++) {
1559     PetscInt    row = rdest[i];
1560     PetscMPIInt rowner;
1561     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1562     for (j = ai[i]; j < ai[i + 1]; j++) {
1563       PetscInt    col = cdest[aj[j]];
1564       PetscMPIInt cowner;
1565       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1566       if (rowner == cowner) dnnz[i]++;
1567       else onnz[i]++;
1568     }
1569     for (j = bi[i]; j < bi[i + 1]; j++) {
1570       PetscInt    col = gcdest[bj[j]];
1571       PetscMPIInt cowner;
1572       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1573       if (rowner == cowner) dnnz[i]++;
1574       else onnz[i]++;
1575     }
1576   }
1577   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1578   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1579   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1580   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1581   PetscCall(PetscSFDestroy(&rowsf));
1582 
1583   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1584   PetscCall(MatSeqAIJGetArray(aA, &aa));
1585   PetscCall(MatSeqAIJGetArray(aB, &ba));
1586   for (i = 0; i < m; i++) {
1587     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1588     PetscInt  j0, rowlen;
1589     rowlen = ai[i + 1] - ai[i];
1590     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1591       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1592       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1593     }
1594     rowlen = bi[i + 1] - bi[i];
1595     for (j0 = j = 0; j < rowlen; j0 = j) {
1596       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1597       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1598     }
1599   }
1600   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1601   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1602   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1603   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1604   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1605   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1606   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1607   PetscCall(PetscFree3(work, rdest, cdest));
1608   PetscCall(PetscFree(gcdest));
1609   if (parcolp) PetscCall(ISDestroy(&colp));
1610   *B = Aperm;
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1615 {
1616   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1617 
1618   PetscFunctionBegin;
1619   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1620   if (ghosts) *ghosts = aij->garray;
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1625 {
1626   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1627   Mat            A = mat->A, B = mat->B;
1628   PetscLogDouble isend[5], irecv[5];
1629 
1630   PetscFunctionBegin;
1631   info->block_size = 1.0;
1632   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1633 
1634   isend[0] = info->nz_used;
1635   isend[1] = info->nz_allocated;
1636   isend[2] = info->nz_unneeded;
1637   isend[3] = info->memory;
1638   isend[4] = info->mallocs;
1639 
1640   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1641 
1642   isend[0] += info->nz_used;
1643   isend[1] += info->nz_allocated;
1644   isend[2] += info->nz_unneeded;
1645   isend[3] += info->memory;
1646   isend[4] += info->mallocs;
1647   if (flag == MAT_LOCAL) {
1648     info->nz_used      = isend[0];
1649     info->nz_allocated = isend[1];
1650     info->nz_unneeded  = isend[2];
1651     info->memory       = isend[3];
1652     info->mallocs      = isend[4];
1653   } else if (flag == MAT_GLOBAL_MAX) {
1654     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1655 
1656     info->nz_used      = irecv[0];
1657     info->nz_allocated = irecv[1];
1658     info->nz_unneeded  = irecv[2];
1659     info->memory       = irecv[3];
1660     info->mallocs      = irecv[4];
1661   } else if (flag == MAT_GLOBAL_SUM) {
1662     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   }
1670   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1671   info->fill_ratio_needed = 0;
1672   info->factor_mallocs    = 0;
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1677 {
1678   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1679 
1680   PetscFunctionBegin;
1681   switch (op) {
1682   case MAT_NEW_NONZERO_LOCATIONS:
1683   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1684   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1685   case MAT_KEEP_NONZERO_PATTERN:
1686   case MAT_NEW_NONZERO_LOCATION_ERR:
1687   case MAT_USE_INODES:
1688   case MAT_IGNORE_ZERO_ENTRIES:
1689   case MAT_FORM_EXPLICIT_TRANSPOSE:
1690     MatCheckPreallocated(A, 1);
1691     PetscCall(MatSetOption(a->A, op, flg));
1692     PetscCall(MatSetOption(a->B, op, flg));
1693     break;
1694   case MAT_ROW_ORIENTED:
1695     MatCheckPreallocated(A, 1);
1696     a->roworiented = flg;
1697 
1698     PetscCall(MatSetOption(a->A, op, flg));
1699     PetscCall(MatSetOption(a->B, op, flg));
1700     break;
1701   case MAT_FORCE_DIAGONAL_ENTRIES:
1702   case MAT_SORTED_FULL:
1703     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1704     break;
1705   case MAT_IGNORE_OFF_PROC_ENTRIES:
1706     a->donotstash = flg;
1707     break;
1708   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1709   case MAT_SPD:
1710   case MAT_SYMMETRIC:
1711   case MAT_STRUCTURALLY_SYMMETRIC:
1712   case MAT_HERMITIAN:
1713   case MAT_SYMMETRY_ETERNAL:
1714   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1715   case MAT_SPD_ETERNAL:
1716     /* if the diagonal matrix is square it inherits some of the properties above */
1717     break;
1718   case MAT_SUBMAT_SINGLEIS:
1719     A->submat_singleis = flg;
1720     break;
1721   case MAT_STRUCTURE_ONLY:
1722     /* The option is handled directly by MatSetOption() */
1723     break;
1724   default:
1725     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
/*
  MatGetRow_MPIAIJ - Extract one (globally numbered, locally owned) row of the
  matrix, merging the diagonal (A) and off-diagonal (B) blocks into a single
  list sorted by increasing global column index.

  The returned idx/v arrays point into per-matrix scratch buffers
  (mat->rowindices / mat->rowvalues) and must be released with
  MatRestoreRow_MPIAIJ(); only one row may be active at a time
  (guarded by getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      /* combined length of row i across both blocks */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request the pieces (values and/or columns) the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps B's compressed column indices to global columns */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A's entries, then the remaining B entries */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* imark was not computed above (v == NULL); determine it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}
1813 
1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1815 {
1816   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1817 
1818   PetscFunctionBegin;
1819   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1820   aij->getrowactive = PETSC_FALSE;
1821   PetscFunctionReturn(0);
1822 }
1823 
/*
  MatNorm_MPIAIJ - Compute a matrix norm (NORM_FROBENIUS, NORM_1 or
  NORM_INFINITY) of an MPIAIJ matrix by walking the raw CSR arrays of the
  diagonal (A) and off-diagonal (B) blocks and reducing across the
  communicator.  NORM_2 is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer to the sequential implementation */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then global sum and sqrt */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per *global* column, sum across processes, take max */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        /* diagonal block columns are local: shift by cstart */
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* off-diagonal block columns map through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local max then a global max suffices */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}
1901 
/*
  MatTranspose_MPIAIJ - Form the transpose of an MPIAIJ matrix.

  For MAT_INITIAL_MATRIX (or in-place reuse) the routine first computes
  preallocation for the transpose: the diagonal-block counts are local
  (column histogram of A), while the off-diagonal counts are histogrammed
  locally and summed onto the owning processes with a PetscSF reduction.
  The diagonal block is then transposed with a fast local kernel and the
  off-diagonal block is inserted with MatSetValues().

  reuse semantics:
    MAT_INITIAL_MATRIX - create *matout
    MAT_REUSE_MATRIX   - fill the existing *matout (pattern must match)
    MAT_INPLACE_MATRIX - replace A itself (via MatHeaderMerge)
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* need to create/preallocate the result (in-place also goes through a temporary B) */
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* note swapped row/column sizes and block sizes for the transpose */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate B's compressed column indices to global numbering */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* row i of B^T-contribution is inserted as a column of the result */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: move B's guts into A, keeping the user's handle valid */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}
1995 
1996 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1997 {
1998   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1999   Mat         a = aij->A, b = aij->B;
2000   PetscInt    s1, s2, s3;
2001 
2002   PetscFunctionBegin;
2003   PetscCall(MatGetLocalSize(mat, &s2, &s3));
2004   if (rr) {
2005     PetscCall(VecGetLocalSize(rr, &s1));
2006     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
2007     /* Overlap communication with computation. */
2008     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2009   }
2010   if (ll) {
2011     PetscCall(VecGetLocalSize(ll, &s1));
2012     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2013     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2014   }
2015   /* scale  the diagonal block */
2016   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2017 
2018   if (rr) {
2019     /* Do a scatter end and then right scale the off-diagonal block */
2020     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2021     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2022   }
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2027 {
2028   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2029 
2030   PetscFunctionBegin;
2031   PetscCall(MatSetUnfactored(a->A));
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2036 {
2037   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2038   Mat         a, b, c, d;
2039   PetscBool   flg;
2040 
2041   PetscFunctionBegin;
2042   a = matA->A;
2043   b = matA->B;
2044   c = matB->A;
2045   d = matB->B;
2046 
2047   PetscCall(MatEqual(a, c, &flg));
2048   if (flg) PetscCall(MatEqual(b, d, &flg));
2049   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2054 {
2055   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2056   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2057 
2058   PetscFunctionBegin;
2059   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2060   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2061     /* because of the column compression in the off-processor part of the matrix a->B,
2062        the number of columns in a->B and b->B may be different, hence we cannot call
2063        the MatCopy() directly on the two parts. If need be, we can provide a more
2064        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2065        then copying the submatrices */
2066     PetscCall(MatCopy_Basic(A, B, str));
2067   } else {
2068     PetscCall(MatCopy(a->A, b->A, str));
2069     PetscCall(MatCopy(a->B, b->B, str));
2070   }
2071   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2072   PetscFunctionReturn(0);
2073 }
2074 
/*
  MatSetUp_MPIAIJ - Default setup: preallocate both blocks using the
  library's default heuristic (PETSC_DEFAULT nonzeros per row, no
  per-row counts).
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}
2081 
2082 /*
2083    Computes the number of nonzeros per row needed for preallocation when X and Y
2084    have different nonzero structure.
2085 */
2086 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2087 {
2088   PetscInt i, j, k, nzx, nzy;
2089 
2090   PetscFunctionBegin;
2091   /* Set the number of nonzeros in the new matrix */
2092   for (i = 0; i < m; i++) {
2093     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2094     nzx    = xi[i + 1] - xi[i];
2095     nzy    = yi[i + 1] - yi[i];
2096     nnz[i] = 0;
2097     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2098       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2099       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2100       nnz[i]++;
2101     }
2102     for (; k < nzy; k++) nnz[i]++;
2103   }
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2109 {
2110   PetscInt    m = Y->rmap->N;
2111   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2112   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2113 
2114   PetscFunctionBegin;
2115   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2120 {
2121   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2122 
2123   PetscFunctionBegin;
2124   if (str == SAME_NONZERO_PATTERN) {
2125     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2126     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2127   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2128     PetscCall(MatAXPY_Basic(Y, a, X, str));
2129   } else {
2130     Mat       B;
2131     PetscInt *nnz_d, *nnz_o;
2132 
2133     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2134     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2135     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2136     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2137     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2138     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2139     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2140     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2141     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2142     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2143     PetscCall(MatHeaderMerge(Y, &B));
2144     PetscCall(PetscFree(nnz_d));
2145     PetscCall(PetscFree(nnz_o));
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2151 
2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2153 {
2154   PetscFunctionBegin;
2155   if (PetscDefined(USE_COMPLEX)) {
2156     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2157 
2158     PetscCall(MatConjugate_SeqAIJ(aij->A));
2159     PetscCall(MatConjugate_SeqAIJ(aij->B));
2160   }
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2165 {
2166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2167 
2168   PetscFunctionBegin;
2169   PetscCall(MatRealPart(a->A));
2170   PetscCall(MatRealPart(a->B));
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2175 {
2176   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2177 
2178   PetscFunctionBegin;
2179   PetscCall(MatImaginaryPart(a->A));
2180   PetscCall(MatImaginaryPart(a->B));
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2185 {
2186   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2187   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2188   PetscScalar       *va, *vv;
2189   Vec                vB, vA;
2190   const PetscScalar *vb;
2191 
2192   PetscFunctionBegin;
2193   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2194   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2195 
2196   PetscCall(VecGetArrayWrite(vA, &va));
2197   if (idx) {
2198     for (i = 0; i < m; i++) {
2199       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2200     }
2201   }
2202 
2203   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2204   PetscCall(PetscMalloc1(m, &idxb));
2205   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2206 
2207   PetscCall(VecGetArrayWrite(v, &vv));
2208   PetscCall(VecGetArrayRead(vB, &vb));
2209   for (i = 0; i < m; i++) {
2210     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2211       vv[i] = vb[i];
2212       if (idx) idx[i] = a->garray[idxb[i]];
2213     } else {
2214       vv[i] = va[i];
2215       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2216     }
2217   }
2218   PetscCall(VecRestoreArrayWrite(vA, &vv));
2219   PetscCall(VecRestoreArrayWrite(vA, &va));
2220   PetscCall(VecRestoreArrayRead(vB, &vb));
2221   PetscCall(PetscFree(idxb));
2222   PetscCall(VecDestroy(&vA));
2223   PetscCall(VecDestroy(&vB));
2224   PetscFunctionReturn(0);
2225 }
2226 
/*
   For each local row, find the entry of smallest absolute value across the whole
   parallel row by merging the diagonal block mat->A with the off-diagonal block
   mat->B; implicit zeros of the compressed B block count as 0.0.  If idx[] is
   non-NULL it receives the global column of the winner (ties go to the smaller
   global column number).
*/
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* B is empty: compute directly into v's array via a wrapper Vec */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this rank owns no columns: each row's only value is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the minimum |value| is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the loop counter j is used as a running column count here,
         which presumes B's columns are sorted — confirm against assembly invariants */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a smaller |value|; ba/bj walk B's CSR arrays */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge: per row take the smaller (in modulus) of the two block minima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diag-block indices are local: shift to global */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column number */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2342 
/*
   For each local row, find the minimum entry (compared by real part) across the
   whole parallel row by merging the diagonal block mat->A with the off-diagonal
   block mat->B; implicit zeros of the compressed B block count as 0.0.  If idx[]
   is non-NULL it receives the global column of the winner (ties go to the
   smaller global column number).
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* B is empty: compute directly into v's array via a wrapper Vec */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this rank owns no columns: report the identity of min (PETSC_MAX_REAL) */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse: an implicit 0.0 exists, so the row minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the loop counter j is used as a running column count here,
         which presumes B's columns are sorted — confirm against assembly invariants */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a smaller value (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge: per row take the smaller of the two block minima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diag-block indices are local: shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column number */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2458 
/*
   For each local row, find the maximum entry (compared by real part) across the
   whole parallel row by merging the diagonal block mat->A with the off-diagonal
   block mat->B; implicit zeros of the compressed B block count as 0.0.  If idx[]
   is non-NULL it receives the global column of the winner (ties go to the
   smaller global column number).
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* B is empty: compute directly into v's array via a wrapper Vec */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* this rank owns no columns: report the identity of max (PETSC_MIN_REAL) */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the loop counter j is used as a running column count here,
         which presumes B's columns are sorted — confirm against assembly invariants */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a larger value (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge: per row take the larger of the two block maxima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diag-block indices are local: shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column number */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
2574 
2575 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2576 {
2577   Mat *dummy;
2578 
2579   PetscFunctionBegin;
2580   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2581   *newmat = *dummy;
2582   PetscCall(PetscFree(dummy));
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2587 {
2588   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2589 
2590   PetscFunctionBegin;
2591   PetscCall(MatInvertBlockDiagonal(a->A, values));
2592   A->factorerrortype = a->A->factorerrortype;
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2597 {
2598   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2599 
2600   PetscFunctionBegin;
2601   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2602   PetscCall(MatSetRandom(aij->A, rctx));
2603   if (x->assembled) {
2604     PetscCall(MatSetRandom(aij->B, rctx));
2605   } else {
2606     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2607   }
2608   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2609   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2614 {
2615   PetscFunctionBegin;
2616   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2617   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 /*@
2622    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2623 
2624    Not collective
2625 
2626    Input Parameter:
2627 .    A - the matrix
2628 
2629    Output Parameter:
2630 .    nz - the number of nonzeros
2631 
2632  Level: advanced
2633 
2634 .seealso: `MATMPIAIJ`, `Mat`
2635 @*/
2636 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2637 {
2638   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2639   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2640 
2641   PetscFunctionBegin;
2642   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 /*@
2647    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2648 
2649    Collective on A
2650 
2651    Input Parameters:
2652 +    A - the matrix
2653 -    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2654 
2655  Level: advanced
2656 
2657 @*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation if the matrix type provides one;
     silently a no-op otherwise (PetscTryMethod semantics) */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}
2664 
2665 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2666 {
2667   PetscBool sc = PETSC_FALSE, flg;
2668 
2669   PetscFunctionBegin;
2670   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2671   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2672   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2673   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2674   PetscOptionsHeadEnd();
2675   PetscFunctionReturn(0);
2676 }
2677 
/* Y = Y + a*I, ensuring there is room for one diagonal entry per row first */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: reserve one diagonal entry per row, nothing off-diagonal */
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    /* save and restore the new-nonzero policy flag around the re-preallocation,
       which may reset it */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}
2694 
2695 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2696 {
2697   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2698 
2699   PetscFunctionBegin;
2700   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2701   PetscCall(MatMissingDiagonal(a->A, missing, d));
2702   if (d) {
2703     PetscInt rstart;
2704     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2705     *d += rstart;
2706   }
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2711 {
2712   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2713 
2714   PetscFunctionBegin;
2715   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2720 {
2721   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2722 
2723   PetscFunctionBegin;
2724   PetscCall(MatEliminateZeros(a->A));
2725   PetscCall(MatEliminateZeros(a->B));
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 /* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ.  Entries are positional in struct _MatOps:
   the numeric comments mark slot indices, and NULL slots fall back to generic Mat
   behavior or are unsupported for this type.  Do not reorder entries. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2882 
2883 /* ----------------------------------------------------------------------------------------*/
2884 
2885 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2886 {
2887   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2888 
2889   PetscFunctionBegin;
2890   PetscCall(MatStoreValues(aij->A));
2891   PetscCall(MatStoreValues(aij->B));
2892   PetscFunctionReturn(0);
2893 }
2894 
2895 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2896 {
2897   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2898 
2899   PetscFunctionBegin;
2900   PetscCall(MatRetrieveValues(aij->A));
2901   PetscCall(MatRetrieveValues(aij->B));
2902   PetscFunctionReturn(0);
2903 }
2904 
/*
   Type-specific preallocation for MATMPIAIJ: tears down cached communication
   structures, (re)creates the sequential off-diagonal block b->B (and, on first
   use, the diagonal block b->A), then preallocates both with the supplied
   per-row nonzero counts.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* discard the global-to-local column map and the off-process scatter; they
     correspond to the old nonzero pattern and will be rebuilt */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on one rank there is no off-diagonal part, hence zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  if (!B->preallocated) {
    /* first preallocation: the local diagonal block must be created too */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  /* the matrix must be (re)assembled after a fresh preallocation */
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2946 
/* Reset the preallocation of B so values can be inserted again from scratch:
   drops assembly-time communication structures and resets both sequential
   blocks, while keeping their existing nonzero pattern allocation. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* Discard communication structures built by a previous assembly; they are
     recreated at the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* reset the diagonal and off-diagonal sequential blocks themselves */
  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2973 
/* Duplicate an MPIAIJ matrix.  Copies the parallel layout and bookkeeping of
   matin, duplicates the colmap/garray assembly data if present, and duplicates
   the two sequential blocks according to cpvalues (pattern only or pattern and
   values). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  /* carry over assembly state; insert mode starts fresh */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch space is not copied */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* duplicate the global-to-local column map built at assembly, if any */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* duplicate the local-to-global column map of the off-diagonal block, if any */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
3029 
3030 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3031 {
3032   PetscBool isbinary, ishdf5;
3033 
3034   PetscFunctionBegin;
3035   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3036   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3037   /* force binary viewer to load .info file if it has not yet done so */
3038   PetscCall(PetscViewerSetUp(viewer));
3039   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3040   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3041   if (isbinary) {
3042     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3043   } else if (ishdf5) {
3044 #if defined(PETSC_HAVE_HDF5)
3045     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3046 #else
3047     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3048 #endif
3049   } else {
3050     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3051   }
3052   PetscFunctionReturn(0);
3053 }
3054 
/* Read an MPIAIJ matrix from a PETSc binary viewer.

   The file holds a 4-entry header (classid, M, N, nz), then the M row
   lengths, then the nz column indices, then the nz values.  Each process
   reads the portion corresponding to its share of the rows as given by the
   matrix row layout, and the data is handed to
   MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* nz < 0 marks special on-disk formats that this reader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  /* convert the local row lengths into CSR row offsets by prefix summing */
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* validate that the row lengths across all processes sum to the header nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}
3104 
/* Not scalable because of ISAllGather() unless getting all columns. */
/* Build a sequential IS (*isseq) holding the global column indices selected
   by the parallel IS iscol.

   Fast path: when on every process iscol is a stride that exactly covers that
   process's column ownership range (i.e. all columns of mat are selected),
   the result is an identity stride IS of global size N and the non-scalable
   ISAllGather() is skipped. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    /* mlen is the end of the ownership range, so mlen - mstart is the local column count */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* MIN over all ranks: nonzero only if every process selects exactly its own columns */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}
3141 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameters:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol;
            allocated here, caller must PetscFree() it
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* x[j] = j for selected global columns j, -1 otherwise; cmap[j] = position of j in iscol */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: prefix sum of the local iscol sizes gives this rank's offset in the submatrix column numbering */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local column indices of mat->A, ownership of idx passes to the IS */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: local row indices of mat->A */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: ghost columns of B whose scattered marker is not -1 were selected by some rank's iscol */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
3253 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
/* On MAT_INITIAL_MATRIX the index sets used to pull rows/columns out of the
   diagonal (a->A) and off-diagonal (a->B) blocks are composed on the new
   matrix so that MAT_REUSE_MATRIX can retrieve them and update the submatrix
   in place. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; this takes ownership of Asub and destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* keep only the iscol_o entries whose global column survived in asub->garray;
         both garray and subgarray are traversed in increasing order */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}
3346 
3347 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3348 {
3349   IS        iscol_local = NULL, isrow_d;
3350   PetscInt  csize;
3351   PetscInt  n, i, j, start, end;
3352   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3353   MPI_Comm  comm;
3354 
3355   PetscFunctionBegin;
3356   /* If isrow has same processor distribution as mat,
3357      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3358   if (call == MAT_REUSE_MATRIX) {
3359     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3360     if (isrow_d) {
3361       sameRowDist  = PETSC_TRUE;
3362       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3363     } else {
3364       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3365       if (iscol_local) {
3366         sameRowDist  = PETSC_TRUE;
3367         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3368       }
3369     }
3370   } else {
3371     /* Check if isrow has same processor distribution as mat */
3372     sameDist[0] = PETSC_FALSE;
3373     PetscCall(ISGetLocalSize(isrow, &n));
3374     if (!n) {
3375       sameDist[0] = PETSC_TRUE;
3376     } else {
3377       PetscCall(ISGetMinMax(isrow, &i, &j));
3378       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3379       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3380     }
3381 
3382     /* Check if iscol has same processor distribution as mat */
3383     sameDist[1] = PETSC_FALSE;
3384     PetscCall(ISGetLocalSize(iscol, &n));
3385     if (!n) {
3386       sameDist[1] = PETSC_TRUE;
3387     } else {
3388       PetscCall(ISGetMinMax(iscol, &i, &j));
3389       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3390       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3391     }
3392 
3393     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3394     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3395     sameRowDist = tsameDist[0];
3396   }
3397 
3398   if (sameRowDist) {
3399     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3400       /* isrow and iscol have same processor distribution as mat */
3401       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3402       PetscFunctionReturn(0);
3403     } else { /* sameRowDist */
3404       /* isrow has same processor distribution as mat */
3405       if (call == MAT_INITIAL_MATRIX) {
3406         PetscBool sorted;
3407         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3408         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3409         PetscCall(ISGetSize(iscol, &i));
3410         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3411 
3412         PetscCall(ISSorted(iscol_local, &sorted));
3413         if (sorted) {
3414           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3415           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3416           PetscFunctionReturn(0);
3417         }
3418       } else { /* call == MAT_REUSE_MATRIX */
3419         IS iscol_sub;
3420         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3421         if (iscol_sub) {
3422           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3423           PetscFunctionReturn(0);
3424         }
3425       }
3426     }
3427   }
3428 
3429   /* General case: iscol -> iscol_local which has global size of iscol */
3430   if (call == MAT_REUSE_MATRIX) {
3431     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3432     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3433   } else {
3434     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3435   }
3436 
3437   PetscCall(ISGetLocalSize(iscol, &csize));
3438   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3439 
3440   if (call == MAT_INITIAL_MATRIX) {
3441     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3442     PetscCall(ISDestroy(&iscol_local));
3443   }
3444   PetscFunctionReturn(0);
3445 }
3446 
/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix

   Level: advanced

   Notes:
   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

   A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's compact local column indices to global column numbers, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew wraps B's oi/oj/oa arrays without copying */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* clear B's ownership flags so MatDestroy(&B) does not free the arrays now shared with Bnew */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
3540 
3541 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3542 
3543 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3544 {
3545   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3546   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3547   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3548   Mat             M, Msub, B = a->B;
3549   MatScalar      *aa;
3550   Mat_SeqAIJ     *aij;
3551   PetscInt       *garray = a->garray, *colsub, Ncols;
3552   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3553   IS              iscol_sub, iscmap;
3554   const PetscInt *is_idx, *cmap;
3555   PetscBool       allcolumns = PETSC_FALSE;
3556   MPI_Comm        comm;
3557 
3558   PetscFunctionBegin;
3559   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3560   if (call == MAT_REUSE_MATRIX) {
3561     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3562     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3563     PetscCall(ISGetLocalSize(iscol_sub, &count));
3564 
3565     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3566     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3567 
3568     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3569     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3570 
3571     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3572 
3573   } else { /* call == MAT_INITIAL_MATRIX) */
3574     PetscBool flg;
3575 
3576     PetscCall(ISGetLocalSize(iscol, &n));
3577     PetscCall(ISGetSize(iscol, &Ncols));
3578 
3579     /* (1) iscol -> nonscalable iscol_local */
3580     /* Check for special case: each processor gets entire matrix columns */
3581     PetscCall(ISIdentity(iscol_local, &flg));
3582     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3583     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3584     if (allcolumns) {
3585       iscol_sub = iscol_local;
3586       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3587       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3588 
3589     } else {
3590       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3591       PetscInt *idx, *cmap1, k;
3592       PetscCall(PetscMalloc1(Ncols, &idx));
3593       PetscCall(PetscMalloc1(Ncols, &cmap1));
3594       PetscCall(ISGetIndices(iscol_local, &is_idx));
3595       count = 0;
3596       k     = 0;
3597       for (i = 0; i < Ncols; i++) {
3598         j = is_idx[i];
3599         if (j >= cstart && j < cend) {
3600           /* diagonal part of mat */
3601           idx[count]     = j;
3602           cmap1[count++] = i; /* column index in submat */
3603         } else if (Bn) {
3604           /* off-diagonal part of mat */
3605           if (j == garray[k]) {
3606             idx[count]     = j;
3607             cmap1[count++] = i; /* column index in submat */
3608           } else if (j > garray[k]) {
3609             while (j > garray[k] && k < Bn - 1) k++;
3610             if (j == garray[k]) {
3611               idx[count]     = j;
3612               cmap1[count++] = i; /* column index in submat */
3613             }
3614           }
3615         }
3616       }
3617       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3618 
3619       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3620       PetscCall(ISGetBlockSize(iscol, &cbs));
3621       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3622 
3623       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3624     }
3625 
3626     /* (3) Create sequential Msub */
3627     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3628   }
3629 
3630   PetscCall(ISGetLocalSize(iscol_sub, &count));
3631   aij = (Mat_SeqAIJ *)(Msub)->data;
3632   ii  = aij->i;
3633   PetscCall(ISGetIndices(iscmap, &cmap));
3634 
3635   /*
3636       m - number of local rows
3637       Ncols - number of columns (same on all processors)
3638       rstart - first row in new global matrix generated
3639   */
3640   PetscCall(MatGetSize(Msub, &m, NULL));
3641 
3642   if (call == MAT_INITIAL_MATRIX) {
3643     /* (4) Create parallel newmat */
3644     PetscMPIInt rank, size;
3645     PetscInt    csize;
3646 
3647     PetscCallMPI(MPI_Comm_size(comm, &size));
3648     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3649 
3650     /*
3651         Determine the number of non-zeros in the diagonal and off-diagonal
3652         portions of the matrix in order to do correct preallocation
3653     */
3654 
3655     /* first get start and end of "diagonal" columns */
3656     PetscCall(ISGetLocalSize(iscol, &csize));
3657     if (csize == PETSC_DECIDE) {
3658       PetscCall(ISGetSize(isrow, &mglobal));
3659       if (mglobal == Ncols) { /* square matrix */
3660         nlocal = m;
3661       } else {
3662         nlocal = Ncols / size + ((Ncols % size) > rank);
3663       }
3664     } else {
3665       nlocal = csize;
3666     }
3667     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3668     rstart = rend - nlocal;
3669     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3670 
3671     /* next, compute all the lengths */
3672     jj = aij->j;
3673     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3674     olens = dlens + m;
3675     for (i = 0; i < m; i++) {
3676       jend = ii[i + 1] - ii[i];
3677       olen = 0;
3678       dlen = 0;
3679       for (j = 0; j < jend; j++) {
3680         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3681         else dlen++;
3682         jj++;
3683       }
3684       olens[i] = olen;
3685       dlens[i] = dlen;
3686     }
3687 
3688     PetscCall(ISGetBlockSize(isrow, &bs));
3689     PetscCall(ISGetBlockSize(iscol, &cbs));
3690 
3691     PetscCall(MatCreate(comm, &M));
3692     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3693     PetscCall(MatSetBlockSizes(M, bs, cbs));
3694     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3695     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3696     PetscCall(PetscFree(dlens));
3697 
3698   } else { /* call == MAT_REUSE_MATRIX */
3699     M = *newmat;
3700     PetscCall(MatGetLocalSize(M, &i, NULL));
3701     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3702     PetscCall(MatZeroEntries(M));
3703     /*
3704          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3705        rather than the slower MatSetValues().
3706     */
3707     M->was_assembled = PETSC_TRUE;
3708     M->assembled     = PETSC_FALSE;
3709   }
3710 
3711   /* (5) Set values of Msub to *newmat */
3712   PetscCall(PetscMalloc1(count, &colsub));
3713   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3714 
3715   jj = aij->j;
3716   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3717   for (i = 0; i < m; i++) {
3718     row = rstart + i;
3719     nz  = ii[i + 1] - ii[i];
3720     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3721     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3722     jj += nz;
3723     aa += nz;
3724   }
3725   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3726   PetscCall(ISRestoreIndices(iscmap, &cmap));
3727 
3728   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3729   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3730 
3731   PetscCall(PetscFree(colsub));
3732 
3733   /* save Msub, iscol_sub and iscmap used in processor for next request */
3734   if (call == MAT_INITIAL_MATRIX) {
3735     *newmat = M;
3736     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3737     PetscCall(MatDestroy(&Msub));
3738 
3739     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3740     PetscCall(ISDestroy(&iscol_sub));
3741 
3742     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3743     PetscCall(ISDestroy(&iscmap));
3744 
3745     if (iscol_local) {
3746       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3747       PetscCall(ISDestroy(&iscol_local));
3748     }
3749   }
3750   PetscFunctionReturn(0);
3751 }
3752 
3753 /*
3754     Not great since it makes two copies of the submatrix, first an SeqAIJ
3755   in local and then by concatenating the local matrices the end result.
3756   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3757 
3758   This requires a sequential iscol with all indices.
3759 */
/*
   Builds the parallel submatrix mat[isrow, iscol]: every rank first extracts a
   sequential copy (Mreuse) of its requested rows/columns via
   MatCreateSubMatrices_MPIAIJ_SingleIS_Local(), then the ranks concatenate
   these pieces into the parallel result *newmat.

   Input:
     mat    - the parallel MPIAIJ matrix
     isrow  - local rows of mat to extract
     iscol  - sequential IS holding ALL requested (global) column indices
     csize  - local column size of the result, or PETSC_DECIDE
     call   - MAT_INITIAL_MATRIX, or MAT_REUSE_MATRIX to refill a previous *newmat
   Output:
     newmat - the parallel submatrix
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns.
     All ranks must agree, hence the logical-AND reduction. */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column sizes yields this rank's [rstart,rend) diagonal-column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens; freed together below */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    /* insert row i of the sequential piece as global row rstart+i of M */
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}
3886 
3887 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3888 {
3889   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3890   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3891   const PetscInt *JJ;
3892   PetscBool       nooffprocentries;
3893   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3894 
3895   PetscFunctionBegin;
3896   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3897 
3898   PetscCall(PetscLayoutSetUp(B->rmap));
3899   PetscCall(PetscLayoutSetUp(B->cmap));
3900   m      = B->rmap->n;
3901   cstart = B->cmap->rstart;
3902   cend   = B->cmap->rend;
3903   rstart = B->rmap->rstart;
3904 
3905   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3906 
3907   if (PetscDefined(USE_DEBUG)) {
3908     for (i = 0; i < m; i++) {
3909       nnz = Ii[i + 1] - Ii[i];
3910       JJ  = J + Ii[i];
3911       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3912       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3913       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3914     }
3915   }
3916 
3917   for (i = 0; i < m; i++) {
3918     nnz     = Ii[i + 1] - Ii[i];
3919     JJ      = J + Ii[i];
3920     nnz_max = PetscMax(nnz_max, nnz);
3921     d       = 0;
3922     for (j = 0; j < nnz; j++) {
3923       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3924     }
3925     d_nnz[i] = d;
3926     o_nnz[i] = nnz - d;
3927   }
3928   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3929   PetscCall(PetscFree2(d_nnz, o_nnz));
3930 
3931   for (i = 0; i < m; i++) {
3932     ii = i + rstart;
3933     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3934   }
3935   nooffprocentries    = B->nooffprocentries;
3936   B->nooffprocentries = PETSC_TRUE;
3937   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3938   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3939   B->nooffprocentries = nooffprocentries;
3940 
3941   /* count number of entries below block diagonal */
3942   PetscCall(PetscFree(Aij->ld));
3943   PetscCall(PetscCalloc1(m, &ld));
3944   Aij->ld = ld;
3945   for (i = 0; i < m; i++) {
3946     nnz = Ii[i + 1] - Ii[i];
3947     j   = 0;
3948     while (j < nnz && J[j] < cstart) j++;
3949     ld[i] = j;
3950     J += nnz;
3951   }
3952 
3953   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3954   PetscFunctionReturn(0);
3955 }
3956 
3957 /*@
3958    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3959    (the default parallel PETSc format).
3960 
3961    Collective
3962 
3963    Input Parameters:
3964 +  B - the matrix
3965 .  i - the indices into j for the start of each local row (starts with zero)
3966 .  j - the column indices for each local row (starts with zero)
3967 -  v - optional values in the matrix
3968 
3969    Level: developer
3970 
3971    Notes:
3972        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3973      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3974      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3975 
3976        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3977 
3978        The format which is used for the sparse matrix input, is equivalent to a
3979     row-major ordering.. i.e for the following matrix, the input data expected is
3980     as shown
3981 
3982 $        1 0 0
3983 $        2 0 3     P0
3984 $       -------
3985 $        4 5 6     P1
3986 $
3987 $     Process0 [P0]: rows_owned=[0,1]
3988 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3989 $        j =  {0,0,2}  [size = 3]
3990 $        v =  {1,2,3}  [size = 3]
3991 $
3992 $     Process1 [P1]: rows_owned=[2]
3993 $        i =  {0,3}    [size = nrow+1  = 1+1]
3994 $        j =  {0,1,2}  [size = 3]
3995 $        v =  {4,5,6}  [size = 3]
3996 
3997 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
3998           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
3999 @*/
/* Thin dispatcher: forwards to the implementation registered under
   "MatMPIAIJSetPreallocationCSR_C" (MatMPIAIJSetPreallocationCSR_MPIAIJ above
   for MPIAIJ); PetscTryMethod makes this a no-op for types without one. */
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}
4006 
4007 /*@C
4008    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4009    (the default parallel PETSc format).  For good matrix assembly performance
4010    the user should preallocate the matrix storage by setting the parameters
4011    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4012    performance can be increased by more than a factor of 50.
4013 
4014    Collective
4015 
4016    Input Parameters:
4017 +  B - the matrix
4018 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4019            (same value is used for all local rows)
4020 .  d_nnz - array containing the number of nonzeros in the various rows of the
4021            DIAGONAL portion of the local submatrix (possibly different for each row)
4022            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
4023            The size of this array is equal to the number of local rows, i.e 'm'.
4024            For matrices that will be factored, you must leave room for (and set)
4025            the diagonal entry even if it is zero.
4026 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4027            submatrix (same value is used for all local rows).
4028 -  o_nnz - array containing the number of nonzeros in the various rows of the
4029            OFF-DIAGONAL portion of the local submatrix (possibly different for
4030            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
4031            structure. The size of this array is equal to the number
4032            of local rows, i.e 'm'.
4033 
4034    If the *_nnz parameter is given then the *_nz parameter is ignored
4035 
4036    The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
4037    storage.  The stored row and column indices begin with zero.
4038    See [Sparse Matrices](sec_matsparse) for details.
4039 
4040    The parallel matrix is partitioned such that the first m0 rows belong to
4041    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4042    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4043 
4044    The DIAGONAL portion of the local submatrix of a processor can be defined
4045    as the submatrix which is obtained by extraction the part corresponding to
4046    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4047    first row that belongs to the processor, r2 is the last row belonging to
4048    the this processor, and c1-c2 is range of indices of the local part of a
4049    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4050    common case of a square matrix, the row and column ranges are the same and
4051    the DIAGONAL part is also square. The remaining portion of the local
4052    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4053 
4054    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4055 
4056    You can call MatGetInfo() to get information on how effective the preallocation was;
4057    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4058    You can also run with the option -info and look for messages with the string
4059    malloc in them to see if additional memory allocation was needed.
4060 
4061    Example usage:
4062 
4063    Consider the following 8x8 matrix with 34 non-zero values, that is
4064    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4065    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4066    as follows:
4067 
4068 .vb
4069             1  2  0  |  0  3  0  |  0  4
4070     Proc0   0  5  6  |  7  0  0  |  8  0
4071             9  0 10  | 11  0  0  | 12  0
4072     -------------------------------------
4073            13  0 14  | 15 16 17  |  0  0
4074     Proc1   0 18  0  | 19 20 21  |  0  0
4075             0  0  0  | 22 23  0  | 24  0
4076     -------------------------------------
4077     Proc2  25 26 27  |  0  0 28  | 29  0
4078            30  0  0  | 31 32 33  |  0 34
4079 .ve
4080 
4081    This can be represented as a collection of submatrices as:
4082 
4083 .vb
4084       A B C
4085       D E F
4086       G H I
4087 .ve
4088 
4089    Where the submatrices A,B,C are owned by proc0, D,E,F are
4090    owned by proc1, G,H,I are owned by proc2.
4091 
4092    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4093    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4094    The 'M','N' parameters are 8,8, and have the same values on all procs.
4095 
4096    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4097    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4098    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4099    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4100    part as `MATSEQAIJ` matrices. for eg: proc1 will store [E] as a SeqAIJ
4101    matrix, and [DF] as another `MATSEQAIJ` matrix.
4102 
4103    When d_nz, o_nz parameters are specified, d_nz storage elements are
4104    allocated for every row of the local diagonal submatrix, and o_nz
4105    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4106    One way to choose d_nz and o_nz is to use the max nonzeros per local
4107    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4108    In this case, the values of d_nz,o_nz are:
4109 .vb
4110      proc0 : dnz = 2, o_nz = 2
4111      proc1 : dnz = 3, o_nz = 2
4112      proc2 : dnz = 1, o_nz = 4
4113 .ve
4114    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4115    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4116    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4117    34 values.
4118 
4119    When d_nnz, o_nnz parameters are specified, the storage is specified
4120    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4121    In the above case the values for d_nnz,o_nnz are:
4122 .vb
4123      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4124      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4125      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4126 .ve
4127    Here the space allocated is sum of all the above values i.e 34, and
4128    hence pre-allocation is perfect.
4129 
4130    Level: intermediate
4131 
4132 .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4133           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4134 @*/
/* Thin dispatcher: validates B, then forwards to the type-specific
   implementation registered under "MatMPIAIJSetPreallocation_C";
   PetscTryMethod makes this a no-op for matrix types without one. */
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}
4143 
4144 /*@
4145      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4146          CSR format for the local rows.
4147 
4148    Collective
4149 
4150    Input Parameters:
4151 +  comm - MPI communicator
4152 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4153 .  n - This value should be the same as the local size used in creating the
4154        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4155        calculated if N is given) For square matrices n is almost always m.
4156 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4157 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4158 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4159 .   j - column indices
4160 -   a - optional matrix values
4161 
4162    Output Parameter:
4163 .   mat - the matrix
4164 
4165    Level: intermediate
4166 
4167    Notes:
4168        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4169      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4170      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4171 
4172        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4173 
4174        The format which is used for the sparse matrix input, is equivalent to a
4175     row-major ordering.. i.e for the following matrix, the input data expected is
4176     as shown
4177 
4178        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4179 
4180 $        1 0 0
4181 $        2 0 3     P0
4182 $       -------
4183 $        4 5 6     P1
4184 $
4185 $     Process0 [P0]: rows_owned=[0,1]
4186 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4187 $        j =  {0,0,2}  [size = 3]
4188 $        v =  {1,2,3}  [size = 3]
4189 $
4190 $     Process1 [P1]: rows_owned=[2]
4191 $        i =  {0,3}    [size = nrow+1  = 1+1]
4192 $        j =  {0,1,2}  [size = 3]
4193 $        v =  {4,5,6}  [size = 3]
4194 
4195 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4196           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4197 @*/
4198 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4199 {
4200   PetscFunctionBegin;
4201   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4202   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4203   PetscCall(MatCreate(comm, mat));
4204   PetscCall(MatSetSizes(*mat, m, n, M, N));
4205   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4206   PetscCall(MatSetType(*mat, MATMPIAIJ));
4207   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4208   PetscFunctionReturn(0);
4209 }
4210 
4211 /*@
4212      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4213          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`
4214 
4215      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4216 
4217    Collective
4218 
4219    Input Parameters:
4220 +  mat - the matrix
4221 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4222 .  n - This value should be the same as the local size used in creating the
4223        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4224        calculated if N is given) For square matrices n is almost always m.
4225 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4226 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4227 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4228 .  J - column indices
4229 -  v - matrix values
4230 
4231    Level: intermediate
4232 
4233 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4234           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4235 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  /* Overwrites the numerical values of mat with v, assuming the identical CSR
     structure used when the matrix was created (see the manpage above).
     The M, N and J arguments are accepted for interface symmetry with
     MatCreateMPIAIJWithArrays() but are never read here. */
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;
  PetscInt       *ld  = Aij->ld; /* ld[i]: entries of row i left of the diagonal block (set by MatMPIAIJSetPreallocationCSR_MPIAIJ) */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    /* each CSR row of v is laid out as: ld[i] off-diagonal entries left of the
       diagonal block, then the diagonal-block entries, then the remaining
       off-diagonal entries */
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i]; /* number of diagonal-block entries in row i */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* only local values changed, so skip off-process communication during assembly */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}
4279 
4280 /*@
4281      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4282 
4283    Collective
4284 
4285    Input Parameters:
4286 +  mat - the matrix
4287 -  v - matrix values, stored by row
4288 
4289    Level: intermediate
4290 
4291    Note:
4292    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4293 
4294 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4295           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4296 @*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data; /* diagonal block */
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data; /* off-diagonal block */
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* row pointers of the diagonal and off-diagonal blocks (note: despite its name, Adj is Ao->i, not Ad->j) */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld; /* ld[i]: number of off-diagonal entries of local row i whose global column lies left of the diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n; /* number of local rows */

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of the current row's values within v[], which stores all local rows contiguously */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; /* total nonzeros in local row i (diag + off-diag) */
    ldi = ld[i];                                     /* off-diag entries preceding the diagonal block in this row */
    md  = Adi[i + 1] - Adi[i];                       /* nonzeros of row i belonging to the diagonal block */
    /* v stores row i as [left off-diag | diag | right off-diag]; scatter the three pieces into ao/ad */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;       /* advance to the next row of the diagonal block */
    ao += nnz - md; /* advance to the next row of the off-diagonal block */
    Iii += nnz;
  }
  /* the values inserted are purely local, so suppress off-process communication during the assembly below */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so cached derived data (norms, factorizations, ...) is invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries; /* restore the caller's setting */
  PetscFunctionReturn(0);
}
4338 
4339 /*@C
4340    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4341    (the default parallel PETSc format).  For good matrix assembly performance
4342    the user should preallocate the matrix storage by setting the parameters
4343    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4344    performance can be increased by more than a factor of 50.
4345 
4346    Collective
4347 
4348    Input Parameters:
4349 +  comm - MPI communicator
4350 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4351            This value should be the same as the local size used in creating the
4352            y vector for the matrix-vector product y = Ax.
4353 .  n - This value should be the same as the local size used in creating the
4354        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4355        calculated if N is given) For square matrices n is almost always m.
4356 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4357 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4358 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4359            (same value is used for all local rows)
4360 .  d_nnz - array containing the number of nonzeros in the various rows of the
4361            DIAGONAL portion of the local submatrix (possibly different for each row)
4362            or NULL, if d_nz is used to specify the nonzero structure.
4363            The size of this array is equal to the number of local rows, i.e 'm'.
4364 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4365            submatrix (same value is used for all local rows).
4366 -  o_nnz - array containing the number of nonzeros in the various rows of the
4367            OFF-DIAGONAL portion of the local submatrix (possibly different for
4368            each row) or NULL, if o_nz is used to specify the nonzero
4369            structure. The size of this array is equal to the number
4370            of local rows, i.e 'm'.
4371 
4372    Output Parameter:
4373 .  A - the matrix
4374 
4375    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4376    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4377    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4378 
4379    Notes:
4380    If the *_nnz parameter is given then the *_nz parameter is ignored
4381 
4382    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4383    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4384    storage requirements for this matrix.
4385 
4386    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4387    processor than it must be used on all processors that share the object for
4388    that argument.
4389 
4390    The user MUST specify either the local or global matrix dimensions
4391    (possibly both).
4392 
4393    The parallel matrix is partitioned across processors such that the
4394    first m0 rows belong to process 0, the next m1 rows belong to
4395    process 1, the next m2 rows belong to process 2 etc.. where
4396    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4397    values corresponding to [m x N] submatrix.
4398 
4399    The columns are logically partitioned with the n0 columns belonging
4400    to 0th partition, the next n1 columns belonging to the next
4401    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4402 
4403    The DIAGONAL portion of the local submatrix on any given processor
4404    is the submatrix corresponding to the rows and columns m,n
4405    corresponding to the given processor. i.e diagonal matrix on
4406    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4407    etc. The remaining portion of the local submatrix [m x (N-n)]
4408    constitute the OFF-DIAGONAL portion. The example below better
4409    illustrates this concept.
4410 
4411    For a square global matrix we define each processor's diagonal portion
4412    to be its local rows and the corresponding columns (a square submatrix);
4413    each processor's off-diagonal portion encompasses the remainder of the
4414    local matrix (a rectangular submatrix).
4415 
4416    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4417 
4418    When calling this routine with a single process communicator, a matrix of
4419    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4420    type of communicator, use the construction mechanism
4421 .vb
4422      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4423 .ve
4424 
4425 $     MatCreate(...,&A);
4426 $     MatSetType(A,MATMPIAIJ);
4427 $     MatSetSizes(A, m,n,M,N);
4428 $     MatMPIAIJSetPreallocation(A,...);
4429 
4430    By default, this format uses inodes (identical nodes) when possible.
4431    We search for consecutive rows with the same nonzero structure, thereby
4432    reusing matrix information to achieve increased efficiency.
4433 
4434    Options Database Keys:
4435 +  -mat_no_inode  - Do not use inodes
4436 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4437 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4438         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4439         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4440 
4441    Example usage:
4442 
4443    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4445    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4446    as follows
4447 
4448 .vb
4449             1  2  0  |  0  3  0  |  0  4
4450     Proc0   0  5  6  |  7  0  0  |  8  0
4451             9  0 10  | 11  0  0  | 12  0
4452     -------------------------------------
4453            13  0 14  | 15 16 17  |  0  0
4454     Proc1   0 18  0  | 19 20 21  |  0  0
4455             0  0  0  | 22 23  0  | 24  0
4456     -------------------------------------
4457     Proc2  25 26 27  |  0  0 28  | 29  0
4458            30  0  0  | 31 32 33  |  0 34
4459 .ve
4460 
4461    This can be represented as a collection of submatrices as
4462 
4463 .vb
4464       A B C
4465       D E F
4466       G H I
4467 .ve
4468 
4469    Where the submatrices A,B,C are owned by proc0, D,E,F are
4470    owned by proc1, G,H,I are owned by proc2.
4471 
4472    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4473    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4474    The 'M','N' parameters are 8,8, and have the same values on all procs.
4475 
4476    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4477    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4478    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4479    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4480    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4482 
4483    When d_nz, o_nz parameters are specified, d_nz storage elements are
4484    allocated for every row of the local diagonal submatrix, and o_nz
4485    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4487    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4488    In this case, the values of d_nz,o_nz are
4489 .vb
4490      proc0 : dnz = 2, o_nz = 2
4491      proc1 : dnz = 3, o_nz = 2
4492      proc2 : dnz = 1, o_nz = 4
4493 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.
4498 
4499    When d_nnz, o_nnz parameters are specified, the storage is specified
4500    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4501    In the above case the values for d_nnz,o_nnz are
4502 .vb
4503      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4504      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4505      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4506 .ve
4507    Here the space allocated is sum of all the above values i.e 34, and
4508    hence pre-allocation is perfect.
4509 
4510    Level: intermediate
4511 
4512 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4513           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4514 @*/
4515 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4516 {
4517   PetscMPIInt size;
4518 
4519   PetscFunctionBegin;
4520   PetscCall(MatCreate(comm, A));
4521   PetscCall(MatSetSizes(*A, m, n, M, N));
4522   PetscCallMPI(MPI_Comm_size(comm, &size));
4523   if (size > 1) {
4524     PetscCall(MatSetType(*A, MATMPIAIJ));
4525     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4526   } else {
4527     PetscCall(MatSetType(*A, MATSEQAIJ));
4528     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4529   }
4530   PetscFunctionReturn(0);
4531 }
4532 
4533 /*@C
4534   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4535 
4536   Not collective
4537 
4538   Input Parameter:
4539 . A - The `MATMPIAIJ` matrix
4540 
4541   Output Parameters:
4542 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4543 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4544 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4545 
4546   Note:
4547   The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4549   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4550   local column numbers to global column numbers in the original matrix.
4551 
4552   Level: intermediate
4553 
4554 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4555 @*/
4556 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4557 {
4558   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4559   PetscBool   flg;
4560 
4561   PetscFunctionBegin;
4562   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4563   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4564   if (Ad) *Ad = a->A;
4565   if (Ao) *Ao = a->B;
4566   if (colmap) *colmap = a->garray;
4567   PetscFunctionReturn(0);
4568 }
4569 
/* Concatenate the sequential matrices held on each rank of comm (stacked by rank order)
   into one parallel AIJ matrix with n local columns; two phases: symbolic preallocation
   (MAT_INITIAL_MATRIX only) followed by value insertion. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N)); /* m local rows contributed by this rank; N global columns */
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* first global row owned by this rank = sum of m over lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per local row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocation calls are issued; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); /* all insertions below are local */
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart; /* global row index of local row i */
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
4621 
4622 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4623 {
4624   PetscMPIInt        rank;
4625   PetscInt           m, N, i, rstart, nnz;
4626   size_t             len;
4627   const PetscInt    *indx;
4628   PetscViewer        out;
4629   char              *name;
4630   Mat                B;
4631   const PetscScalar *values;
4632 
4633   PetscFunctionBegin;
4634   PetscCall(MatGetLocalSize(A, &m, NULL));
4635   PetscCall(MatGetSize(A, NULL, &N));
4636   /* Should this be the type of the diagonal block of A? */
4637   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4638   PetscCall(MatSetSizes(B, m, N, m, N));
4639   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4640   PetscCall(MatSetType(B, MATSEQAIJ));
4641   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4642   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4643   for (i = 0; i < m; i++) {
4644     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4645     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4646     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4647   }
4648   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4649   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4650 
4651   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4652   PetscCall(PetscStrlen(outfile, &len));
4653   PetscCall(PetscMalloc1(len + 6, &name));
4654   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4655   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4656   PetscCall(PetscFree(name));
4657   PetscCall(MatView(B, out));
4658   PetscCall(PetscViewerDestroy(&out));
4659   PetscCall(MatDestroy(&B));
4660   PetscFunctionReturn(0);
4661 }
4662 
4663 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4664 {
4665   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4666 
4667   PetscFunctionBegin;
4668   if (!merge) PetscFunctionReturn(0);
4669   PetscCall(PetscFree(merge->id_r));
4670   PetscCall(PetscFree(merge->len_s));
4671   PetscCall(PetscFree(merge->len_r));
4672   PetscCall(PetscFree(merge->bi));
4673   PetscCall(PetscFree(merge->bj));
4674   PetscCall(PetscFree(merge->buf_ri[0]));
4675   PetscCall(PetscFree(merge->buf_ri));
4676   PetscCall(PetscFree(merge->buf_rj[0]));
4677   PetscCall(PetscFree(merge->buf_rj));
4678   PetscCall(PetscFree(merge->coi));
4679   PetscCall(PetscFree(merge->coj));
4680   PetscCall(PetscFree(merge->owners_co));
4681   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4682   PetscCall(PetscFree(merge));
4683   PetscFunctionReturn(0);
4684 }
4685 
4686 #include <../src/mat/utils/freespace.h>
4687 #include <petscbt.h>
4688 
/* Numeric phase of merging per-rank sequential matrices into mpimat: ship the value
   arrays of rows owned by other ranks, then accumulate local + received values row by
   row into the symbolic structure built by MatCreateMPIAIJSumSeqAIJSymbolic(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge support structure attached during the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc]; /* first row owned by proc: its values start at aa + ai[i] in CSR order */
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i)); /* scratch row of accumulated values, length = widest possible row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* both aj and bj_i are sorted; walk bj_i, bumping nextaj on each column match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* received value buffers live in one slab anchored at abuf_r[0] */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
4809 
/* Symbolic phase of merging per-rank sequential matrices into one MPIAIJ matrix:
   exchange the i/j (row/column) structure of rows owned by other ranks, merge local
   and received column patterns per row, preallocate the parallel matrix, and attach
   the merge state (for the numeric phase) to it in a PetscContainer. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;     /* local row count (resolved if PETSC_DECIDE was passed) */
  owners = merge->rowmap->range; /* owners[p] = first global row owned by rank p */

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0; /* nothing to send to self */
    } else {
      len_si[proc] = owners[proc + 1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* i-structure message only describes nonempty rows: 2*(nrows+1) integers */
      nrows = 0;
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      len_si[proc] = 2 * (nrows + 1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc]; /* column indices of proc's rows are contiguous starting at aj + ai[i] */
    PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) { /* only nonempty rows are described */
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0; /* reused below as max row length over all local rows */
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi; /* CSR row pointer of the merged structure */
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}
5055 
5056 /*@C
5057       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5058                  matrices from each processor
5059 
5060     Collective
5061 
5062    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix from each process
5065 .    m - number of local rows (or `PETSC_DECIDE`)
5066 .    n - number of local columns (or `PETSC_DECIDE`)
5067 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5068 
5069    Output Parameter:
5070 .    mpimat - the parallel matrix generated
5071 
5072     Level: advanced
5073 
5074    Note:
5075      The dimensions of the sequential matrix in each processor MUST be the same.
5076      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5077      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5078 @*/
5079 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5080 {
5081   PetscMPIInt size;
5082 
5083   PetscFunctionBegin;
5084   PetscCallMPI(MPI_Comm_size(comm, &size));
5085   if (size == 1) {
5086     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5087     if (scall == MAT_INITIAL_MATRIX) {
5088       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5089     } else {
5090       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5091     }
5092     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5093     PetscFunctionReturn(0);
5094   }
5095   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5096   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5097   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5098   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5099   PetscFunctionReturn(0);
5100 }
5101 
5102 /*@
5103      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5104           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5105           with `MatGetSize()`
5106 
5107     Not Collective
5108 
   Input Parameter:
.    A - the matrix
5112 
5113    Output Parameter:
5114 .    A_loc - the local sequential matrix generated
5115 
5116     Level: developer
5117 
5118    Notes:
5119      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5120 
5121      Destroy the matrix with `MatDestroy()`
5122 
5123 .seealso: `MatMPIAIJGetLocalMat()`
5124 @*/
5125 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5126 {
5127   PetscBool mpi;
5128 
5129   PetscFunctionBegin;
5130   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5131   if (mpi) {
5132     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5133   } else {
5134     *A_loc = A;
5135     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5136   }
5137   PetscFunctionReturn(0);
5138 }
5139 
5140 /*@
5141      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5142           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5143           with `MatGetSize()`
5144 
5145     Not Collective
5146 
5147    Input Parameters:
5148 +    A - the matrix
5149 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5150 
5151    Output Parameter:
5152 .    A_loc - the local sequential matrix generated
5153 
5154     Level: developer
5155 
5156    Notes:
5157      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5158 
5159      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5160      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5161      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5162      modify the values of the returned A_loc.
5163 
5164 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5165 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  /* Concatenates the diagonal (mpimat->A) and off-diagonal (mpimat->B) blocks of the
     MPIAIJ matrix into one SeqAIJ matrix whose columns use global numbering. */
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: B's local column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;                       /* aa/ba walk the value arrays; aav/bav stay at the base for the Restore calls */
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept any type whose name begins with "mpiaij" (covers derived types) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single process: the diagonal block already holds the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row pointers of the merged matrix: row i holds all of A's and B's entries for that row */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      /* merge each row keeping global column order: B entries left of the diagonal
         block first, then the diagonal block, then the remaining B entries */
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global column < cstart) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (remaining entries, global column >= cstart) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* sparsity is unchanged: only refresh the numerical values in the same merged order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A (global column < cstart) */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A (remaining entries) */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5270 
5271 /*@
5272      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5273           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5274 
5275     Not Collective
5276 
5277    Input Parameters:
5278 +    A - the matrix
5279 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5280 
5281    Output Parameters:
5282 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5283 -    A_loc - the local sequential matrix generated
5284 
5285     Level: developer
5286 
5287    Note:
5288      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering)
5289 
5290 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5291 @*/
5292 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5293 {
5294   Mat             Ao, Ad;
5295   const PetscInt *cmap;
5296   PetscMPIInt     size;
5297   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5298 
5299   PetscFunctionBegin;
5300   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5301   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5302   if (size == 1) {
5303     if (scall == MAT_INITIAL_MATRIX) {
5304       PetscCall(PetscObjectReference((PetscObject)Ad));
5305       *A_loc = Ad;
5306     } else if (scall == MAT_REUSE_MATRIX) {
5307       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5308     }
5309     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5310     PetscFunctionReturn(0);
5311   }
5312   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5313   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5314   if (f) {
5315     PetscCall((*f)(A, scall, glob, A_loc));
5316   } else {
5317     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5318     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5319     Mat_SeqAIJ        *c;
5320     PetscInt          *ai = a->i, *aj = a->j;
5321     PetscInt          *bi = b->i, *bj = b->j;
5322     PetscInt          *ci, *cj;
5323     const PetscScalar *aa, *ba;
5324     PetscScalar       *ca;
5325     PetscInt           i, j, am, dn, on;
5326 
5327     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5328     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5329     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5330     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5331     if (scall == MAT_INITIAL_MATRIX) {
5332       PetscInt k;
5333       PetscCall(PetscMalloc1(1 + am, &ci));
5334       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5335       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5336       ci[0] = 0;
5337       for (i = 0, k = 0; i < am; i++) {
5338         const PetscInt ncols_o = bi[i + 1] - bi[i];
5339         const PetscInt ncols_d = ai[i + 1] - ai[i];
5340         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5341         /* diagonal portion of A */
5342         for (j = 0; j < ncols_d; j++, k++) {
5343           cj[k] = *aj++;
5344           ca[k] = *aa++;
5345         }
5346         /* off-diagonal portion of A */
5347         for (j = 0; j < ncols_o; j++, k++) {
5348           cj[k] = dn + *bj++;
5349           ca[k] = *ba++;
5350         }
5351       }
5352       /* put together the new matrix */
5353       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5354       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5355       /* Since these are PETSc arrays, change flags to free them as necessary. */
5356       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5357       c->free_a  = PETSC_TRUE;
5358       c->free_ij = PETSC_TRUE;
5359       c->nonew   = 0;
5360       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5361     } else if (scall == MAT_REUSE_MATRIX) {
5362       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5363       for (i = 0; i < am; i++) {
5364         const PetscInt ncols_d = ai[i + 1] - ai[i];
5365         const PetscInt ncols_o = bi[i + 1] - bi[i];
5366         /* diagonal portion of A */
5367         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5368         /* off-diagonal portion of A */
5369         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5370       }
5371       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5372     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5373     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5374     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5375     if (glob) {
5376       PetscInt cst, *gidx;
5377 
5378       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5379       PetscCall(PetscMalloc1(dn + on, &gidx));
5380       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5381       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5382       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5383     }
5384   }
5385   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5386   PetscFunctionReturn(0);
5387 }
5388 
5389 /*@C
5390      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5391 
5392     Not Collective
5393 
5394    Input Parameters:
5395 +    A - the matrix
5396 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5397 -    row, col - index sets of rows and columns to extract (or NULL)
5398 
5399    Output Parameter:
5400 .    A_loc - the local sequential matrix generated
5401 
5402     Level: developer
5403 
5404 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5405 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  /* Extracts a sequential submatrix of A's local rows restricted to its nonzero columns
     (or to caller-supplied row/col index sets). */
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default rows: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default columns: owned columns plus the off-diagonal (ghost) columns, merged in
       ascending global order; relies on a->garray being sorted ascending */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    /* ghost columns below the owned range */
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    /* owned columns */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    /* ghost columns above the owned range */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola)); /* the compose above keeps a reference when needed */
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}
5458 
/*
 * Create a sequential AIJ matrix from selected rows of P; all columns of a matched row are
 * extracted. Rows may be local or remote. The routine is designed to be memory scalable:
 * no array is sized by a global dimension.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* root-side per-row (diag, off-diag) nonzero counts and their running offsets */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's entries */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* ntotalcols interleaves diag and off-diag leaves so that the two SFs below target
     disjoint positions of the single destination array p_oth->a/p_oth->j */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal needs to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (shifted in place, undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* map po->j back from global to its local numbering, in place */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse them later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}
5632 
5633 /*
5634  * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5635  * This supports MPIAIJ and MAIJ
5636  * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* groups of dof consecutive columns collapse to one key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step */
        mapping[i] = count - 1;
      }
    }
    /* map: off-diag column of A -> row of P_oth */
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}
5709 
5710 /*@C
5711   MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A
5712 
5713   Collective on A
5714 
5715   Input Parameters:
5716 + A - the first matrix in `MATMPIAIJ` format
5717 . B - the second matrix in `MATMPIAIJ` format
5718 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5719 
5720   Output Parameters:
5721 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5722 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5723 - B_seq - the sequential matrix generated
5724 
5725   Level: developer
5726 
5727 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* rows of B must line up with the columns of A */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* rows of B to extract = nonzero columns of local A, merged in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* hand the index sets back to the caller for later reuse, or destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(0);
}
5780 
5781 /*
5782     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5783     of the OFF-DIAGONAL portion of local A
5784 
5785     Collective on Mat
5786 
5787    Input Parameters:
5788 +    A,B - the matrices in mpiaij format
5789 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5790 
5791    Output Parameter:
5792 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5793 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5794 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5795 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5796 
5797     Developer Note:
5798     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5800 
5801     Level: developer
5802 
5803 */
5804 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5805 {
5806   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5807   Mat_SeqAIJ        *b_oth;
5808   VecScatter         ctx;
5809   MPI_Comm           comm;
5810   const PetscMPIInt *rprocs, *sprocs;
5811   const PetscInt    *srow, *rstarts, *sstarts;
5812   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5813   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5814   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5815   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5816   PetscMPIInt        size, tag, rank, nreqs;
5817 
5818   PetscFunctionBegin;
5819   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5820   PetscCallMPI(MPI_Comm_size(comm, &size));
5821 
5822   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5823     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5824   }
5825   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5826   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5827 
5828   if (size == 1) {
5829     startsj_s = NULL;
5830     bufa_ptr  = NULL;
5831     *B_oth    = NULL;
5832     PetscFunctionReturn(0);
5833   }
5834 
5835   ctx = a->Mvctx;
5836   tag = ((PetscObject)ctx)->tag;
5837 
5838   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5839   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5840   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5841   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5842   PetscCall(PetscMalloc1(nreqs, &reqs));
5843   rwaits = reqs;
5844   swaits = reqs + nrecvs;
5845 
5846   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5847   if (scall == MAT_INITIAL_MATRIX) {
5848     /* i-array */
5849     /*---------*/
5850     /*  post receives */
5851     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5852     for (i = 0; i < nrecvs; i++) {
5853       rowlen = rvalues + rstarts[i] * rbs;
5854       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5855       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5856     }
5857 
5858     /* pack the outgoing message */
5859     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5860 
5861     sstartsj[0] = 0;
5862     rstartsj[0] = 0;
5863     len         = 0; /* total length of j or a array to be sent */
5864     if (nsends) {
5865       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5866       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5867     }
5868     for (i = 0; i < nsends; i++) {
5869       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5870       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5871       for (j = 0; j < nrows; j++) {
5872         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5873         for (l = 0; l < sbs; l++) {
5874           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5875 
5876           rowlen[j * sbs + l] = ncols;
5877 
5878           len += ncols;
5879           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5880         }
5881         k++;
5882       }
5883       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5884 
5885       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5886     }
5887     /* recvs and sends of i-array are completed */
5888     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5889     PetscCall(PetscFree(svalues));
5890 
5891     /* allocate buffers for sending j and a arrays */
5892     PetscCall(PetscMalloc1(len + 1, &bufj));
5893     PetscCall(PetscMalloc1(len + 1, &bufa));
5894 
5895     /* create i-array of B_oth */
5896     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5897 
5898     b_othi[0] = 0;
5899     len       = 0; /* total length of j or a array to be received */
5900     k         = 0;
5901     for (i = 0; i < nrecvs; i++) {
5902       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5903       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5904       for (j = 0; j < nrows; j++) {
5905         b_othi[k + 1] = b_othi[k] + rowlen[j];
5906         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5907         k++;
5908       }
5909       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5910     }
5911     PetscCall(PetscFree(rvalues));
5912 
5913     /* allocate space for j and a arrays of B_oth */
5914     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5915     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5916 
5917     /* j-array */
5918     /*---------*/
5919     /*  post receives of j-array */
5920     for (i = 0; i < nrecvs; i++) {
5921       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5922       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5923     }
5924 
5925     /* pack the outgoing message j-array */
5926     if (nsends) k = sstarts[0];
5927     for (i = 0; i < nsends; i++) {
5928       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5929       bufJ  = bufj + sstartsj[i];
5930       for (j = 0; j < nrows; j++) {
5931         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5932         for (ll = 0; ll < sbs; ll++) {
5933           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5934           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5935           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5936         }
5937       }
5938       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5939     }
5940 
5941     /* recvs and sends of j-array are completed */
5942     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5943   } else if (scall == MAT_REUSE_MATRIX) {
5944     sstartsj = *startsj_s;
5945     rstartsj = *startsj_r;
5946     bufa     = *bufa_ptr;
5947     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5948     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5949   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5950 
5951   /* a-array */
5952   /*---------*/
5953   /*  post receives of a-array */
5954   for (i = 0; i < nrecvs; i++) {
5955     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5956     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5957   }
5958 
5959   /* pack the outgoing message a-array */
5960   if (nsends) k = sstarts[0];
5961   for (i = 0; i < nsends; i++) {
5962     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5963     bufA  = bufa + sstartsj[i];
5964     for (j = 0; j < nrows; j++) {
5965       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5966       for (ll = 0; ll < sbs; ll++) {
5967         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5968         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5969         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5970       }
5971     }
5972     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5973   }
5974   /* recvs and sends of a-array are completed */
5975   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5976   PetscCall(PetscFree(reqs));
5977 
5978   if (scall == MAT_INITIAL_MATRIX) {
5979     /* put together the new matrix */
5980     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5981 
5982     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5983     /* Since these are PETSc arrays, change flags to free them as necessary. */
5984     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5985     b_oth->free_a  = PETSC_TRUE;
5986     b_oth->free_ij = PETSC_TRUE;
5987     b_oth->nonew   = 0;
5988 
5989     PetscCall(PetscFree(bufj));
5990     if (!startsj_s || !bufa_ptr) {
5991       PetscCall(PetscFree2(sstartsj, rstartsj));
5992       PetscCall(PetscFree(bufa_ptr));
5993     } else {
5994       *startsj_s = sstartsj;
5995       *startsj_r = rstartsj;
5996       *bufa_ptr  = bufa;
5997     }
5998   } else if (scall == MAT_REUSE_MATRIX) {
5999     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6000   }
6001 
6002   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6003   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6004   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6005   PetscFunctionReturn(0);
6006 }
6007 
6008 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6009 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6010 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6011 #if defined(PETSC_HAVE_MKL_SPARSE)
6012 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6013 #endif
6014 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6016 #if defined(PETSC_HAVE_ELEMENTAL)
6017 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6018 #endif
6019 #if defined(PETSC_HAVE_SCALAPACK)
6020 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6021 #endif
6022 #if defined(PETSC_HAVE_HYPRE)
6023 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6024 #endif
6025 #if defined(PETSC_HAVE_CUDA)
6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6027 #endif
6028 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6030 #endif
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6032 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6033 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6034 
6035 /*
6036     Computes (B'*A')' since computing B*A directly is untenable
6037 
6038                n                       p                          p
6039         [             ]       [             ]         [                 ]
6040       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6041         [             ]       [             ]         [                 ]
6042 
6043 */
6044 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6045 {
6046   Mat At, Bt, Ct;
6047 
6048   PetscFunctionBegin;
6049   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6050   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6051   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6052   PetscCall(MatDestroy(&At));
6053   PetscCall(MatDestroy(&Bt));
6054   PetscCall(MatTransposeSetPrecursor(Ct, C));
6055   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6056   PetscCall(MatDestroy(&Ct));
6057   PetscFunctionReturn(0);
6058 }
6059 
6060 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6061 {
6062   PetscBool cisdense;
6063 
6064   PetscFunctionBegin;
6065   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6066   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6067   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6068   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
6069   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6070   PetscCall(MatSetUp(C));
6071 
6072   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6073   PetscFunctionReturn(0);
6074 }
6075 
6076 /* ----------------------------------------------------------------*/
6077 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6078 {
6079   Mat_Product *product = C->product;
6080   Mat          A = product->A, B = product->B;
6081 
6082   PetscFunctionBegin;
6083   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6084     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6085 
6086   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6087   C->ops->productsymbolic = MatProductSymbolic_AB;
6088   PetscFunctionReturn(0);
6089 }
6090 
6091 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6092 {
6093   Mat_Product *product = C->product;
6094 
6095   PetscFunctionBegin;
6096   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6097   PetscFunctionReturn(0);
6098 }
6099 
6100 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6101 
6102   Input Parameters:
6103 
6104     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6105     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6106 
6107     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6108 
6109     For Set1, j1[] contains column indices of the nonzeros.
6110     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6112     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6113 
6114     Similar for Set2.
6115 
6116     This routine merges the two sets of nonzeros row by row and removes repeats.
6117 
6118   Output Parameters: (memory is allocated by the caller)
6119 
6120     i[],j[]: the CSR of the merged matrix, which has m rows.
6121     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6122     imap2[]: similar to imap1[], but for Set2.
6123     Note we order nonzeros row-by-row and from left to right.
6124 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat, and its number of local rows */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge of the two sorted column-index ranges of row r; repeats of a
       unique nonzero are skipped in one step using the repeat counts encoded in jmap1/jmap2 */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero present only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Nonzero present only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of the loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: row r of the merged matrix ends at t */
  }
  PetscFunctionReturn(0);
}
6182 
6183 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6184 
6185   Input Parameters:
6186     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6187     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6188       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6189 
6190       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6191       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6192 
6193   Output Parameters:
6194     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6195     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6196       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6197       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6198 
6199     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6200       Atot: number of entries belonging to the diagonal block.
6201       Annz: number of unique nonzeros belonging to the diagonal block.
6202       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6203         repeats (i.e., same 'i,j' pair).
6204       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6205         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6206 
6207       Atot: number of entries belonging to the diagonal block
6208       Annz: number of unique nonzeros belonging to the diagonal block.
6209 
6210     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6211 
6212     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6213 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  /* Entries with negative row indices were flagged by the caller to be ignored */
  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      /* NOTE(review): the upper bound admits j[p] == mat->cmap->N, but valid global columns
         are 0..N-1; likely should be j[p] < mat->cmap->N — confirm */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    /* Sort the row's column indices (shifted diag ones sort first), carrying perm[] along */
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row; the += PETSC_MAX_INT also
       reverts the shift applied to the diagonal-block column indices above */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* move on to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* prefix sum of repeat counts */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}
6322 
6323 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6324 
6325   Input Parameters:
6326     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6327     nnz:  number of unique nonzeros in the merged matrix
6328     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6330 
6331   Output Parameter: (memory is allocated by the caller)
6332     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6333 
6334   Example:
6335     nnz1 = 4
6336     nnz  = 6
6337     imap = [1,3,4,5]
6338     jmap = [0,3,5,6,7]
6339    then,
6340     jmap_new = [0,0,3,3,5,6,7]
6341 */
6342 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6343 {
6344   PetscCount k, p;
6345 
6346   PetscFunctionBegin;
6347   jmap_new[0] = 0;
6348   p           = nnz;                /* p loops over jmap_new[] backwards */
6349   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6350     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6351   }
6352   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6353   PetscFunctionReturn(0);
6354 }
6355 
6356 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6357 {
6358   MPI_Comm    comm;
6359   PetscMPIInt rank, size;
6360   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6361   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6362   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6363 
6364   PetscFunctionBegin;
6365   PetscCall(PetscFree(mpiaij->garray));
6366   PetscCall(VecDestroy(&mpiaij->lvec));
6367 #if defined(PETSC_USE_CTABLE)
6368   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6369 #else
6370   PetscCall(PetscFree(mpiaij->colmap));
6371 #endif
6372   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6373   mat->assembled     = PETSC_FALSE;
6374   mat->was_assembled = PETSC_FALSE;
6375   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6376 
6377   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6378   PetscCallMPI(MPI_Comm_size(comm, &size));
6379   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6380   PetscCall(PetscLayoutSetUp(mat->rmap));
6381   PetscCall(PetscLayoutSetUp(mat->cmap));
6382   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6383   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6384   PetscCall(MatGetLocalSize(mat, &m, &n));
6385   PetscCall(MatGetSize(mat, &M, &N));
6386 
6387   /* ---------------------------------------------------------------------------*/
6388   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6389   /* entries come first, then local rows, then remote rows.                     */
6390   /* ---------------------------------------------------------------------------*/
6391   PetscCount n1 = coo_n, *perm1;
6392   PetscInt  *i1 = coo_i, *j1 = coo_j;
6393 
6394   PetscCall(PetscMalloc1(n1, &perm1));
6395   for (k = 0; k < n1; k++) perm1[k] = k;
6396 
6397   /* Manipulate indices so that entries with negative row or col indices will have smallest
6398      row indices, local entries will have greater but negative row indices, and remote entries
6399      will have positive row indices.
6400   */
6401   for (k = 0; k < n1; k++) {
6402     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6403     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6404     else {
6405       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6406       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6407     }
6408   }
6409 
6410   /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */
6411   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6412   for (k = 0; k < n1; k++) {
6413     if (i1[k] > PETSC_MIN_INT) break;
6414   }                                                                               /* Advance k to the first entry we need to take care of */
6415   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6416   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6417 
6418   /* ---------------------------------------------------------------------------*/
6419   /*           Split local rows into diag/offdiag portions                      */
6420   /* ---------------------------------------------------------------------------*/
6421   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6422   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6423   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6424 
6425   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6426   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6427   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6428 
6429   /* ---------------------------------------------------------------------------*/
6430   /*           Send remote rows to their owner                                  */
6431   /* ---------------------------------------------------------------------------*/
6432   /* Find which rows should be sent to which remote ranks*/
6433   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6434   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6435   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6436   const PetscInt *ranges;
6437   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6438 
6439   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6440   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6441   for (k = rem; k < n1;) {
6442     PetscMPIInt owner;
6443     PetscInt    firstRow, lastRow;
6444 
6445     /* Locate a row range */
6446     firstRow = i1[k]; /* first row of this owner */
6447     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6448     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6449 
6450     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6451     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6452 
6453     /* All entries in [k,p) belong to this remote owner */
6454     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6455       PetscMPIInt *sendto2;
6456       PetscInt    *nentries2;
6457       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6458 
6459       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6460       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6461       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6462       PetscCall(PetscFree2(sendto, nentries2));
6463       sendto   = sendto2;
6464       nentries = nentries2;
6465       maxNsend = maxNsend2;
6466     }
6467     sendto[nsend]   = owner;
6468     nentries[nsend] = p - k;
6469     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6470     nsend++;
6471     k = p;
6472   }
6473 
6474   /* Build 1st SF to know offsets on remote to send data */
6475   PetscSF      sf1;
6476   PetscInt     nroots = 1, nroots2 = 0;
6477   PetscInt     nleaves = nsend, nleaves2 = 0;
6478   PetscInt    *offsets;
6479   PetscSFNode *iremote;
6480 
6481   PetscCall(PetscSFCreate(comm, &sf1));
6482   PetscCall(PetscMalloc1(nsend, &iremote));
6483   PetscCall(PetscMalloc1(nsend, &offsets));
6484   for (k = 0; k < nsend; k++) {
6485     iremote[k].rank  = sendto[k];
6486     iremote[k].index = 0;
6487     nleaves2 += nentries[k];
6488     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6489   }
6490   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6491   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6492   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6493   PetscCall(PetscSFDestroy(&sf1));
6494   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6495 
6496   /* Build 2nd SF to send remote COOs to their owner */
6497   PetscSF sf2;
6498   nroots  = nroots2;
6499   nleaves = nleaves2;
6500   PetscCall(PetscSFCreate(comm, &sf2));
6501   PetscCall(PetscSFSetFromOptions(sf2));
6502   PetscCall(PetscMalloc1(nleaves, &iremote));
6503   p = 0;
6504   for (k = 0; k < nsend; k++) {
6505     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6506     for (q = 0; q < nentries[k]; q++, p++) {
6507       iremote[p].rank  = sendto[k];
6508       iremote[p].index = offsets[k] + q;
6509     }
6510   }
6511   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6512 
6513   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6514   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6515 
6516   /* Send the remote COOs to their owner */
6517   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6518   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6519   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6520   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6521   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6522   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6523   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6524 
6525   PetscCall(PetscFree(offsets));
6526   PetscCall(PetscFree2(sendto, nentries));
6527 
6528   /* ---------------------------------------------------------------*/
6529   /* Sort received COOs by row along with the permutation array     */
6530   /* ---------------------------------------------------------------*/
6531   for (k = 0; k < n2; k++) perm2[k] = k;
6532   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6533 
6534   /* ---------------------------------------------------------------*/
6535   /* Split received COOs into diag/offdiag portions                 */
6536   /* ---------------------------------------------------------------*/
6537   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6538   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6539   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6540 
6541   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6542   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6543 
6544   /* --------------------------------------------------------------------------*/
6545   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6546   /* --------------------------------------------------------------------------*/
6547   PetscInt *Ai, *Bi;
6548   PetscInt *Aj, *Bj;
6549 
6550   PetscCall(PetscMalloc1(m + 1, &Ai));
6551   PetscCall(PetscMalloc1(m + 1, &Bi));
6552   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6553   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6554 
6555   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6556   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6557   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6558   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6559   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6560 
6561   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6562   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6563 
6564   /* --------------------------------------------------------------------------*/
6565   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6566   /* expect nonzeros in A/B most likely have local contributing entries        */
6567   /* --------------------------------------------------------------------------*/
6568   PetscInt    Annz = Ai[m];
6569   PetscInt    Bnnz = Bi[m];
6570   PetscCount *Ajmap1_new, *Bjmap1_new;
6571 
6572   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6573   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6574 
6575   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6576   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6577 
6578   PetscCall(PetscFree(Aimap1));
6579   PetscCall(PetscFree(Ajmap1));
6580   PetscCall(PetscFree(Bimap1));
6581   PetscCall(PetscFree(Bjmap1));
6582   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6583   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6584   PetscCall(PetscFree(perm1));
6585   PetscCall(PetscFree3(i2, j2, perm2));
6586 
6587   Ajmap1 = Ajmap1_new;
6588   Bjmap1 = Bjmap1_new;
6589 
6590   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6591   if (Annz < Annz1 + Annz2) {
6592     PetscInt *Aj_new;
6593     PetscCall(PetscMalloc1(Annz, &Aj_new));
6594     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6595     PetscCall(PetscFree(Aj));
6596     Aj = Aj_new;
6597   }
6598 
6599   if (Bnnz < Bnnz1 + Bnnz2) {
6600     PetscInt *Bj_new;
6601     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6602     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6603     PetscCall(PetscFree(Bj));
6604     Bj = Bj_new;
6605   }
6606 
6607   /* --------------------------------------------------------------------------------*/
6608   /* Create new submatrices for on-process and off-process coupling                  */
6609   /* --------------------------------------------------------------------------------*/
6610   PetscScalar *Aa, *Ba;
6611   MatType      rtype;
6612   Mat_SeqAIJ  *a, *b;
6613   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6614   PetscCall(PetscCalloc1(Bnnz, &Ba));
6615   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6616   if (cstart) {
6617     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6618   }
6619   PetscCall(MatDestroy(&mpiaij->A));
6620   PetscCall(MatDestroy(&mpiaij->B));
6621   PetscCall(MatGetRootType_Private(mat, &rtype));
6622   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6623   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6624   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6625 
6626   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6627   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6628   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6629   a->free_a = b->free_a = PETSC_TRUE;
6630   a->free_ij = b->free_ij = PETSC_TRUE;
6631 
6632   /* conversion must happen AFTER multiply setup */
6633   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6634   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6635   PetscCall(VecDestroy(&mpiaij->lvec));
6636   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6637 
6638   mpiaij->coo_n   = coo_n;
6639   mpiaij->coo_sf  = sf2;
6640   mpiaij->sendlen = nleaves;
6641   mpiaij->recvlen = nroots;
6642 
6643   mpiaij->Annz = Annz;
6644   mpiaij->Bnnz = Bnnz;
6645 
6646   mpiaij->Annz2 = Annz2;
6647   mpiaij->Bnnz2 = Bnnz2;
6648 
6649   mpiaij->Atot1 = Atot1;
6650   mpiaij->Atot2 = Atot2;
6651   mpiaij->Btot1 = Btot1;
6652   mpiaij->Btot2 = Btot2;
6653 
6654   mpiaij->Ajmap1 = Ajmap1;
6655   mpiaij->Aperm1 = Aperm1;
6656 
6657   mpiaij->Bjmap1 = Bjmap1;
6658   mpiaij->Bperm1 = Bperm1;
6659 
6660   mpiaij->Aimap2 = Aimap2;
6661   mpiaij->Ajmap2 = Ajmap2;
6662   mpiaij->Aperm2 = Aperm2;
6663 
6664   mpiaij->Bimap2 = Bimap2;
6665   mpiaij->Bjmap2 = Bjmap2;
6666   mpiaij->Bperm2 = Bperm2;
6667 
6668   mpiaij->Cperm1 = Cperm1;
6669 
6670   /* Allocate in preallocation. If not used, it has zero cost on host */
6671   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6672   PetscFunctionReturn(0);
6673 }
6674 
/* Insert or add the user-provided COO values v[] (ordered as the i[],j[] arrays passed to
   MatSetPreallocationCOO()) into the diagonal (A) and off-diagonal (B) blocks, using the
   maps and permutations precomputed at preallocation time.  Entries destined for other
   ranks are packed into sendbuf and shipped via coo_sf; that communication is overlapped
   with the summation of the locally owned entries. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote; Cperm1[] maps send-buffer slots back into v[] */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B; Ajmap1[i]..Ajmap1[i+1] delimit the run of v[] entries that hit nonzero i */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2[i]/Bimap2[i] name the nonzero each received run contributes to */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}
6721 
6722 /* ----------------------------------------------------------------*/
6723 
6724 /*MC
6725    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6726 
6727    Options Database Keys:
6728 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6729 
6730    Level: beginner
6731 
6732    Notes:
6733     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
6734     in this case the values associated with the rows and columns one passes in are set to zero
6735     in the matrix
6736 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6739 
6740 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6741 M*/
6742 
/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ implementation data, installs the
   function table, creates the stash used to buffer off-process MatSetValues() entries,
   and composes the MatConvert/MatProduct/COO implementations that other code reaches
   via PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific entry points; the "_C" suffix marks names queried with PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}
6822 
6823 /*@C
6824      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6825          and "off-diagonal" part of the matrix in CSR format.
6826 
6827    Collective
6828 
6829    Input Parameters:
6830 +  comm - MPI communicator
6831 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6832 .  n - This value should be the same as the local size used in creating the
6833        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
6834        calculated if N is given) For square matrices n is almost always m.
6835 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
6836 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
6837 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6838 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6839 .   a - matrix values
6840 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6841 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6842 -   oa - matrix values
6843 
6844    Output Parameter:
6845 .   mat - the matrix
6846 
6847    Level: advanced
6848 
6849    Notes:
6850        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6851        must free the arrays once the matrix has been destroyed and not before.
6852 
6853        The i and j indices are 0 based
6854 
6855        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6856 
6857        This sets local rows and cannot be used to set off-processor values.
6858 
6859        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6860        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6861        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6862        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6863        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6864        communication if it is known that only local entries will be set.
6865 
6866 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6867           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6868 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Basic sanity checks on the user-supplied CSR arrays; fuller validation happens inside MatCreateSeqAIJWithArrays() */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The CSR arrays serve as the preallocation, so skip the usual preallocation path */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays directly (no copy): A is the "diagonal" block with local j[],
     B the "off-diagonal" block with global oj[] (see the manual page above); the caller
     retains ownership of all six arrays */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* All entries are local by construction, so assembly needs no stash communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}
6897 
/* Per-product context for backend (host or device) MatMat products with MPIAIJ operands
   (AB, AtB, PtAP); stored in C->product->data and freed by MatDestroy_MatMatMPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;    /* NOTE(review): presumably requests merging B's diag/offdiag for AB; confirm against the option handling in the symbolic phase */
  PetscBool P_oth_bind; /* NOTE(review): name suggests binding P_oth to CPU memory; verify where it is consumed */
} MatMatMPIAIJBACKEND;
6928 
/* Free a MatMatMPIAIJBACKEND context (C->product->data).
   Ordering matters: coo_v/coo_w are released with PetscSFFree(), which needs mmdata->sf
   still alive, so PetscSFDestroy() must come after the two PetscSFFree() calls. */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* sf (and its memory type) is used to free possibly device-resident COO buffers */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  PetscCall(PetscFree(mmdata->own[0])); /* own[0]/off[0] anchor the index storage shared by all rows -- allocated in the symbolic phase */
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}
6951 
6952 /* Copy selected n entries with indices in idx[] of A to v[].
6953    If idx is NULL, copy the whole data array of A to v[]
6954  */
6955 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6956 {
6957   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6958 
6959   PetscFunctionBegin;
6960   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6961   if (f) {
6962     PetscCall((*f)(A, n, idx, v));
6963   } else {
6964     const PetscScalar *vv;
6965 
6966     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6967     if (n && idx) {
6968       PetscScalar    *w  = v;
6969       const PetscInt *oi = idx;
6970       PetscInt        j;
6971 
6972       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6973     } else {
6974       PetscCall(PetscArraycpy(v, vv, n));
6975     }
6976     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
6977   }
6978   PetscFunctionReturn(0);
6979 }
6980 
/* Numeric phase of the backend MatMat product: refresh the temporary operand matrices
   (unless the symbolic phase's values are reusable), run the numeric op of each
   intermediate product, scatter their values into the COO buffers (coo_w for entries
   owned by other ranks, coo_v for on-process entries), gather off-process contributions
   when needed, and assemble C with a single MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* values from the symbolic phase can be reused at most once */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o track the write offsets into coo_v (on-process) and coo_w (off-process) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; /* number of off-process entries contributed by mp[i] */

    if (mmdata->mptmp[i]) continue; /* temporary products are consumed by later mp[] entries, not by C */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* every value of mp[i] is on-process: copy its whole data array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion: received values are appended after the on-process ones */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}
7025 
7026 /* Support for Pt * A, A * P, or Pt * A * P */
7027 #define MAX_NUMBER_INTERMEDIATE 4
/*
   MatProductSymbolic_MPIAIJBACKEND - Symbolic phase for C = A*P, P^t*A, or P^t*A*P with
   backend (device-capable) MPIAIJ subtypes.

   The product is decomposed into up to MAX_NUMBER_INTERMEDIATE local sequential products
   mp[], whose nonzeros are later assembled into C via COO insertion (see the companion
   numeric routine).  For each mp[i], rmapt[i]/cmapt[i] record how its local row/column
   indices map to global indices of C, and rmapa[i]/cmapa[i] hold the lookup tables for
   the sparse (type-2) case.  Entries whose target row is off-process are communicated
   with a PetscSF built here.
*/
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* A^t*B with symmetric A is computed as A*B; remember we used symmetry so that
     reuse is invalidated if the symmetry flag changes */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* select operands and the local/global sizes of C for the given product type */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: everything is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the chain of intermediate local products mp[0..cp-1] */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an input to the next product; its values never go to C directly */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}
7520 
/*
   MatProductSetFromOptions_MPIAIJBACKEND - Decide whether the backend (device-capable)
   product implementation handles this product, honoring user options that force the
   CPU path; otherwise fall back to the generic MPIAIJ implementation.

   Without device support the backend route is never taken (match starts PETSC_TRUE only
   to reach the final dispatch switch; see below).
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* backend path requires both operands on device and of the same concrete type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* option names differ depending on whether the user called the convenience API
       (MatMatMult etc.) or the MatProduct API directly */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}
7591 
7592 /*
7593    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7594 
7595    n - the number of block indices in cc[]
7596    cc - the block indices (must be large enough to contain the indices)
7597 */
7598 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7599 {
7600   PetscInt        cnt = -1, nidx, j;
7601   const PetscInt *idx;
7602 
7603   PetscFunctionBegin;
7604   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7605   if (nidx) {
7606     cnt     = 0;
7607     cc[cnt] = idx[0] / bs;
7608     for (j = 1; j < nidx; j++) {
7609       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7610     }
7611   }
7612   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7613   *n = cnt + 1;
7614   PetscFunctionReturn(0);
7615 }
7616 
7617 /*
7618     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7619 
7620     ncollapsed - the number of block indices
7621     collapsed - the block indices (must be large enough to contain the indices)
7622 */
7623 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7624 {
7625   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7626 
7627   PetscFunctionBegin;
7628   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7629   for (i = start + 1; i < start + bs; i++) {
7630     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7631     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7632     cprevtmp = cprev;
7633     cprev    = merged;
7634     merged   = cprevtmp;
7635   }
7636   *ncollapsed = nprev;
7637   if (collapsed) *collapsed = cprev;
7638   PetscFunctionReturn(0);
7639 }
7640 
7641 /*
7642    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7643 */
7644 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7645 {
7646   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7647   Mat                tGmat;
7648   MPI_Comm           comm;
7649   const PetscScalar *vals;
7650   const PetscInt    *idx;
7651   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7652   MatScalar         *AA; // this is checked in graph
7653   PetscBool          isseqaij;
7654   Mat                a, b, c;
7655   MatType            jtype;
7656 
7657   PetscFunctionBegin;
7658   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7659   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7660   PetscCall(MatGetType(Gmat, &jtype));
7661   PetscCall(MatCreate(comm, &tGmat));
7662   PetscCall(MatSetType(tGmat, jtype));
7663 
7664   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7665                Also, if the matrix is symmetric, can we skip this
7666                operation? It can be very expensive on large matrices. */
7667 
7668   // global sizes
7669   PetscCall(MatGetSize(Gmat, &MM, &NN));
7670   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7671   nloc = Iend - Istart;
7672   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7673   if (isseqaij) {
7674     a = Gmat;
7675     b = NULL;
7676   } else {
7677     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7678     a             = d->A;
7679     b             = d->B;
7680     garray        = d->garray;
7681   }
7682   /* Determine upper bound on non-zeros needed in new filtered matrix */
7683   for (PetscInt row = 0; row < nloc; row++) {
7684     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7685     d_nnz[row] = ncols;
7686     if (ncols > maxcols) maxcols = ncols;
7687     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7688   }
7689   if (b) {
7690     for (PetscInt row = 0; row < nloc; row++) {
7691       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7692       o_nnz[row] = ncols;
7693       if (ncols > maxcols) maxcols = ncols;
7694       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7695     }
7696   }
7697   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7698   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7699   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7700   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7701   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7702   PetscCall(PetscFree2(d_nnz, o_nnz));
7703   //
7704   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7705   nnz0 = nnz1 = 0;
7706   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7707     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7708       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7709       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7710         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7711         if (PetscRealPart(sv) > vfilter) {
7712           nnz1++;
7713           PetscInt cid = idx[jj] + Istart; //diag
7714           if (c != a) cid = garray[idx[jj]];
7715           AA[ncol_row] = vals[jj];
7716           AJ[ncol_row] = cid;
7717           ncol_row++;
7718         }
7719       }
7720       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7721       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7722     }
7723   }
7724   PetscCall(PetscFree2(AA, AJ));
7725   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7726   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7727   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7728 
7729   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7730 
7731   *filteredG = tGmat;
7732   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7733   PetscFunctionReturn(0);
7734 }
7735 
/*
 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

 Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
. scale - scale with diagonal
- filter - if nonnegative, drop graph entries whose absolute value does not exceed this threshold

 Output Parameter:
. a_Gmat - output scalar graph >= 0

 */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows == local rows of the scalar graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* Collapse each bs x bs block of Amat into one scalar graph entry (sum of |entries| in the block) */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      /* Fast path: valid only when every bs x bs block is fully dense; if a non-dense block is
         detected below we free the counts and fall through to the old_bs slow path */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, AA[4096]; /* fixed-size stack buffers; guarded by the nmax PetscCheck below */
      PetscInt  *aj, *ai, AJ[4096], nc;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A; /* local (diagonal) block */
        b             = d->B; /* off-process (off-diagonal) block */
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* Count block nonzeros per block row for preallocation, while verifying block density:
         each of the bs rows of a block row must have the same length (multiple of bs) and start
         at the same first column */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
        const PetscInt *cols;
        for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
          nnz[brow / bs] = jj / bs;
          if (jj % bs) ok = 0; /* row length not a multiple of bs -> blocks cannot all be dense */
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
            if (jj % bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
            if (nnz[brow / bs] != jj / bs) ok = 0;
            PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
          }
          if (!ok) {
            /* non-dense block detected: abandon the fast path (the slow path reallocates its own counts) */
            PetscCall(PetscFree2(d_nnz, o_nnz));
            goto old_bs;
          }
        }
        PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax);
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;                      /* direct CSR access to the local block */
        ai               = aseq->i;
        n                = ai[brow + 1] - ai[brow];
        aj               = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {         // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray; /* maps local off-diag column to global column */
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first pass over the leading row of the block: record global scalar column indices and
             zero the accumulators */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second pass over all bs rows of the block: accumulate the per-block norms */
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    } else {
      /* Slow path: no density assumption; also the landing point of the goto above */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; /* cap at number of off-process block columns */
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* ADD_VALUES accumulates |a_ij| of all entries of a block into one scalar graph entry */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the graph is Amat itself (copied only when it will be modified below) */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        /* NOTE(review): loop counter is int while info.nz_used is PetscLogDouble; fine below ~2e9
           local nonzeros — confirm against PETSc's current int-vs-PetscInt conventions */
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;
      /* G <- G + G^T makes the sparsity pattern (and values) symmetric */
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag)); /* symmetric (Jacobi-like) scaling: D^{-1/2} G D^{-1/2} */
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    Mat Fmat = NULL; /* some silly compiler needs this */

    /* drop entries below the threshold; replaces Gmat with the filtered matrix */
    PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
    PetscCall(MatDestroy(&Gmat));
    Gmat = Fmat;
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}
7990 
7991 /*
7992     Special version for direct calls from Fortran
7993 */
7994 #include <petsc/private/fortranimpl.h>
7995 
/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
/* These redefinitions let the Fortran direct-call wrapper below report errors through its
   PetscErrorCode *_ierr argument and return void instead of returning an error code */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

/* Identical to the normal SETERRQ, except it assigns to *_ierr and returns void */
#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)
8014 
8015 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8016   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8017 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8018   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8019 #else
8020 #endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat; /* Fortran passes every argument by reference */
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    /* The MatSetValues_SeqAIJ_{A,B}_Private macros capture all of the locals below by name;
       do not rename or remove them */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the search state (rp/ap/low/high/...) for both the
           diagonal (1) and off-diagonal (2) blocks, as required by the insertion macros */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j]; /* v is row-major */
          else value = v[i + j * m];             /* v is column-major */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column falls in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are silently ignored */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-diagonal block: translate the global column to the compressed local index */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* column not present in the assembled pattern: disassemble so new entries can be added */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash the values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8135 
8136 /* Undefining these here since they were redefined from their original definition above! No
8137  * other PETSc functions should be defined past this point, as it is impossible to recover the
8138  * original definitions */
8139 #undef PetscCall
8140 #undef SETERRQ
8141