xref: /petsc/src/mat/impls/nest/matnest.c (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 #include <../src/mat/impls/nest/matnestimpl.h> /*I   "petscmat.h"   I*/
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <petscsf.h>
4 
5 static PetscErrorCode MatSetUp_NestIS_Private(Mat, PetscInt, const IS[], PetscInt, const IS[]);
6 static PetscErrorCode MatCreateVecs_Nest(Mat, Vec *, Vec *);
7 static PetscErrorCode MatReset_Nest(Mat);
8 
9 PETSC_INTERN PetscErrorCode MatConvert_Nest_IS(Mat, MatType, MatReuse, Mat *);
10 
11 /* private functions */
12 static PetscErrorCode MatNestGetSizes_Private(Mat A, PetscInt *m, PetscInt *n, PetscInt *M, PetscInt *N) {
13   Mat_Nest *bA = (Mat_Nest *)A->data;
14   PetscInt  i, j;
15 
16   PetscFunctionBegin;
17   *m = *n = *M = *N = 0;
18   for (i = 0; i < bA->nr; i++) { /* rows */
19     PetscInt sm, sM;
20     PetscCall(ISGetLocalSize(bA->isglobal.row[i], &sm));
21     PetscCall(ISGetSize(bA->isglobal.row[i], &sM));
22     *m += sm;
23     *M += sM;
24   }
25   for (j = 0; j < bA->nc; j++) { /* cols */
26     PetscInt sn, sN;
27     PetscCall(ISGetLocalSize(bA->isglobal.col[j], &sn));
28     PetscCall(ISGetSize(bA->isglobal.col[j], &sN));
29     *n += sn;
30     *N += sN;
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 /* operations */
36 static PetscErrorCode MatMult_Nest(Mat A, Vec x, Vec y) {
37   Mat_Nest *bA = (Mat_Nest *)A->data;
38   Vec      *bx = bA->right, *by = bA->left;
39   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
40 
41   PetscFunctionBegin;
42   for (i = 0; i < nr; i++) PetscCall(VecGetSubVector(y, bA->isglobal.row[i], &by[i]));
43   for (i = 0; i < nc; i++) PetscCall(VecGetSubVector(x, bA->isglobal.col[i], &bx[i]));
44   for (i = 0; i < nr; i++) {
45     PetscCall(VecZeroEntries(by[i]));
46     for (j = 0; j < nc; j++) {
47       if (!bA->m[i][j]) continue;
48       /* y[i] <- y[i] + A[i][j] * x[j] */
49       PetscCall(MatMultAdd(bA->m[i][j], bx[j], by[i], by[i]));
50     }
51   }
52   for (i = 0; i < nr; i++) PetscCall(VecRestoreSubVector(y, bA->isglobal.row[i], &by[i]));
53   for (i = 0; i < nc; i++) PetscCall(VecRestoreSubVector(x, bA->isglobal.col[i], &bx[i]));
54   PetscFunctionReturn(0);
55 }
56 
57 static PetscErrorCode MatMultAdd_Nest(Mat A, Vec x, Vec y, Vec z) {
58   Mat_Nest *bA = (Mat_Nest *)A->data;
59   Vec      *bx = bA->right, *bz = bA->left;
60   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
61 
62   PetscFunctionBegin;
63   for (i = 0; i < nr; i++) PetscCall(VecGetSubVector(z, bA->isglobal.row[i], &bz[i]));
64   for (i = 0; i < nc; i++) PetscCall(VecGetSubVector(x, bA->isglobal.col[i], &bx[i]));
65   for (i = 0; i < nr; i++) {
66     if (y != z) {
67       Vec by;
68       PetscCall(VecGetSubVector(y, bA->isglobal.row[i], &by));
69       PetscCall(VecCopy(by, bz[i]));
70       PetscCall(VecRestoreSubVector(y, bA->isglobal.row[i], &by));
71     }
72     for (j = 0; j < nc; j++) {
73       if (!bA->m[i][j]) continue;
74       /* y[i] <- y[i] + A[i][j] * x[j] */
75       PetscCall(MatMultAdd(bA->m[i][j], bx[j], bz[i], bz[i]));
76     }
77   }
78   for (i = 0; i < nr; i++) PetscCall(VecRestoreSubVector(z, bA->isglobal.row[i], &bz[i]));
79   for (i = 0; i < nc; i++) PetscCall(VecRestoreSubVector(x, bA->isglobal.col[i], &bx[i]));
80   PetscFunctionReturn(0);
81 }
82 
83 typedef struct {
84   Mat         *workC;      /* array of Mat with specific containers depending on the underlying MatMatMult implementation */
85   PetscScalar *tarray;     /* buffer for storing all temporary products A[i][j] B[j] */
86   PetscInt    *dm, *dn, k; /* displacements and number of submatrices */
87 } Nest_Dense;
88 
89 PETSC_INTERN PetscErrorCode MatProductNumeric_Nest_Dense(Mat C) {
90   Mat_Nest          *bA;
91   Nest_Dense        *contents;
92   Mat                viewB, viewC, productB, workC;
93   const PetscScalar *barray;
94   PetscScalar       *carray;
95   PetscInt           i, j, M, N, nr, nc, ldb, ldc;
96   Mat                A, B;
97 
98   PetscFunctionBegin;
99   MatCheckProduct(C, 3);
100   A = C->product->A;
101   B = C->product->B;
102   PetscCall(MatGetSize(B, NULL, &N));
103   if (!N) {
104     PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
105     PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
106     PetscFunctionReturn(0);
107   }
108   contents = (Nest_Dense *)C->product->data;
109   PetscCheck(contents, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
110   bA = (Mat_Nest *)A->data;
111   nr = bA->nr;
112   nc = bA->nc;
113   PetscCall(MatDenseGetLDA(B, &ldb));
114   PetscCall(MatDenseGetLDA(C, &ldc));
115   PetscCall(MatZeroEntries(C));
116   PetscCall(MatDenseGetArrayRead(B, &barray));
117   PetscCall(MatDenseGetArray(C, &carray));
118   for (i = 0; i < nr; i++) {
119     PetscCall(ISGetSize(bA->isglobal.row[i], &M));
120     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), contents->dm[i + 1] - contents->dm[i], PETSC_DECIDE, M, N, carray + contents->dm[i], &viewC));
121     PetscCall(MatDenseSetLDA(viewC, ldc));
122     for (j = 0; j < nc; j++) {
123       if (!bA->m[i][j]) continue;
124       PetscCall(ISGetSize(bA->isglobal.col[j], &M));
125       PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), contents->dn[j + 1] - contents->dn[j], PETSC_DECIDE, M, N, (PetscScalar *)(barray + contents->dn[j]), &viewB));
126       PetscCall(MatDenseSetLDA(viewB, ldb));
127 
128       /* MatMatMultNumeric(bA->m[i][j],viewB,contents->workC[i*nc + j]); */
129       workC             = contents->workC[i * nc + j];
130       productB          = workC->product->B;
131       workC->product->B = viewB; /* use newly created dense matrix viewB */
132       PetscCall(MatProductNumeric(workC));
133       PetscCall(MatDestroy(&viewB));
134       workC->product->B = productB; /* resume original B */
135 
136       /* C[i] <- workC + C[i] */
137       PetscCall(MatAXPY(viewC, 1.0, contents->workC[i * nc + j], SAME_NONZERO_PATTERN));
138     }
139     PetscCall(MatDestroy(&viewC));
140   }
141   PetscCall(MatDenseRestoreArray(C, &carray));
142   PetscCall(MatDenseRestoreArrayRead(B, &barray));
143 
144   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
145   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatNest_DenseDestroy(void *ctx) {
150   Nest_Dense *contents = (Nest_Dense *)ctx;
151   PetscInt    i;
152 
153   PetscFunctionBegin;
154   PetscCall(PetscFree(contents->tarray));
155   for (i = 0; i < contents->k; i++) PetscCall(MatDestroy(contents->workC + i));
156   PetscCall(PetscFree3(contents->dm, contents->dn, contents->workC));
157   PetscCall(PetscFree(contents));
158   PetscFunctionReturn(0);
159 }
160 
161 PETSC_INTERN PetscErrorCode MatProductSymbolic_Nest_Dense(Mat C) {
162   Mat_Nest          *bA;
163   Mat                viewB, workC;
164   const PetscScalar *barray;
165   PetscInt           i, j, M, N, m, n, nr, nc, maxm = 0, ldb;
166   Nest_Dense        *contents = NULL;
167   PetscBool          cisdense;
168   Mat                A, B;
169   PetscReal          fill;
170 
171   PetscFunctionBegin;
172   MatCheckProduct(C, 4);
173   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
174   A    = C->product->A;
175   B    = C->product->B;
176   fill = C->product->fill;
177   bA   = (Mat_Nest *)A->data;
178   nr   = bA->nr;
179   nc   = bA->nc;
180   PetscCall(MatGetLocalSize(C, &m, &n));
181   PetscCall(MatGetSize(C, &M, &N));
182   if (m == PETSC_DECIDE || n == PETSC_DECIDE || M == PETSC_DECIDE || N == PETSC_DECIDE) {
183     PetscCall(MatGetLocalSize(B, NULL, &n));
184     PetscCall(MatGetSize(B, NULL, &N));
185     PetscCall(MatGetLocalSize(A, &m, NULL));
186     PetscCall(MatGetSize(A, &M, NULL));
187     PetscCall(MatSetSizes(C, m, n, M, N));
188   }
189   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, MATMPIDENSECUDA, ""));
190   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)B)->type_name));
191   PetscCall(MatSetUp(C));
192   if (!N) {
193     C->ops->productnumeric = MatProductNumeric_Nest_Dense;
194     PetscFunctionReturn(0);
195   }
196 
197   PetscCall(PetscNew(&contents));
198   C->product->data    = contents;
199   C->product->destroy = MatNest_DenseDestroy;
200   PetscCall(PetscCalloc3(nr + 1, &contents->dm, nc + 1, &contents->dn, nr * nc, &contents->workC));
201   contents->k = nr * nc;
202   for (i = 0; i < nr; i++) {
203     PetscCall(ISGetLocalSize(bA->isglobal.row[i], contents->dm + i + 1));
204     maxm = PetscMax(maxm, contents->dm[i + 1]);
205     contents->dm[i + 1] += contents->dm[i];
206   }
207   for (i = 0; i < nc; i++) {
208     PetscCall(ISGetLocalSize(bA->isglobal.col[i], contents->dn + i + 1));
209     contents->dn[i + 1] += contents->dn[i];
210   }
211   PetscCall(PetscMalloc1(maxm * N, &contents->tarray));
212   PetscCall(MatDenseGetLDA(B, &ldb));
213   PetscCall(MatGetSize(B, NULL, &N));
214   PetscCall(MatDenseGetArrayRead(B, &barray));
215   /* loops are permuted compared to MatMatMultNumeric so that viewB is created only once per column of A */
216   for (j = 0; j < nc; j++) {
217     PetscCall(ISGetSize(bA->isglobal.col[j], &M));
218     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), contents->dn[j + 1] - contents->dn[j], PETSC_DECIDE, M, N, (PetscScalar *)(barray + contents->dn[j]), &viewB));
219     PetscCall(MatDenseSetLDA(viewB, ldb));
220     for (i = 0; i < nr; i++) {
221       if (!bA->m[i][j]) continue;
222       /* MatMatMultSymbolic may attach a specific container (depending on MatType of bA->m[i][j]) to workC[i][j] */
223 
224       PetscCall(MatProductCreate(bA->m[i][j], viewB, NULL, &contents->workC[i * nc + j]));
225       workC = contents->workC[i * nc + j];
226       PetscCall(MatProductSetType(workC, MATPRODUCT_AB));
227       PetscCall(MatProductSetAlgorithm(workC, "default"));
228       PetscCall(MatProductSetFill(workC, fill));
229       PetscCall(MatProductSetFromOptions(workC));
230       PetscCall(MatProductSymbolic(workC));
231 
232       /* since tarray will be shared by all Mat */
233       PetscCall(MatSeqDenseSetPreallocation(workC, contents->tarray));
234       PetscCall(MatMPIDenseSetPreallocation(workC, contents->tarray));
235     }
236     PetscCall(MatDestroy(&viewB));
237   }
238   PetscCall(MatDenseRestoreArrayRead(B, &barray));
239 
240   C->ops->productnumeric = MatProductNumeric_Nest_Dense;
241   PetscFunctionReturn(0);
242 }
243 
244 /* --------------------------------------------------------- */
245 static PetscErrorCode MatProductSetFromOptions_Nest_Dense_AB(Mat C) {
246   PetscFunctionBegin;
247   C->ops->productsymbolic = MatProductSymbolic_Nest_Dense;
248   PetscFunctionReturn(0);
249 }
250 
251 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_Nest_Dense(Mat C) {
252   Mat_Product *product = C->product;
253 
254   PetscFunctionBegin;
255   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_Nest_Dense_AB(C));
256   PetscFunctionReturn(0);
257 }
258 /* --------------------------------------------------------- */
259 
260 static PetscErrorCode MatMultTranspose_Nest(Mat A, Vec x, Vec y) {
261   Mat_Nest *bA = (Mat_Nest *)A->data;
262   Vec      *bx = bA->left, *by = bA->right;
263   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
264 
265   PetscFunctionBegin;
266   for (i = 0; i < nr; i++) PetscCall(VecGetSubVector(x, bA->isglobal.row[i], &bx[i]));
267   for (i = 0; i < nc; i++) PetscCall(VecGetSubVector(y, bA->isglobal.col[i], &by[i]));
268   for (j = 0; j < nc; j++) {
269     PetscCall(VecZeroEntries(by[j]));
270     for (i = 0; i < nr; i++) {
271       if (!bA->m[i][j]) continue;
272       /* y[j] <- y[j] + (A[i][j])^T * x[i] */
273       PetscCall(MatMultTransposeAdd(bA->m[i][j], bx[i], by[j], by[j]));
274     }
275   }
276   for (i = 0; i < nr; i++) PetscCall(VecRestoreSubVector(x, bA->isglobal.row[i], &bx[i]));
277   for (i = 0; i < nc; i++) PetscCall(VecRestoreSubVector(y, bA->isglobal.col[i], &by[i]));
278   PetscFunctionReturn(0);
279 }
280 
281 static PetscErrorCode MatMultTransposeAdd_Nest(Mat A, Vec x, Vec y, Vec z) {
282   Mat_Nest *bA = (Mat_Nest *)A->data;
283   Vec      *bx = bA->left, *bz = bA->right;
284   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
285 
286   PetscFunctionBegin;
287   for (i = 0; i < nr; i++) PetscCall(VecGetSubVector(x, bA->isglobal.row[i], &bx[i]));
288   for (i = 0; i < nc; i++) PetscCall(VecGetSubVector(z, bA->isglobal.col[i], &bz[i]));
289   for (j = 0; j < nc; j++) {
290     if (y != z) {
291       Vec by;
292       PetscCall(VecGetSubVector(y, bA->isglobal.col[j], &by));
293       PetscCall(VecCopy(by, bz[j]));
294       PetscCall(VecRestoreSubVector(y, bA->isglobal.col[j], &by));
295     }
296     for (i = 0; i < nr; i++) {
297       if (!bA->m[i][j]) continue;
298       /* z[j] <- y[j] + (A[i][j])^T * x[i] */
299       PetscCall(MatMultTransposeAdd(bA->m[i][j], bx[i], bz[j], bz[j]));
300     }
301   }
302   for (i = 0; i < nr; i++) PetscCall(VecRestoreSubVector(x, bA->isglobal.row[i], &bx[i]));
303   for (i = 0; i < nc; i++) PetscCall(VecRestoreSubVector(z, bA->isglobal.col[i], &bz[i]));
304   PetscFunctionReturn(0);
305 }
306 
307 static PetscErrorCode MatTranspose_Nest(Mat A, MatReuse reuse, Mat *B) {
308   Mat_Nest *bA = (Mat_Nest *)A->data, *bC;
309   Mat       C;
310   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
311 
312   PetscFunctionBegin;
313   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
314   PetscCheck(reuse != MAT_INPLACE_MATRIX || nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Square nested matrix only for in-place");
315 
316   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
317     Mat *subs;
318     IS  *is_row, *is_col;
319 
320     PetscCall(PetscCalloc1(nr * nc, &subs));
321     PetscCall(PetscMalloc2(nr, &is_row, nc, &is_col));
322     PetscCall(MatNestGetISs(A, is_row, is_col));
323     if (reuse == MAT_INPLACE_MATRIX) {
324       for (i = 0; i < nr; i++) {
325         for (j = 0; j < nc; j++) subs[i + nr * j] = bA->m[i][j];
326       }
327     }
328 
329     PetscCall(MatCreateNest(PetscObjectComm((PetscObject)A), nc, is_col, nr, is_row, subs, &C));
330     PetscCall(PetscFree(subs));
331     PetscCall(PetscFree2(is_row, is_col));
332   } else {
333     C = *B;
334   }
335 
336   bC = (Mat_Nest *)C->data;
337   for (i = 0; i < nr; i++) {
338     for (j = 0; j < nc; j++) {
339       if (bA->m[i][j]) {
340         PetscCall(MatTranspose(bA->m[i][j], reuse, &(bC->m[j][i])));
341       } else {
342         bC->m[j][i] = NULL;
343       }
344     }
345   }
346 
347   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
348     *B = C;
349   } else {
350     PetscCall(MatHeaderMerge(A, &C));
351   }
352   PetscFunctionReturn(0);
353 }
354 
355 static PetscErrorCode MatNestDestroyISList(PetscInt n, IS **list) {
356   IS      *lst = *list;
357   PetscInt i;
358 
359   PetscFunctionBegin;
360   if (!lst) PetscFunctionReturn(0);
361   for (i = 0; i < n; i++)
362     if (lst[i]) PetscCall(ISDestroy(&lst[i]));
363   PetscCall(PetscFree(lst));
364   *list = NULL;
365   PetscFunctionReturn(0);
366 }
367 
368 static PetscErrorCode MatReset_Nest(Mat A) {
369   Mat_Nest *vs = (Mat_Nest *)A->data;
370   PetscInt  i, j;
371 
372   PetscFunctionBegin;
373   /* release the matrices and the place holders */
374   PetscCall(MatNestDestroyISList(vs->nr, &vs->isglobal.row));
375   PetscCall(MatNestDestroyISList(vs->nc, &vs->isglobal.col));
376   PetscCall(MatNestDestroyISList(vs->nr, &vs->islocal.row));
377   PetscCall(MatNestDestroyISList(vs->nc, &vs->islocal.col));
378 
379   PetscCall(PetscFree(vs->row_len));
380   PetscCall(PetscFree(vs->col_len));
381   PetscCall(PetscFree(vs->nnzstate));
382 
383   PetscCall(PetscFree2(vs->left, vs->right));
384 
385   /* release the matrices and the place holders */
386   if (vs->m) {
387     for (i = 0; i < vs->nr; i++) {
388       for (j = 0; j < vs->nc; j++) PetscCall(MatDestroy(&vs->m[i][j]));
389       PetscCall(PetscFree(vs->m[i]));
390     }
391     PetscCall(PetscFree(vs->m));
392   }
393 
394   /* restore defaults */
395   vs->nr            = 0;
396   vs->nc            = 0;
397   vs->splitassembly = PETSC_FALSE;
398   PetscFunctionReturn(0);
399 }
400 
401 static PetscErrorCode MatDestroy_Nest(Mat A) {
402   PetscFunctionBegin;
403   PetscCall(MatReset_Nest(A));
404   PetscCall(PetscFree(A->data));
405   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSubMat_C", NULL));
406   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetSubMat_C", NULL));
407   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSubMats_C", NULL));
408   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSize_C", NULL));
409   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetISs_C", NULL));
410   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetLocalISs_C", NULL));
411   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetVecType_C", NULL));
412   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetSubMats_C", NULL));
413   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_mpiaij_C", NULL));
414   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_seqaij_C", NULL));
415   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_aij_C", NULL));
416   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_is_C", NULL));
417   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_mpidense_C", NULL));
418   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_seqdense_C", NULL));
419   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_seqdense_C", NULL));
420   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_mpidense_C", NULL));
421   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_dense_C", NULL));
422   PetscFunctionReturn(0);
423 }
424 
425 static PetscErrorCode MatMissingDiagonal_Nest(Mat mat, PetscBool *missing, PetscInt *dd) {
426   Mat_Nest *vs = (Mat_Nest *)mat->data;
427   PetscInt  i;
428 
429   PetscFunctionBegin;
430   if (dd) *dd = 0;
431   if (!vs->nr) {
432     *missing = PETSC_TRUE;
433     PetscFunctionReturn(0);
434   }
435   *missing = PETSC_FALSE;
436   for (i = 0; i < vs->nr && !(*missing); i++) {
437     *missing = PETSC_TRUE;
438     if (vs->m[i][i]) {
439       PetscCall(MatMissingDiagonal(vs->m[i][i], missing, NULL));
440       PetscCheck(!*missing || !dd, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "First missing entry not yet implemented");
441     }
442   }
443   PetscFunctionReturn(0);
444 }
445 
446 static PetscErrorCode MatAssemblyBegin_Nest(Mat A, MatAssemblyType type) {
447   Mat_Nest *vs = (Mat_Nest *)A->data;
448   PetscInt  i, j;
449   PetscBool nnzstate = PETSC_FALSE;
450 
451   PetscFunctionBegin;
452   for (i = 0; i < vs->nr; i++) {
453     for (j = 0; j < vs->nc; j++) {
454       PetscObjectState subnnzstate = 0;
455       if (vs->m[i][j]) {
456         PetscCall(MatAssemblyBegin(vs->m[i][j], type));
457         if (!vs->splitassembly) {
458           /* Note: split assembly will fail if the same block appears more than once (even indirectly through a nested
459            * sub-block). This could be fixed by adding a flag to Mat so that there was a way to check if a Mat was
460            * already performing an assembly, but the result would by more complicated and appears to offer less
461            * potential for diagnostics and correctness checking. Split assembly should be fixed once there is an
462            * interface for libraries to make asynchronous progress in "user-defined non-blocking collectives".
463            */
464           PetscCall(MatAssemblyEnd(vs->m[i][j], type));
465           PetscCall(MatGetNonzeroState(vs->m[i][j], &subnnzstate));
466         }
467       }
468       nnzstate                     = (PetscBool)(nnzstate || vs->nnzstate[i * vs->nc + j] != subnnzstate);
469       vs->nnzstate[i * vs->nc + j] = subnnzstate;
470     }
471   }
472   if (nnzstate) A->nonzerostate++;
473   PetscFunctionReturn(0);
474 }
475 
476 static PetscErrorCode MatAssemblyEnd_Nest(Mat A, MatAssemblyType type) {
477   Mat_Nest *vs = (Mat_Nest *)A->data;
478   PetscInt  i, j;
479 
480   PetscFunctionBegin;
481   for (i = 0; i < vs->nr; i++) {
482     for (j = 0; j < vs->nc; j++) {
483       if (vs->m[i][j]) {
484         if (vs->splitassembly) PetscCall(MatAssemblyEnd(vs->m[i][j], type));
485       }
486     }
487   }
488   PetscFunctionReturn(0);
489 }
490 
491 static PetscErrorCode MatNestFindNonzeroSubMatRow(Mat A, PetscInt row, Mat *B) {
492   Mat_Nest *vs = (Mat_Nest *)A->data;
493   PetscInt  j;
494   Mat       sub;
495 
496   PetscFunctionBegin;
497   sub = (row < vs->nc) ? vs->m[row][row] : (Mat)NULL; /* Prefer to find on the diagonal */
498   for (j = 0; !sub && j < vs->nc; j++) sub = vs->m[row][j];
499   if (sub) PetscCall(MatSetUp(sub)); /* Ensure that the sizes are available */
500   *B = sub;
501   PetscFunctionReturn(0);
502 }
503 
504 static PetscErrorCode MatNestFindNonzeroSubMatCol(Mat A, PetscInt col, Mat *B) {
505   Mat_Nest *vs = (Mat_Nest *)A->data;
506   PetscInt  i;
507   Mat       sub;
508 
509   PetscFunctionBegin;
510   sub = (col < vs->nr) ? vs->m[col][col] : (Mat)NULL; /* Prefer to find on the diagonal */
511   for (i = 0; !sub && i < vs->nr; i++) sub = vs->m[i][col];
512   if (sub) PetscCall(MatSetUp(sub)); /* Ensure that the sizes are available */
513   *B = sub;
514   PetscFunctionReturn(0);
515 }
516 
517 static PetscErrorCode MatNestFindISRange(Mat A, PetscInt n, const IS list[], IS is, PetscInt *begin, PetscInt *end) {
518   PetscInt  i, j, size, m;
519   PetscBool flg;
520   IS        out, concatenate[2];
521 
522   PetscFunctionBegin;
523   PetscValidPointer(list, 3);
524   PetscValidHeaderSpecific(is, IS_CLASSID, 4);
525   if (begin) {
526     PetscValidIntPointer(begin, 5);
527     *begin = -1;
528   }
529   if (end) {
530     PetscValidIntPointer(end, 6);
531     *end = -1;
532   }
533   for (i = 0; i < n; i++) {
534     if (!list[i]) continue;
535     PetscCall(ISEqualUnsorted(list[i], is, &flg));
536     if (flg) {
537       if (begin) *begin = i;
538       if (end) *end = i + 1;
539       PetscFunctionReturn(0);
540     }
541   }
542   PetscCall(ISGetSize(is, &size));
543   for (i = 0; i < n - 1; i++) {
544     if (!list[i]) continue;
545     m = 0;
546     PetscCall(ISConcatenate(PetscObjectComm((PetscObject)A), 2, list + i, &out));
547     PetscCall(ISGetSize(out, &m));
548     for (j = i + 2; j < n && m < size; j++) {
549       if (list[j]) {
550         concatenate[0] = out;
551         concatenate[1] = list[j];
552         PetscCall(ISConcatenate(PetscObjectComm((PetscObject)A), 2, concatenate, &out));
553         PetscCall(ISDestroy(concatenate));
554         PetscCall(ISGetSize(out, &m));
555       }
556     }
557     if (m == size) {
558       PetscCall(ISEqualUnsorted(out, is, &flg));
559       if (flg) {
560         if (begin) *begin = i;
561         if (end) *end = j;
562         PetscCall(ISDestroy(&out));
563         PetscFunctionReturn(0);
564       }
565     }
566     PetscCall(ISDestroy(&out));
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 static PetscErrorCode MatNestFillEmptyMat_Private(Mat A, PetscInt i, PetscInt j, Mat *B) {
572   Mat_Nest *vs = (Mat_Nest *)A->data;
573   PetscInt  lr, lc;
574 
575   PetscFunctionBegin;
576   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
577   PetscCall(ISGetLocalSize(vs->isglobal.row[i], &lr));
578   PetscCall(ISGetLocalSize(vs->isglobal.col[j], &lc));
579   PetscCall(MatSetSizes(*B, lr, lc, PETSC_DECIDE, PETSC_DECIDE));
580   PetscCall(MatSetType(*B, MATAIJ));
581   PetscCall(MatSeqAIJSetPreallocation(*B, 0, NULL));
582   PetscCall(MatMPIAIJSetPreallocation(*B, 0, NULL, 0, NULL));
583   PetscCall(MatSetUp(*B));
584   PetscCall(MatSetOption(*B, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
585   PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
586   PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
587   PetscFunctionReturn(0);
588 }
589 
590 static PetscErrorCode MatNestGetBlock_Private(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *B) {
591   Mat_Nest  *vs = (Mat_Nest *)A->data;
592   Mat       *a;
593   PetscInt   i, j, k, l, nr = rend - rbegin, nc = cend - cbegin;
594   char       keyname[256];
595   PetscBool *b;
596   PetscBool  flg;
597 
598   PetscFunctionBegin;
599   *B = NULL;
600   PetscCall(PetscSNPrintf(keyname, sizeof(keyname), "NestBlock_%" PetscInt_FMT "-%" PetscInt_FMT "x%" PetscInt_FMT "-%" PetscInt_FMT, rbegin, rend, cbegin, cend));
601   PetscCall(PetscObjectQuery((PetscObject)A, keyname, (PetscObject *)B));
602   if (*B) PetscFunctionReturn(0);
603 
604   PetscCall(PetscMalloc2(nr * nc, &a, nr * nc, &b));
605   for (i = 0; i < nr; i++) {
606     for (j = 0; j < nc; j++) {
607       a[i * nc + j] = vs->m[rbegin + i][cbegin + j];
608       b[i * nc + j] = PETSC_FALSE;
609     }
610   }
611   if (nc != vs->nc && nr != vs->nr) {
612     for (i = 0; i < nr; i++) {
613       for (j = 0; j < nc; j++) {
614         flg = PETSC_FALSE;
615         for (k = 0; (k < nr && !flg); k++) {
616           if (a[j + k * nc]) flg = PETSC_TRUE;
617         }
618         if (flg) {
619           flg = PETSC_FALSE;
620           for (l = 0; (l < nc && !flg); l++) {
621             if (a[i * nc + l]) flg = PETSC_TRUE;
622           }
623         }
624         if (!flg) {
625           b[i * nc + j] = PETSC_TRUE;
626           PetscCall(MatNestFillEmptyMat_Private(A, rbegin + i, cbegin + j, a + i * nc + j));
627         }
628       }
629     }
630   }
631   PetscCall(MatCreateNest(PetscObjectComm((PetscObject)A), nr, nr != vs->nr ? NULL : vs->isglobal.row, nc, nc != vs->nc ? NULL : vs->isglobal.col, a, B));
632   for (i = 0; i < nr; i++) {
633     for (j = 0; j < nc; j++) {
634       if (b[i * nc + j]) PetscCall(MatDestroy(a + i * nc + j));
635     }
636   }
637   PetscCall(PetscFree2(a, b));
638   (*B)->assembled = A->assembled;
639   PetscCall(PetscObjectCompose((PetscObject)A, keyname, (PetscObject)*B));
640   PetscCall(PetscObjectDereference((PetscObject)*B)); /* Leave the only remaining reference in the composition */
641   PetscFunctionReturn(0);
642 }
643 
644 static PetscErrorCode MatNestFindSubMat(Mat A, struct MatNestISPair *is, IS isrow, IS iscol, Mat *B) {
645   Mat_Nest *vs = (Mat_Nest *)A->data;
646   PetscInt  rbegin, rend, cbegin, cend;
647 
648   PetscFunctionBegin;
649   PetscCall(MatNestFindISRange(A, vs->nr, is->row, isrow, &rbegin, &rend));
650   PetscCall(MatNestFindISRange(A, vs->nc, is->col, iscol, &cbegin, &cend));
651   if (rend == rbegin + 1 && cend == cbegin + 1) {
652     if (!vs->m[rbegin][cbegin]) PetscCall(MatNestFillEmptyMat_Private(A, rbegin, cbegin, vs->m[rbegin] + cbegin));
653     *B = vs->m[rbegin][cbegin];
654   } else if (rbegin != -1 && cbegin != -1) {
655     PetscCall(MatNestGetBlock_Private(A, rbegin, rend, cbegin, cend, B));
656   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Could not find index set");
657   PetscFunctionReturn(0);
658 }
659 
660 /*
661    TODO: This does not actually returns a submatrix we can modify
662 */
663 static PetscErrorCode MatCreateSubMatrix_Nest(Mat A, IS isrow, IS iscol, MatReuse reuse, Mat *B) {
664   Mat_Nest *vs = (Mat_Nest *)A->data;
665   Mat       sub;
666 
667   PetscFunctionBegin;
668   PetscCall(MatNestFindSubMat(A, &vs->isglobal, isrow, iscol, &sub));
669   switch (reuse) {
670   case MAT_INITIAL_MATRIX:
671     if (sub) PetscCall(PetscObjectReference((PetscObject)sub));
672     *B = sub;
673     break;
674   case MAT_REUSE_MATRIX: PetscCheck(sub == *B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Submatrix was not used before in this call"); break;
675   case MAT_IGNORE_MATRIX: /* Nothing to do */ break;
676   case MAT_INPLACE_MATRIX: /* Nothing to do */ SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_INPLACE_MATRIX is not supported yet");
677   }
678   PetscFunctionReturn(0);
679 }
680 
681 PetscErrorCode MatGetLocalSubMatrix_Nest(Mat A, IS isrow, IS iscol, Mat *B) {
682   Mat_Nest *vs = (Mat_Nest *)A->data;
683   Mat       sub;
684 
685   PetscFunctionBegin;
686   PetscCall(MatNestFindSubMat(A, &vs->islocal, isrow, iscol, &sub));
687   /* We allow the submatrix to be NULL, perhaps it would be better for the user to return an empty matrix instead */
688   if (sub) PetscCall(PetscObjectReference((PetscObject)sub));
689   *B = sub;
690   PetscFunctionReturn(0);
691 }
692 
693 static PetscErrorCode MatRestoreLocalSubMatrix_Nest(Mat A, IS isrow, IS iscol, Mat *B) {
694   Mat_Nest *vs = (Mat_Nest *)A->data;
695   Mat       sub;
696 
697   PetscFunctionBegin;
698   PetscCall(MatNestFindSubMat(A, &vs->islocal, isrow, iscol, &sub));
699   PetscCheck(*B == sub, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Local submatrix has not been gotten");
700   if (sub) {
701     PetscCheck(((PetscObject)sub)->refct > 1, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Local submatrix has had reference count decremented too many times");
702     PetscCall(MatDestroy(B));
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 static PetscErrorCode MatGetDiagonal_Nest(Mat A, Vec v) {
708   Mat_Nest *bA = (Mat_Nest *)A->data;
709   PetscInt  i;
710 
711   PetscFunctionBegin;
712   for (i = 0; i < bA->nr; i++) {
713     Vec bv;
714     PetscCall(VecGetSubVector(v, bA->isglobal.row[i], &bv));
715     if (bA->m[i][i]) {
716       PetscCall(MatGetDiagonal(bA->m[i][i], bv));
717     } else {
718       PetscCall(VecSet(bv, 0.0));
719     }
720     PetscCall(VecRestoreSubVector(v, bA->isglobal.row[i], &bv));
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 static PetscErrorCode MatDiagonalScale_Nest(Mat A, Vec l, Vec r) {
726   Mat_Nest *bA = (Mat_Nest *)A->data;
727   Vec       bl, *br;
728   PetscInt  i, j;
729 
730   PetscFunctionBegin;
731   PetscCall(PetscCalloc1(bA->nc, &br));
732   if (r) {
733     for (j = 0; j < bA->nc; j++) PetscCall(VecGetSubVector(r, bA->isglobal.col[j], &br[j]));
734   }
735   bl = NULL;
736   for (i = 0; i < bA->nr; i++) {
737     if (l) PetscCall(VecGetSubVector(l, bA->isglobal.row[i], &bl));
738     for (j = 0; j < bA->nc; j++) {
739       if (bA->m[i][j]) PetscCall(MatDiagonalScale(bA->m[i][j], bl, br[j]));
740     }
741     if (l) PetscCall(VecRestoreSubVector(l, bA->isglobal.row[i], &bl));
742   }
743   if (r) {
744     for (j = 0; j < bA->nc; j++) PetscCall(VecRestoreSubVector(r, bA->isglobal.col[j], &br[j]));
745   }
746   PetscCall(PetscFree(br));
747   PetscFunctionReturn(0);
748 }
749 
750 static PetscErrorCode MatScale_Nest(Mat A, PetscScalar a) {
751   Mat_Nest *bA = (Mat_Nest *)A->data;
752   PetscInt  i, j;
753 
754   PetscFunctionBegin;
755   for (i = 0; i < bA->nr; i++) {
756     for (j = 0; j < bA->nc; j++) {
757       if (bA->m[i][j]) PetscCall(MatScale(bA->m[i][j], a));
758     }
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 static PetscErrorCode MatShift_Nest(Mat A, PetscScalar a) {
764   Mat_Nest *bA = (Mat_Nest *)A->data;
765   PetscInt  i;
766   PetscBool nnzstate = PETSC_FALSE;
767 
768   PetscFunctionBegin;
769   for (i = 0; i < bA->nr; i++) {
770     PetscObjectState subnnzstate = 0;
771     PetscCheck(bA->m[i][i], PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for shifting an empty diagonal block, insert a matrix in block (%" PetscInt_FMT ",%" PetscInt_FMT ")", i, i);
772     PetscCall(MatShift(bA->m[i][i], a));
773     PetscCall(MatGetNonzeroState(bA->m[i][i], &subnnzstate));
774     nnzstate                     = (PetscBool)(nnzstate || bA->nnzstate[i * bA->nc + i] != subnnzstate);
775     bA->nnzstate[i * bA->nc + i] = subnnzstate;
776   }
777   if (nnzstate) A->nonzerostate++;
778   PetscFunctionReturn(0);
779 }
780 
781 static PetscErrorCode MatDiagonalSet_Nest(Mat A, Vec D, InsertMode is) {
782   Mat_Nest *bA = (Mat_Nest *)A->data;
783   PetscInt  i;
784   PetscBool nnzstate = PETSC_FALSE;
785 
786   PetscFunctionBegin;
787   for (i = 0; i < bA->nr; i++) {
788     PetscObjectState subnnzstate = 0;
789     Vec              bv;
790     PetscCall(VecGetSubVector(D, bA->isglobal.row[i], &bv));
791     if (bA->m[i][i]) {
792       PetscCall(MatDiagonalSet(bA->m[i][i], bv, is));
793       PetscCall(MatGetNonzeroState(bA->m[i][i], &subnnzstate));
794     }
795     PetscCall(VecRestoreSubVector(D, bA->isglobal.row[i], &bv));
796     nnzstate                     = (PetscBool)(nnzstate || bA->nnzstate[i * bA->nc + i] != subnnzstate);
797     bA->nnzstate[i * bA->nc + i] = subnnzstate;
798   }
799   if (nnzstate) A->nonzerostate++;
800   PetscFunctionReturn(0);
801 }
802 
803 static PetscErrorCode MatSetRandom_Nest(Mat A, PetscRandom rctx) {
804   Mat_Nest *bA = (Mat_Nest *)A->data;
805   PetscInt  i, j;
806 
807   PetscFunctionBegin;
808   for (i = 0; i < bA->nr; i++) {
809     for (j = 0; j < bA->nc; j++) {
810       if (bA->m[i][j]) PetscCall(MatSetRandom(bA->m[i][j], rctx));
811     }
812   }
813   PetscFunctionReturn(0);
814 }
815 
816 static PetscErrorCode MatCreateVecs_Nest(Mat A, Vec *right, Vec *left) {
817   Mat_Nest *bA = (Mat_Nest *)A->data;
818   Vec      *L, *R;
819   MPI_Comm  comm;
820   PetscInt  i, j;
821 
822   PetscFunctionBegin;
823   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
824   if (right) {
825     /* allocate R */
826     PetscCall(PetscMalloc1(bA->nc, &R));
827     /* Create the right vectors */
828     for (j = 0; j < bA->nc; j++) {
829       for (i = 0; i < bA->nr; i++) {
830         if (bA->m[i][j]) {
831           PetscCall(MatCreateVecs(bA->m[i][j], &R[j], NULL));
832           break;
833         }
834       }
835       PetscCheck(i != bA->nr, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Mat(Nest) contains a null column.");
836     }
837     PetscCall(VecCreateNest(comm, bA->nc, bA->isglobal.col, R, right));
838     /* hand back control to the nest vector */
839     for (j = 0; j < bA->nc; j++) PetscCall(VecDestroy(&R[j]));
840     PetscCall(PetscFree(R));
841   }
842 
843   if (left) {
844     /* allocate L */
845     PetscCall(PetscMalloc1(bA->nr, &L));
846     /* Create the left vectors */
847     for (i = 0; i < bA->nr; i++) {
848       for (j = 0; j < bA->nc; j++) {
849         if (bA->m[i][j]) {
850           PetscCall(MatCreateVecs(bA->m[i][j], NULL, &L[i]));
851           break;
852         }
853       }
854       PetscCheck(j != bA->nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Mat(Nest) contains a null row.");
855     }
856 
857     PetscCall(VecCreateNest(comm, bA->nr, bA->isglobal.row, L, left));
858     for (i = 0; i < bA->nr; i++) PetscCall(VecDestroy(&L[i]));
859 
860     PetscCall(PetscFree(L));
861   }
862   PetscFunctionReturn(0);
863 }
864 
865 static PetscErrorCode MatView_Nest(Mat A, PetscViewer viewer) {
866   Mat_Nest *bA = (Mat_Nest *)A->data;
867   PetscBool isascii, viewSub = PETSC_FALSE;
868   PetscInt  i, j;
869 
870   PetscFunctionBegin;
871   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
872   if (isascii) {
873     PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, ((PetscObject)A)->prefix, "-mat_view_nest_sub", &viewSub, NULL));
874     PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix object: \n"));
875     PetscCall(PetscViewerASCIIPushTab(viewer));
876     PetscCall(PetscViewerASCIIPrintf(viewer, "type=nest, rows=%" PetscInt_FMT ", cols=%" PetscInt_FMT " \n", bA->nr, bA->nc));
877 
878     PetscCall(PetscViewerASCIIPrintf(viewer, "MatNest structure: \n"));
879     for (i = 0; i < bA->nr; i++) {
880       for (j = 0; j < bA->nc; j++) {
881         MatType   type;
882         char      name[256] = "", prefix[256] = "";
883         PetscInt  NR, NC;
884         PetscBool isNest = PETSC_FALSE;
885 
886         if (!bA->m[i][j]) {
887           PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ",%" PetscInt_FMT ") : NULL \n", i, j));
888           continue;
889         }
890         PetscCall(MatGetSize(bA->m[i][j], &NR, &NC));
891         PetscCall(MatGetType(bA->m[i][j], &type));
892         if (((PetscObject)bA->m[i][j])->name) PetscCall(PetscSNPrintf(name, sizeof(name), "name=\"%s\", ", ((PetscObject)bA->m[i][j])->name));
893         if (((PetscObject)bA->m[i][j])->prefix) PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "prefix=\"%s\", ", ((PetscObject)bA->m[i][j])->prefix));
894         PetscCall(PetscObjectTypeCompare((PetscObject)bA->m[i][j], MATNEST, &isNest));
895 
896         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ",%" PetscInt_FMT ") : %s%stype=%s, rows=%" PetscInt_FMT ", cols=%" PetscInt_FMT " \n", i, j, name, prefix, type, NR, NC));
897 
898         if (isNest || viewSub) {
899           PetscCall(PetscViewerASCIIPushTab(viewer)); /* push1 */
900           PetscCall(MatView(bA->m[i][j], viewer));
901           PetscCall(PetscViewerASCIIPopTab(viewer)); /* pop1 */
902         }
903       }
904     }
905     PetscCall(PetscViewerASCIIPopTab(viewer)); /* pop0 */
906   }
907   PetscFunctionReturn(0);
908 }
909 
910 static PetscErrorCode MatZeroEntries_Nest(Mat A) {
911   Mat_Nest *bA = (Mat_Nest *)A->data;
912   PetscInt  i, j;
913 
914   PetscFunctionBegin;
915   for (i = 0; i < bA->nr; i++) {
916     for (j = 0; j < bA->nc; j++) {
917       if (!bA->m[i][j]) continue;
918       PetscCall(MatZeroEntries(bA->m[i][j]));
919     }
920   }
921   PetscFunctionReturn(0);
922 }
923 
924 static PetscErrorCode MatCopy_Nest(Mat A, Mat B, MatStructure str) {
925   Mat_Nest *bA = (Mat_Nest *)A->data, *bB = (Mat_Nest *)B->data;
926   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
927   PetscBool nnzstate = PETSC_FALSE;
928 
929   PetscFunctionBegin;
930   PetscCheck(nr == bB->nr && nc == bB->nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Cannot copy a Mat_Nest of block size (%" PetscInt_FMT ",%" PetscInt_FMT ") to a Mat_Nest of block size (%" PetscInt_FMT ",%" PetscInt_FMT ")", bB->nr, bB->nc, nr, nc);
931   for (i = 0; i < nr; i++) {
932     for (j = 0; j < nc; j++) {
933       PetscObjectState subnnzstate = 0;
934       if (bA->m[i][j] && bB->m[i][j]) {
935         PetscCall(MatCopy(bA->m[i][j], bB->m[i][j], str));
936       } else PetscCheck(!bA->m[i][j] && !bB->m[i][j], PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Matrix block does not exist at %" PetscInt_FMT ",%" PetscInt_FMT, i, j);
937       PetscCall(MatGetNonzeroState(bB->m[i][j], &subnnzstate));
938       nnzstate                 = (PetscBool)(nnzstate || bB->nnzstate[i * nc + j] != subnnzstate);
939       bB->nnzstate[i * nc + j] = subnnzstate;
940     }
941   }
942   if (nnzstate) B->nonzerostate++;
943   PetscFunctionReturn(0);
944 }
945 
946 static PetscErrorCode MatAXPY_Nest(Mat Y, PetscScalar a, Mat X, MatStructure str) {
947   Mat_Nest *bY = (Mat_Nest *)Y->data, *bX = (Mat_Nest *)X->data;
948   PetscInt  i, j, nr = bY->nr, nc = bY->nc;
949   PetscBool nnzstate = PETSC_FALSE;
950 
951   PetscFunctionBegin;
952   PetscCheck(nr == bX->nr && nc == bX->nc, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_INCOMP, "Cannot AXPY a MatNest of block size (%" PetscInt_FMT ",%" PetscInt_FMT ") with a MatNest of block size (%" PetscInt_FMT ",%" PetscInt_FMT ")", bX->nr, bX->nc, nr, nc);
953   for (i = 0; i < nr; i++) {
954     for (j = 0; j < nc; j++) {
955       PetscObjectState subnnzstate = 0;
956       if (bY->m[i][j] && bX->m[i][j]) {
957         PetscCall(MatAXPY(bY->m[i][j], a, bX->m[i][j], str));
958       } else if (bX->m[i][j]) {
959         Mat M;
960 
961         PetscCheck(str == DIFFERENT_NONZERO_PATTERN, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_INCOMP, "Matrix block does not exist at %" PetscInt_FMT ",%" PetscInt_FMT ". Use DIFFERENT_NONZERO_PATTERN", i, j);
962         PetscCall(MatDuplicate(bX->m[i][j], MAT_COPY_VALUES, &M));
963         PetscCall(MatNestSetSubMat(Y, i, j, M));
964         PetscCall(MatDestroy(&M));
965       }
966       if (bY->m[i][j]) PetscCall(MatGetNonzeroState(bY->m[i][j], &subnnzstate));
967       nnzstate                 = (PetscBool)(nnzstate || bY->nnzstate[i * nc + j] != subnnzstate);
968       bY->nnzstate[i * nc + j] = subnnzstate;
969     }
970   }
971   if (nnzstate) Y->nonzerostate++;
972   PetscFunctionReturn(0);
973 }
974 
975 static PetscErrorCode MatDuplicate_Nest(Mat A, MatDuplicateOption op, Mat *B) {
976   Mat_Nest *bA = (Mat_Nest *)A->data;
977   Mat      *b;
978   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
979 
980   PetscFunctionBegin;
981   PetscCall(PetscMalloc1(nr * nc, &b));
982   for (i = 0; i < nr; i++) {
983     for (j = 0; j < nc; j++) {
984       if (bA->m[i][j]) {
985         PetscCall(MatDuplicate(bA->m[i][j], op, &b[i * nc + j]));
986       } else {
987         b[i * nc + j] = NULL;
988       }
989     }
990   }
991   PetscCall(MatCreateNest(PetscObjectComm((PetscObject)A), nr, bA->isglobal.row, nc, bA->isglobal.col, b, B));
992   /* Give the new MatNest exclusive ownership */
993   for (i = 0; i < nr * nc; i++) PetscCall(MatDestroy(&b[i]));
994   PetscCall(PetscFree(b));
995 
996   PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
997   PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
998   PetscFunctionReturn(0);
999 }
1000 
1001 /* nest api */
1002 PetscErrorCode MatNestGetSubMat_Nest(Mat A, PetscInt idxm, PetscInt jdxm, Mat *mat) {
1003   Mat_Nest *bA = (Mat_Nest *)A->data;
1004 
1005   PetscFunctionBegin;
1006   PetscCheck(idxm < bA->nr, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm, bA->nr - 1);
1007   PetscCheck(jdxm < bA->nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT, jdxm, bA->nc - 1);
1008   *mat = bA->m[idxm][jdxm];
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 /*@
1013  MatNestGetSubMat - Returns a single, sub-matrix from a `MATNEST`
1014 
1015  Not collective
1016 
1017  Input Parameters:
1018 +   A  - `MATNEST` matrix
1019 .   idxm - index of the matrix within the nest matrix
1020 -   jdxm - index of the matrix within the nest matrix
1021 
1022  Output Parameter:
1023 .   sub - matrix at index idxm,jdxm within the nest matrix
1024 
1025  Level: developer
1026 
1027 .seealso: `MATNEST`, `MatNestGetSize()`, `MatNestGetSubMats()`, `MatCreateNest()`, `MATNEST`, `MatNestSetSubMat()`,
1028           `MatNestGetLocalISs()`, `MatNestGetISs()`
1029 @*/
1030 PetscErrorCode MatNestGetSubMat(Mat A, PetscInt idxm, PetscInt jdxm, Mat *sub) {
1031   PetscFunctionBegin;
1032   PetscUseMethod(A, "MatNestGetSubMat_C", (Mat, PetscInt, PetscInt, Mat *), (A, idxm, jdxm, sub));
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 PetscErrorCode MatNestSetSubMat_Nest(Mat A, PetscInt idxm, PetscInt jdxm, Mat mat) {
1037   Mat_Nest *bA = (Mat_Nest *)A->data;
1038   PetscInt  m, n, M, N, mi, ni, Mi, Ni;
1039 
1040   PetscFunctionBegin;
1041   PetscCheck(idxm < bA->nr, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm, bA->nr - 1);
1042   PetscCheck(jdxm < bA->nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT, jdxm, bA->nc - 1);
1043   PetscCall(MatGetLocalSize(mat, &m, &n));
1044   PetscCall(MatGetSize(mat, &M, &N));
1045   PetscCall(ISGetLocalSize(bA->isglobal.row[idxm], &mi));
1046   PetscCall(ISGetSize(bA->isglobal.row[idxm], &Mi));
1047   PetscCall(ISGetLocalSize(bA->isglobal.col[jdxm], &ni));
1048   PetscCall(ISGetSize(bA->isglobal.col[jdxm], &Ni));
1049   PetscCheck(M == Mi && N == Ni, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "Submatrix dimension (%" PetscInt_FMT ",%" PetscInt_FMT ") incompatible with nest block (%" PetscInt_FMT ",%" PetscInt_FMT ")", M, N, Mi, Ni);
1050   PetscCheck(m == mi && n == ni, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "Submatrix local dimension (%" PetscInt_FMT ",%" PetscInt_FMT ") incompatible with nest block (%" PetscInt_FMT ",%" PetscInt_FMT ")", m, n, mi, ni);
1051 
1052   /* do not increase object state */
1053   if (mat == bA->m[idxm][jdxm]) PetscFunctionReturn(0);
1054 
1055   PetscCall(PetscObjectReference((PetscObject)mat));
1056   PetscCall(MatDestroy(&bA->m[idxm][jdxm]));
1057   bA->m[idxm][jdxm] = mat;
1058   PetscCall(PetscObjectStateIncrease((PetscObject)A));
1059   PetscCall(MatGetNonzeroState(mat, &bA->nnzstate[idxm * bA->nc + jdxm]));
1060   A->nonzerostate++;
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 /*@
1065  MatNestSetSubMat - Set a single submatrix in the `MATNEST`
1066 
1067  Logically collective on the submatrix communicator
1068 
1069  Input Parameters:
1070 +   A  - `MATNEST` matrix
1071 .   idxm - index of the matrix within the nest matrix
1072 .   jdxm - index of the matrix within the nest matrix
1073 -   sub - matrix at index idxm,jdxm within the nest matrix
1074 
1075  Notes:
1076  The new submatrix must have the same size and communicator as that block of the nest.
1077 
1078  This increments the reference count of the submatrix.
1079 
1080  Level: developer
1081 
1082 .seealso: `MATNEST`, `MatNestSetSubMats()`, `MatNestGetSubMats()`, `MatNestGetLocalISs()`, `MATNEST`, `MatCreateNest()`,
1083           `MatNestGetSubMat()`, `MatNestGetISs()`, `MatNestGetSize()`
1084 @*/
1085 PetscErrorCode MatNestSetSubMat(Mat A, PetscInt idxm, PetscInt jdxm, Mat sub) {
1086   PetscFunctionBegin;
1087   PetscUseMethod(A, "MatNestSetSubMat_C", (Mat, PetscInt, PetscInt, Mat), (A, idxm, jdxm, sub));
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 PetscErrorCode MatNestGetSubMats_Nest(Mat A, PetscInt *M, PetscInt *N, Mat ***mat) {
1092   Mat_Nest *bA = (Mat_Nest *)A->data;
1093 
1094   PetscFunctionBegin;
1095   if (M) *M = bA->nr;
1096   if (N) *N = bA->nc;
1097   if (mat) *mat = bA->m;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 /*@C
1102  MatNestGetSubMats - Returns the entire two dimensional array of matrices defining a `MATNEST` matrix.
1103 
1104  Not collective
1105 
1106  Input Parameter:
1107 .   A  - nest matrix
1108 
1109  Output Parameters:
1110 +   M - number of rows in the nest matrix
1111 .   N - number of cols in the nest matrix
1112 -   mat - 2d array of matrices
1113 
1114  Note:
1115  The user should not free the array mat.
1116 
1117  Fortran Note:
1118  This routine has a calling sequence
1119 $   call MatNestGetSubMats(A, M, N, mat, ierr)
1120  where the space allocated for the optional argument mat is assumed large enough (if provided).
1121 
1122  Level: developer
1123 
1124 .seealso: `MATNEST`, `MatNestGetSize()`, `MatNestGetSubMat()`, `MatNestGetLocalISs()`, `MATNEST`, `MatCreateNest()`,
1125           `MatNestSetSubMats()`, `MatNestGetISs()`, `MatNestSetSubMat()`
1126 @*/
1127 PetscErrorCode MatNestGetSubMats(Mat A, PetscInt *M, PetscInt *N, Mat ***mat) {
1128   PetscFunctionBegin;
1129   PetscUseMethod(A, "MatNestGetSubMats_C", (Mat, PetscInt *, PetscInt *, Mat ***), (A, M, N, mat));
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 PetscErrorCode MatNestGetSize_Nest(Mat A, PetscInt *M, PetscInt *N) {
1134   Mat_Nest *bA = (Mat_Nest *)A->data;
1135 
1136   PetscFunctionBegin;
1137   if (M) *M = bA->nr;
1138   if (N) *N = bA->nc;
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 /*@
1143  MatNestGetSize - Returns the size of the `MATNEST` matrix.
1144 
1145  Not collective
1146 
1147  Input Parameter:
1148 .   A  - `MATNEST` matrix
1149 
1150  Output Parameters:
1151 +   M - number of rows in the nested mat
1152 -   N - number of cols in the nested mat
1153 
1154  Level: developer
1155 
1156 .seealso: `MATNEST`, `MatNestGetSubMat()`, `MatNestGetSubMats()`, `MATNEST`, `MatCreateNest()`, `MatNestGetLocalISs()`,
1157           `MatNestGetISs()`
1158 @*/
1159 PetscErrorCode MatNestGetSize(Mat A, PetscInt *M, PetscInt *N) {
1160   PetscFunctionBegin;
1161   PetscUseMethod(A, "MatNestGetSize_C", (Mat, PetscInt *, PetscInt *), (A, M, N));
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 static PetscErrorCode MatNestGetISs_Nest(Mat A, IS rows[], IS cols[]) {
1166   Mat_Nest *vs = (Mat_Nest *)A->data;
1167   PetscInt  i;
1168 
1169   PetscFunctionBegin;
1170   if (rows)
1171     for (i = 0; i < vs->nr; i++) rows[i] = vs->isglobal.row[i];
1172   if (cols)
1173     for (i = 0; i < vs->nc; i++) cols[i] = vs->isglobal.col[i];
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 /*@C
1178  MatNestGetISs - Returns the index sets partitioning the row and column spaces of a `MATNEST`
1179 
1180  Not collective
1181 
1182  Input Parameter:
1183 .   A  - `MATNEST` matrix
1184 
1185  Output Parameters:
1186 +   rows - array of row index sets
1187 -   cols - array of column index sets
1188 
1189  Level: advanced
1190 
1191  Note:
1192  The user must have allocated arrays of the correct size. The reference count is not increased on the returned ISs.
1193 
1194 .seealso: `MATNEST`, `MatNestGetSubMat()`, `MatNestGetSubMats()`, `MatNestGetSize()`, `MatNestGetLocalISs()`, `MATNEST`,
1195           `MatCreateNest()`, `MatNestGetSubMats()`, `MatNestSetSubMats()`
1196 @*/
1197 PetscErrorCode MatNestGetISs(Mat A, IS rows[], IS cols[]) {
1198   PetscFunctionBegin;
1199   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1200   PetscUseMethod(A, "MatNestGetISs_C", (Mat, IS[], IS[]), (A, rows, cols));
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 static PetscErrorCode MatNestGetLocalISs_Nest(Mat A, IS rows[], IS cols[]) {
1205   Mat_Nest *vs = (Mat_Nest *)A->data;
1206   PetscInt  i;
1207 
1208   PetscFunctionBegin;
1209   if (rows)
1210     for (i = 0; i < vs->nr; i++) rows[i] = vs->islocal.row[i];
1211   if (cols)
1212     for (i = 0; i < vs->nc; i++) cols[i] = vs->islocal.col[i];
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /*@C
1217  MatNestGetLocalISs - Returns the index sets partitioning the row and column spaces of a `MATNEST`
1218 
1219  Not collective
1220 
1221  Input Parameter:
1222 .   A  - `MATNEST` matrix
1223 
1224  Output Parameters:
1225 +   rows - array of row index sets (or NULL to ignore)
1226 -   cols - array of column index sets (or NULL to ignore)
1227 
1228  Level: advanced
1229 
1230  Note:
1231  The user must have allocated arrays of the correct size. The reference count is not increased on the returned ISs.
1232 
1233 .seealso: `MATNEST`, `MatNestGetSubMat()`, `MatNestGetSubMats()`, `MatNestGetSize()`, `MatNestGetISs()`, `MatCreateNest()`,
1234           `MATNEST`, `MatNestSetSubMats()`, `MatNestSetSubMat()`
1235 @*/
1236 PetscErrorCode MatNestGetLocalISs(Mat A, IS rows[], IS cols[]) {
1237   PetscFunctionBegin;
1238   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1239   PetscUseMethod(A, "MatNestGetLocalISs_C", (Mat, IS[], IS[]), (A, rows, cols));
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 PetscErrorCode MatNestSetVecType_Nest(Mat A, VecType vtype) {
1244   PetscBool flg;
1245 
1246   PetscFunctionBegin;
1247   PetscCall(PetscStrcmp(vtype, VECNEST, &flg));
1248   /* In reality, this only distinguishes VECNEST and "other" */
1249   if (flg) A->ops->getvecs = MatCreateVecs_Nest;
1250   else A->ops->getvecs = (PetscErrorCode(*)(Mat, Vec *, Vec *))0;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 /*@C
1255  MatNestSetVecType - Sets the type of `Vec` returned by `MatCreateVecs()`
1256 
1257  Not collective
1258 
1259  Input Parameters:
1260 +  A  - `MATNEST` matrix
1261 -  vtype - `VecType` to use for creating vectors
1262 
1263  Level: developer
1264 
1265 .seealso: `MATNEST`, `MatCreateVecs()`, `MATNEST`, `MatCreateNest()`, `VecType`
1266 @*/
1267 PetscErrorCode MatNestSetVecType(Mat A, VecType vtype) {
1268   PetscFunctionBegin;
1269   PetscTryMethod(A, "MatNestSetVecType_C", (Mat, VecType), (A, vtype));
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 PetscErrorCode MatNestSetSubMats_Nest(Mat A, PetscInt nr, const IS is_row[], PetscInt nc, const IS is_col[], const Mat a[]) {
1274   Mat_Nest *s = (Mat_Nest *)A->data;
1275   PetscInt  i, j, m, n, M, N;
1276   PetscBool cong, isstd, sametype = PETSC_FALSE;
1277   VecType   vtype, type;
1278 
1279   PetscFunctionBegin;
1280   PetscCall(MatReset_Nest(A));
1281 
1282   s->nr = nr;
1283   s->nc = nc;
1284 
1285   /* Create space for submatrices */
1286   PetscCall(PetscMalloc1(nr, &s->m));
1287   for (i = 0; i < nr; i++) PetscCall(PetscMalloc1(nc, &s->m[i]));
1288   for (i = 0; i < nr; i++) {
1289     for (j = 0; j < nc; j++) {
1290       s->m[i][j] = a[i * nc + j];
1291       if (a[i * nc + j]) PetscCall(PetscObjectReference((PetscObject)a[i * nc + j]));
1292     }
1293   }
1294   PetscCall(MatGetVecType(A, &vtype));
1295   PetscCall(PetscStrcmp(vtype, VECSTANDARD, &isstd));
1296   if (isstd) {
1297     /* check if all blocks have the same vectype */
1298     vtype = NULL;
1299     for (i = 0; i < nr; i++) {
1300       for (j = 0; j < nc; j++) {
1301         if (a[i * nc + j]) {
1302           if (!vtype) { /* first visited block */
1303             PetscCall(MatGetVecType(a[i * nc + j], &vtype));
1304             sametype = PETSC_TRUE;
1305           } else if (sametype) {
1306             PetscCall(MatGetVecType(a[i * nc + j], &type));
1307             PetscCall(PetscStrcmp(vtype, type, &sametype));
1308           }
1309         }
1310       }
1311     }
1312     if (sametype) { /* propagate vectype */
1313       PetscCall(MatSetVecType(A, vtype));
1314     }
1315   }
1316 
1317   PetscCall(MatSetUp_NestIS_Private(A, nr, is_row, nc, is_col));
1318 
1319   PetscCall(PetscMalloc1(nr, &s->row_len));
1320   PetscCall(PetscMalloc1(nc, &s->col_len));
1321   for (i = 0; i < nr; i++) s->row_len[i] = -1;
1322   for (j = 0; j < nc; j++) s->col_len[j] = -1;
1323 
1324   PetscCall(PetscCalloc1(nr * nc, &s->nnzstate));
1325   for (i = 0; i < nr; i++) {
1326     for (j = 0; j < nc; j++) {
1327       if (s->m[i][j]) PetscCall(MatGetNonzeroState(s->m[i][j], &s->nnzstate[i * nc + j]));
1328     }
1329   }
1330 
1331   PetscCall(MatNestGetSizes_Private(A, &m, &n, &M, &N));
1332 
1333   PetscCall(PetscLayoutSetSize(A->rmap, M));
1334   PetscCall(PetscLayoutSetLocalSize(A->rmap, m));
1335   PetscCall(PetscLayoutSetSize(A->cmap, N));
1336   PetscCall(PetscLayoutSetLocalSize(A->cmap, n));
1337 
1338   PetscCall(PetscLayoutSetUp(A->rmap));
1339   PetscCall(PetscLayoutSetUp(A->cmap));
1340 
1341   /* disable operations that are not supported for non-square matrices,
1342      or matrices for which is_row != is_col  */
1343   PetscCall(MatHasCongruentLayouts(A, &cong));
1344   if (cong && nr != nc) cong = PETSC_FALSE;
1345   if (cong) {
1346     for (i = 0; cong && i < nr; i++) PetscCall(ISEqualUnsorted(s->isglobal.row[i], s->isglobal.col[i], &cong));
1347   }
1348   if (!cong) {
1349     A->ops->missingdiagonal = NULL;
1350     A->ops->getdiagonal     = NULL;
1351     A->ops->shift           = NULL;
1352     A->ops->diagonalset     = NULL;
1353   }
1354 
1355   PetscCall(PetscCalloc2(nr, &s->left, nc, &s->right));
1356   PetscCall(PetscObjectStateIncrease((PetscObject)A));
1357   A->nonzerostate++;
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 /*@
1362    MatNestSetSubMats - Sets the nested submatrices in a `MATNEST`
1363 
1364    Collective on A
1365 
1366    Input Parameters:
1367 +  A - `MATNEST` matrix
1368 .  nr - number of nested row blocks
1369 .  is_row - index sets for each nested row block, or NULL to make contiguous
1370 .  nc - number of nested column blocks
1371 .  is_col - index sets for each nested column block, or NULL to make contiguous
1372 -  a - row-aligned array of nr*nc submatrices, empty submatrices can be passed using NULL
1373 
1374    Note:
1375    This always resets any submatrix information previously set
1376 
1377    Level: advanced
1378 
1379 .seealso: `MATNEST`, `MatCreateNest()`, `MATNEST`, `MatNestSetSubMat()`, `MatNestGetSubMat()`, `MatNestGetSubMats()`
1380 @*/
1381 PetscErrorCode MatNestSetSubMats(Mat A, PetscInt nr, const IS is_row[], PetscInt nc, const IS is_col[], const Mat a[]) {
1382   PetscInt i;
1383 
1384   PetscFunctionBegin;
1385   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1386   PetscCheck(nr >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Number of rows cannot be negative");
1387   if (nr && is_row) {
1388     PetscValidPointer(is_row, 3);
1389     for (i = 0; i < nr; i++) PetscValidHeaderSpecific(is_row[i], IS_CLASSID, 3);
1390   }
1391   PetscCheck(nc >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Number of columns cannot be negative");
1392   if (nc && is_col) {
1393     PetscValidPointer(is_col, 5);
1394     for (i = 0; i < nc; i++) PetscValidHeaderSpecific(is_col[i], IS_CLASSID, 5);
1395   }
1396   if (nr * nc > 0) PetscValidPointer(a, 6);
1397   PetscUseMethod(A, "MatNestSetSubMats_C", (Mat, PetscInt, const IS[], PetscInt, const IS[], const Mat[]), (A, nr, is_row, nc, is_col, a));
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 static PetscErrorCode MatNestCreateAggregateL2G_Private(Mat A, PetscInt n, const IS islocal[], const IS isglobal[], PetscBool colflg, ISLocalToGlobalMapping *ltog) {
1402   PetscBool flg;
1403   PetscInt  i, j, m, mi, *ix;
1404 
1405   PetscFunctionBegin;
1406   *ltog = NULL;
1407   for (i = 0, m = 0, flg = PETSC_FALSE; i < n; i++) {
1408     if (islocal[i]) {
1409       PetscCall(ISGetLocalSize(islocal[i], &mi));
1410       flg = PETSC_TRUE; /* We found a non-trivial entry */
1411     } else {
1412       PetscCall(ISGetLocalSize(isglobal[i], &mi));
1413     }
1414     m += mi;
1415   }
1416   if (!flg) PetscFunctionReturn(0);
1417 
1418   PetscCall(PetscMalloc1(m, &ix));
1419   for (i = 0, m = 0; i < n; i++) {
1420     ISLocalToGlobalMapping smap = NULL;
1421     Mat                    sub  = NULL;
1422     PetscSF                sf;
1423     PetscLayout            map;
1424     const PetscInt        *ix2;
1425 
1426     if (!colflg) {
1427       PetscCall(MatNestFindNonzeroSubMatRow(A, i, &sub));
1428     } else {
1429       PetscCall(MatNestFindNonzeroSubMatCol(A, i, &sub));
1430     }
1431     if (sub) {
1432       if (!colflg) {
1433         PetscCall(MatGetLocalToGlobalMapping(sub, &smap, NULL));
1434       } else {
1435         PetscCall(MatGetLocalToGlobalMapping(sub, NULL, &smap));
1436       }
1437     }
1438     /*
1439        Now we need to extract the monolithic global indices that correspond to the given split global indices.
1440        In many/most cases, we only want MatGetLocalSubMatrix() to work, in which case we only need to know the size of the local spaces.
1441     */
1442     PetscCall(ISGetIndices(isglobal[i], &ix2));
1443     if (islocal[i]) {
1444       PetscInt *ilocal, *iremote;
1445       PetscInt  mil, nleaves;
1446 
1447       PetscCall(ISGetLocalSize(islocal[i], &mi));
1448       PetscCheck(smap, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing local to global map");
1449       for (j = 0; j < mi; j++) ix[m + j] = j;
1450       PetscCall(ISLocalToGlobalMappingApply(smap, mi, ix + m, ix + m));
1451 
1452       /* PetscSFSetGraphLayout does not like negative indices */
1453       PetscCall(PetscMalloc2(mi, &ilocal, mi, &iremote));
1454       for (j = 0, nleaves = 0; j < mi; j++) {
1455         if (ix[m + j] < 0) continue;
1456         ilocal[nleaves]  = j;
1457         iremote[nleaves] = ix[m + j];
1458         nleaves++;
1459       }
1460       PetscCall(ISGetLocalSize(isglobal[i], &mil));
1461       PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1462       PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)A), &map));
1463       PetscCall(PetscLayoutSetLocalSize(map, mil));
1464       PetscCall(PetscLayoutSetUp(map));
1465       PetscCall(PetscSFSetGraphLayout(sf, map, nleaves, ilocal, PETSC_USE_POINTER, iremote));
1466       PetscCall(PetscLayoutDestroy(&map));
1467       PetscCall(PetscSFBcastBegin(sf, MPIU_INT, ix2, ix + m, MPI_REPLACE));
1468       PetscCall(PetscSFBcastEnd(sf, MPIU_INT, ix2, ix + m, MPI_REPLACE));
1469       PetscCall(PetscSFDestroy(&sf));
1470       PetscCall(PetscFree2(ilocal, iremote));
1471     } else {
1472       PetscCall(ISGetLocalSize(isglobal[i], &mi));
1473       for (j = 0; j < mi; j++) ix[m + j] = ix2[i];
1474     }
1475     PetscCall(ISRestoreIndices(isglobal[i], &ix2));
1476     m += mi;
1477   }
1478   PetscCall(ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)A), 1, m, ix, PETSC_OWN_POINTER, ltog));
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 /* If an IS was provided, there is nothing Nest needs to do, otherwise Nest will build a strided IS */
1483 /*
1484   nprocessors = NP
1485   Nest x^T = ((g_0,g_1,...g_nprocs-1), (h_0,h_1,...h_NP-1))
1486        proc 0: => (g_0,h_0,)
1487        proc 1: => (g_1,h_1,)
1488        ...
1489        proc nprocs-1: => (g_NP-1,h_NP-1,)
1490 
1491             proc 0:                      proc 1:                    proc nprocs-1:
1492     is[0] = (0,1,2,...,nlocal(g_0)-1)  (0,1,...,nlocal(g_1)-1)  (0,1,...,nlocal(g_NP-1))
1493 
1494             proc 0:
1495     is[1] = (nlocal(g_0),nlocal(g_0)+1,...,nlocal(g_0)+nlocal(h_0)-1)
1496             proc 1:
1497     is[1] = (nlocal(g_1),nlocal(g_1)+1,...,nlocal(g_1)+nlocal(h_1)-1)
1498 
1499             proc NP-1:
1500     is[1] = (nlocal(g_NP-1),nlocal(g_NP-1)+1,...,nlocal(g_NP-1)+nlocal(h_NP-1)-1)
1501 */
1502 static PetscErrorCode MatSetUp_NestIS_Private(Mat A, PetscInt nr, const IS is_row[], PetscInt nc, const IS is_col[]) {
1503   Mat_Nest *vs = (Mat_Nest *)A->data;
1504   PetscInt  i, j, offset, n, nsum, bs;
1505   Mat       sub = NULL;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(PetscMalloc1(nr, &vs->isglobal.row));
1509   PetscCall(PetscMalloc1(nc, &vs->isglobal.col));
1510   if (is_row) { /* valid IS is passed in */
1511     /* refs on is[] are incremented */
1512     for (i = 0; i < vs->nr; i++) {
1513       PetscCall(PetscObjectReference((PetscObject)is_row[i]));
1514 
1515       vs->isglobal.row[i] = is_row[i];
1516     }
1517   } else { /* Create the ISs by inspecting sizes of a submatrix in each row */
1518     nsum = 0;
1519     for (i = 0; i < vs->nr; i++) { /* Add up the local sizes to compute the aggregate offset */
1520       PetscCall(MatNestFindNonzeroSubMatRow(A, i, &sub));
1521       PetscCheck(sub, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "No nonzero submatrix in row %" PetscInt_FMT, i);
1522       PetscCall(MatGetLocalSize(sub, &n, NULL));
1523       PetscCheck(n >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Sizes have not yet been set for submatrix");
1524       nsum += n;
1525     }
1526     PetscCallMPI(MPI_Scan(&nsum, &offset, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)A)));
1527     offset -= nsum;
1528     for (i = 0; i < vs->nr; i++) {
1529       PetscCall(MatNestFindNonzeroSubMatRow(A, i, &sub));
1530       PetscCall(MatGetLocalSize(sub, &n, NULL));
1531       PetscCall(MatGetBlockSizes(sub, &bs, NULL));
1532       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub), n, offset, 1, &vs->isglobal.row[i]));
1533       PetscCall(ISSetBlockSize(vs->isglobal.row[i], bs));
1534       offset += n;
1535     }
1536   }
1537 
1538   if (is_col) { /* valid IS is passed in */
1539     /* refs on is[] are incremented */
1540     for (j = 0; j < vs->nc; j++) {
1541       PetscCall(PetscObjectReference((PetscObject)is_col[j]));
1542 
1543       vs->isglobal.col[j] = is_col[j];
1544     }
1545   } else { /* Create the ISs by inspecting sizes of a submatrix in each column */
1546     offset = A->cmap->rstart;
1547     nsum   = 0;
1548     for (j = 0; j < vs->nc; j++) {
1549       PetscCall(MatNestFindNonzeroSubMatCol(A, j, &sub));
1550       PetscCheck(sub, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "No nonzero submatrix in column %" PetscInt_FMT, i);
1551       PetscCall(MatGetLocalSize(sub, NULL, &n));
1552       PetscCheck(n >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Sizes have not yet been set for submatrix");
1553       nsum += n;
1554     }
1555     PetscCallMPI(MPI_Scan(&nsum, &offset, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)A)));
1556     offset -= nsum;
1557     for (j = 0; j < vs->nc; j++) {
1558       PetscCall(MatNestFindNonzeroSubMatCol(A, j, &sub));
1559       PetscCall(MatGetLocalSize(sub, NULL, &n));
1560       PetscCall(MatGetBlockSizes(sub, NULL, &bs));
1561       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub), n, offset, 1, &vs->isglobal.col[j]));
1562       PetscCall(ISSetBlockSize(vs->isglobal.col[j], bs));
1563       offset += n;
1564     }
1565   }
1566 
1567   /* Set up the local ISs */
1568   PetscCall(PetscMalloc1(vs->nr, &vs->islocal.row));
1569   PetscCall(PetscMalloc1(vs->nc, &vs->islocal.col));
1570   for (i = 0, offset = 0; i < vs->nr; i++) {
1571     IS                     isloc;
1572     ISLocalToGlobalMapping rmap = NULL;
1573     PetscInt               nlocal, bs;
1574     PetscCall(MatNestFindNonzeroSubMatRow(A, i, &sub));
1575     if (sub) PetscCall(MatGetLocalToGlobalMapping(sub, &rmap, NULL));
1576     if (rmap) {
1577       PetscCall(MatGetBlockSizes(sub, &bs, NULL));
1578       PetscCall(ISLocalToGlobalMappingGetSize(rmap, &nlocal));
1579       PetscCall(ISCreateStride(PETSC_COMM_SELF, nlocal, offset, 1, &isloc));
1580       PetscCall(ISSetBlockSize(isloc, bs));
1581     } else {
1582       nlocal = 0;
1583       isloc  = NULL;
1584     }
1585     vs->islocal.row[i] = isloc;
1586     offset += nlocal;
1587   }
1588   for (i = 0, offset = 0; i < vs->nc; i++) {
1589     IS                     isloc;
1590     ISLocalToGlobalMapping cmap = NULL;
1591     PetscInt               nlocal, bs;
1592     PetscCall(MatNestFindNonzeroSubMatCol(A, i, &sub));
1593     if (sub) PetscCall(MatGetLocalToGlobalMapping(sub, NULL, &cmap));
1594     if (cmap) {
1595       PetscCall(MatGetBlockSizes(sub, NULL, &bs));
1596       PetscCall(ISLocalToGlobalMappingGetSize(cmap, &nlocal));
1597       PetscCall(ISCreateStride(PETSC_COMM_SELF, nlocal, offset, 1, &isloc));
1598       PetscCall(ISSetBlockSize(isloc, bs));
1599     } else {
1600       nlocal = 0;
1601       isloc  = NULL;
1602     }
1603     vs->islocal.col[i] = isloc;
1604     offset += nlocal;
1605   }
1606 
1607   /* Set up the aggregate ISLocalToGlobalMapping */
1608   {
1609     ISLocalToGlobalMapping rmap, cmap;
1610     PetscCall(MatNestCreateAggregateL2G_Private(A, vs->nr, vs->islocal.row, vs->isglobal.row, PETSC_FALSE, &rmap));
1611     PetscCall(MatNestCreateAggregateL2G_Private(A, vs->nc, vs->islocal.col, vs->isglobal.col, PETSC_TRUE, &cmap));
1612     if (rmap && cmap) PetscCall(MatSetLocalToGlobalMapping(A, rmap, cmap));
1613     PetscCall(ISLocalToGlobalMappingDestroy(&rmap));
1614     PetscCall(ISLocalToGlobalMappingDestroy(&cmap));
1615   }
1616 
1617   if (PetscDefined(USE_DEBUG)) {
1618     for (i = 0; i < vs->nr; i++) {
1619       for (j = 0; j < vs->nc; j++) {
1620         PetscInt m, n, M, N, mi, ni, Mi, Ni;
1621         Mat      B = vs->m[i][j];
1622         if (!B) continue;
1623         PetscCall(MatGetSize(B, &M, &N));
1624         PetscCall(MatGetLocalSize(B, &m, &n));
1625         PetscCall(ISGetSize(vs->isglobal.row[i], &Mi));
1626         PetscCall(ISGetSize(vs->isglobal.col[j], &Ni));
1627         PetscCall(ISGetLocalSize(vs->isglobal.row[i], &mi));
1628         PetscCall(ISGetLocalSize(vs->isglobal.col[j], &ni));
1629         PetscCheck(M == Mi && N == Ni, PetscObjectComm((PetscObject)sub), PETSC_ERR_ARG_INCOMP, "Global sizes (%" PetscInt_FMT ",%" PetscInt_FMT ") of nested submatrix (%" PetscInt_FMT ",%" PetscInt_FMT ") do not agree with space defined by index sets (%" PetscInt_FMT ",%" PetscInt_FMT ")", M, N, i, j, Mi, Ni);
1630         PetscCheck(m == mi && n == ni, PetscObjectComm((PetscObject)sub), PETSC_ERR_ARG_INCOMP, "Local sizes (%" PetscInt_FMT ",%" PetscInt_FMT ") of nested submatrix (%" PetscInt_FMT ",%" PetscInt_FMT ") do not agree with space defined by index sets (%" PetscInt_FMT ",%" PetscInt_FMT ")", m, n, i, j, mi, ni);
1631       }
1632     }
1633   }
1634 
1635   /* Set A->assembled if all non-null blocks are currently assembled */
1636   for (i = 0; i < vs->nr; i++) {
1637     for (j = 0; j < vs->nc; j++) {
1638       if (vs->m[i][j] && !vs->m[i][j]->assembled) PetscFunctionReturn(0);
1639     }
1640   }
1641   A->assembled = PETSC_TRUE;
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 /*@C
1646    MatCreateNest - Creates a new `MATNEST` matrix containing several nested submatrices, each stored separately
1647 
1648    Collective
1649 
1650    Input Parameters:
1651 +  comm - Communicator for the new `MATNEST`
1652 .  nr - number of nested row blocks
1653 .  is_row - index sets for each nested row block, or NULL to make contiguous
1654 .  nc - number of nested column blocks
1655 .  is_col - index sets for each nested column block, or NULL to make contiguous
1656 -  a - row-aligned array of nr*nc submatrices, empty submatrices can be passed using NULL
1657 
1658    Output Parameter:
1659 .  B - new matrix
1660 
1661    Level: advanced
1662 
1663 .seealso: `MATNEST`, `MatCreate()`, `VecCreateNest()`, `DMCreateMatrix()`, `MATNEST`, `MatNestSetSubMat()`,
1664           `MatNestGetSubMat()`, `MatNestGetLocalISs()`, `MatNestGetSize()`,
1665           `MatNestGetISs()`, `MatNestSetSubMats()`, `MatNestGetSubMats()`
1666 @*/
1667 PetscErrorCode MatCreateNest(MPI_Comm comm, PetscInt nr, const IS is_row[], PetscInt nc, const IS is_col[], const Mat a[], Mat *B) {
1668   Mat A;
1669 
1670   PetscFunctionBegin;
1671   *B = NULL;
1672   PetscCall(MatCreate(comm, &A));
1673   PetscCall(MatSetType(A, MATNEST));
1674   A->preallocated = PETSC_TRUE;
1675   PetscCall(MatNestSetSubMats(A, nr, is_row, nc, is_col, a));
1676   *B = A;
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatConvert_Nest_SeqAIJ_fast(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1681   Mat_Nest     *nest = (Mat_Nest *)A->data;
1682   Mat          *trans;
1683   PetscScalar **avv;
1684   PetscScalar  *vv;
1685   PetscInt    **aii, **ajj;
1686   PetscInt     *ii, *jj, *ci;
1687   PetscInt      nr, nc, nnz, i, j;
1688   PetscBool     done;
1689 
1690   PetscFunctionBegin;
1691   PetscCall(MatGetSize(A, &nr, &nc));
1692   if (reuse == MAT_REUSE_MATRIX) {
1693     PetscInt rnr;
1694 
1695     PetscCall(MatGetRowIJ(*newmat, 0, PETSC_FALSE, PETSC_FALSE, &rnr, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
1696     PetscCheck(done, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "MatGetRowIJ");
1697     PetscCheck(rnr == nr, PetscObjectComm((PetscObject)A), PETSC_ERR_USER, "Cannot reuse matrix, wrong number of rows");
1698     PetscCall(MatSeqAIJGetArray(*newmat, &vv));
1699   }
1700   /* extract CSR for nested SeqAIJ matrices */
1701   nnz = 0;
1702   PetscCall(PetscCalloc4(nest->nr * nest->nc, &aii, nest->nr * nest->nc, &ajj, nest->nr * nest->nc, &avv, nest->nr * nest->nc, &trans));
1703   for (i = 0; i < nest->nr; ++i) {
1704     for (j = 0; j < nest->nc; ++j) {
1705       Mat B = nest->m[i][j];
1706       if (B) {
1707         PetscScalar *naa;
1708         PetscInt    *nii, *njj, nnr;
1709         PetscBool    istrans;
1710 
1711         PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &istrans));
1712         if (istrans) {
1713           Mat Bt;
1714 
1715           PetscCall(MatTransposeGetMat(B, &Bt));
1716           PetscCall(MatTranspose(Bt, MAT_INITIAL_MATRIX, &trans[i * nest->nc + j]));
1717           B = trans[i * nest->nc + j];
1718         } else {
1719           PetscCall(PetscObjectTypeCompare((PetscObject)B, MATHERMITIANTRANSPOSEVIRTUAL, &istrans));
1720           if (istrans) {
1721             Mat Bt;
1722 
1723             PetscCall(MatHermitianTransposeGetMat(B, &Bt));
1724             PetscCall(MatHermitianTranspose(Bt, MAT_INITIAL_MATRIX, &trans[i * nest->nc + j]));
1725             B = trans[i * nest->nc + j];
1726           }
1727         }
1728         PetscCall(MatGetRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &nnr, (const PetscInt **)&nii, (const PetscInt **)&njj, &done));
1729         PetscCheck(done, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "MatGetRowIJ");
1730         PetscCall(MatSeqAIJGetArray(B, &naa));
1731         nnz += nii[nnr];
1732 
1733         aii[i * nest->nc + j] = nii;
1734         ajj[i * nest->nc + j] = njj;
1735         avv[i * nest->nc + j] = naa;
1736       }
1737     }
1738   }
1739   if (reuse != MAT_REUSE_MATRIX) {
1740     PetscCall(PetscMalloc1(nr + 1, &ii));
1741     PetscCall(PetscMalloc1(nnz, &jj));
1742     PetscCall(PetscMalloc1(nnz, &vv));
1743   } else {
1744     PetscCheck(nnz == ii[nr], PetscObjectComm((PetscObject)A), PETSC_ERR_USER, "Cannot reuse matrix, wrong number of nonzeros");
1745   }
1746 
1747   /* new row pointer */
1748   PetscCall(PetscArrayzero(ii, nr + 1));
1749   for (i = 0; i < nest->nr; ++i) {
1750     PetscInt ncr, rst;
1751 
1752     PetscCall(ISStrideGetInfo(nest->isglobal.row[i], &rst, NULL));
1753     PetscCall(ISGetLocalSize(nest->isglobal.row[i], &ncr));
1754     for (j = 0; j < nest->nc; ++j) {
1755       if (aii[i * nest->nc + j]) {
1756         PetscInt *nii = aii[i * nest->nc + j];
1757         PetscInt  ir;
1758 
1759         for (ir = rst; ir < ncr + rst; ++ir) {
1760           ii[ir + 1] += nii[1] - nii[0];
1761           nii++;
1762         }
1763       }
1764     }
1765   }
1766   for (i = 0; i < nr; i++) ii[i + 1] += ii[i];
1767 
1768   /* construct CSR for the new matrix */
1769   PetscCall(PetscCalloc1(nr, &ci));
1770   for (i = 0; i < nest->nr; ++i) {
1771     PetscInt ncr, rst;
1772 
1773     PetscCall(ISStrideGetInfo(nest->isglobal.row[i], &rst, NULL));
1774     PetscCall(ISGetLocalSize(nest->isglobal.row[i], &ncr));
1775     for (j = 0; j < nest->nc; ++j) {
1776       if (aii[i * nest->nc + j]) {
1777         PetscScalar *nvv = avv[i * nest->nc + j];
1778         PetscInt    *nii = aii[i * nest->nc + j];
1779         PetscInt    *njj = ajj[i * nest->nc + j];
1780         PetscInt     ir, cst;
1781 
1782         PetscCall(ISStrideGetInfo(nest->isglobal.col[j], &cst, NULL));
1783         for (ir = rst; ir < ncr + rst; ++ir) {
1784           PetscInt ij, rsize = nii[1] - nii[0], ist = ii[ir] + ci[ir];
1785 
1786           for (ij = 0; ij < rsize; ij++) {
1787             jj[ist + ij] = *njj + cst;
1788             vv[ist + ij] = *nvv;
1789             njj++;
1790             nvv++;
1791           }
1792           ci[ir] += rsize;
1793           nii++;
1794         }
1795       }
1796     }
1797   }
1798   PetscCall(PetscFree(ci));
1799 
1800   /* restore info */
1801   for (i = 0; i < nest->nr; ++i) {
1802     for (j = 0; j < nest->nc; ++j) {
1803       Mat B = nest->m[i][j];
1804       if (B) {
1805         PetscInt nnr = 0, k = i * nest->nc + j;
1806 
1807         B = (trans[k] ? trans[k] : B);
1808         PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &nnr, (const PetscInt **)&aii[k], (const PetscInt **)&ajj[k], &done));
1809         PetscCheck(done, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "MatRestoreRowIJ");
1810         PetscCall(MatSeqAIJRestoreArray(B, &avv[k]));
1811         PetscCall(MatDestroy(&trans[k]));
1812       }
1813     }
1814   }
1815   PetscCall(PetscFree4(aii, ajj, avv, trans));
1816 
1817   /* finalize newmat */
1818   if (reuse == MAT_INITIAL_MATRIX) {
1819     PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), nr, nc, ii, jj, vv, newmat));
1820   } else if (reuse == MAT_INPLACE_MATRIX) {
1821     Mat B;
1822 
1823     PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), nr, nc, ii, jj, vv, &B));
1824     PetscCall(MatHeaderReplace(A, &B));
1825   }
1826   PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY));
1827   PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY));
1828   {
1829     Mat_SeqAIJ *a = (Mat_SeqAIJ *)((*newmat)->data);
1830     a->free_a     = PETSC_TRUE;
1831     a->free_ij    = PETSC_TRUE;
1832   }
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 PETSC_INTERN PetscErrorCode MatAXPY_Dense_Nest(Mat Y, PetscScalar a, Mat X) {
1837   Mat_Nest *nest = (Mat_Nest *)X->data;
1838   PetscInt  i, j, k, rstart;
1839   PetscBool flg;
1840 
1841   PetscFunctionBegin;
1842   /* Fill by row */
1843   for (j = 0; j < nest->nc; ++j) {
1844     /* Using global column indices and ISAllGather() is not scalable. */
1845     IS              bNis;
1846     PetscInt        bN;
1847     const PetscInt *bNindices;
1848     PetscCall(ISAllGather(nest->isglobal.col[j], &bNis));
1849     PetscCall(ISGetSize(bNis, &bN));
1850     PetscCall(ISGetIndices(bNis, &bNindices));
1851     for (i = 0; i < nest->nr; ++i) {
1852       Mat             B = nest->m[i][j], D = NULL;
1853       PetscInt        bm, br;
1854       const PetscInt *bmindices;
1855       if (!B) continue;
1856       PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
1857       if (flg) {
1858         PetscTryMethod(B, "MatTransposeGetMat_C", (Mat, Mat *), (B, &D));
1859         PetscTryMethod(B, "MatHermitianTransposeGetMat_C", (Mat, Mat *), (B, &D));
1860         PetscCall(MatConvert(B, ((PetscObject)D)->type_name, MAT_INITIAL_MATRIX, &D));
1861         B = D;
1862       }
1863       PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQSBAIJ, MATMPISBAIJ, ""));
1864       if (flg) {
1865         if (D) PetscCall(MatConvert(D, MATBAIJ, MAT_INPLACE_MATRIX, &D));
1866         else PetscCall(MatConvert(B, MATBAIJ, MAT_INITIAL_MATRIX, &D));
1867         B = D;
1868       }
1869       PetscCall(ISGetLocalSize(nest->isglobal.row[i], &bm));
1870       PetscCall(ISGetIndices(nest->isglobal.row[i], &bmindices));
1871       PetscCall(MatGetOwnershipRange(B, &rstart, NULL));
1872       for (br = 0; br < bm; ++br) {
1873         PetscInt           row = bmindices[br], brncols, *cols;
1874         const PetscInt    *brcols;
1875         const PetscScalar *brcoldata;
1876         PetscScalar       *vals = NULL;
1877         PetscCall(MatGetRow(B, br + rstart, &brncols, &brcols, &brcoldata));
1878         PetscCall(PetscMalloc1(brncols, &cols));
1879         for (k = 0; k < brncols; k++) cols[k] = bNindices[brcols[k]];
1880         /*
1881           Nest blocks are required to be nonoverlapping -- otherwise nest and monolithic index layouts wouldn't match.
1882           Thus, we could use INSERT_VALUES, but I prefer ADD_VALUES.
1883          */
1884         if (a != 1.0) {
1885           PetscCall(PetscMalloc1(brncols, &vals));
1886           for (k = 0; k < brncols; k++) vals[k] = a * brcoldata[k];
1887           PetscCall(MatSetValues(Y, 1, &row, brncols, cols, vals, ADD_VALUES));
1888           PetscCall(PetscFree(vals));
1889         } else {
1890           PetscCall(MatSetValues(Y, 1, &row, brncols, cols, brcoldata, ADD_VALUES));
1891         }
1892         PetscCall(MatRestoreRow(B, br + rstart, &brncols, &brcols, &brcoldata));
1893         PetscCall(PetscFree(cols));
1894       }
1895       if (D) PetscCall(MatDestroy(&D));
1896       PetscCall(ISRestoreIndices(nest->isglobal.row[i], &bmindices));
1897     }
1898     PetscCall(ISRestoreIndices(bNis, &bNindices));
1899     PetscCall(ISDestroy(&bNis));
1900   }
1901   PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
1902   PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 PetscErrorCode MatConvert_Nest_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1907   Mat_Nest   *nest = (Mat_Nest *)A->data;
1908   PetscInt    m, n, M, N, i, j, k, *dnnz, *onnz, rstart, cstart, cend;
1909   PetscMPIInt size;
1910   Mat         C;
1911 
1912   PetscFunctionBegin;
1913   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1914   if (size == 1) { /* look for a special case with SeqAIJ matrices and strided-1, contiguous, blocks */
1915     PetscInt  nf;
1916     PetscBool fast;
1917 
1918     PetscCall(PetscStrcmp(newtype, MATAIJ, &fast));
1919     if (!fast) PetscCall(PetscStrcmp(newtype, MATSEQAIJ, &fast));
1920     for (i = 0; i < nest->nr && fast; ++i) {
1921       for (j = 0; j < nest->nc && fast; ++j) {
1922         Mat B = nest->m[i][j];
1923         if (B) {
1924           PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &fast));
1925           if (!fast) {
1926             PetscBool istrans;
1927 
1928             PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &istrans));
1929             if (istrans) {
1930               Mat Bt;
1931 
1932               PetscCall(MatTransposeGetMat(B, &Bt));
1933               PetscCall(PetscObjectTypeCompare((PetscObject)Bt, MATSEQAIJ, &fast));
1934             } else {
1935               PetscCall(PetscObjectTypeCompare((PetscObject)B, MATHERMITIANTRANSPOSEVIRTUAL, &istrans));
1936               if (istrans) {
1937                 Mat Bt;
1938 
1939                 PetscCall(MatHermitianTransposeGetMat(B, &Bt));
1940                 PetscCall(PetscObjectTypeCompare((PetscObject)Bt, MATSEQAIJ, &fast));
1941               }
1942             }
1943           }
1944         }
1945       }
1946     }
1947     for (i = 0, nf = 0; i < nest->nr && fast; ++i) {
1948       PetscCall(PetscObjectTypeCompare((PetscObject)nest->isglobal.row[i], ISSTRIDE, &fast));
1949       if (fast) {
1950         PetscInt f, s;
1951 
1952         PetscCall(ISStrideGetInfo(nest->isglobal.row[i], &f, &s));
1953         if (f != nf || s != 1) {
1954           fast = PETSC_FALSE;
1955         } else {
1956           PetscCall(ISGetSize(nest->isglobal.row[i], &f));
1957           nf += f;
1958         }
1959       }
1960     }
1961     for (i = 0, nf = 0; i < nest->nc && fast; ++i) {
1962       PetscCall(PetscObjectTypeCompare((PetscObject)nest->isglobal.col[i], ISSTRIDE, &fast));
1963       if (fast) {
1964         PetscInt f, s;
1965 
1966         PetscCall(ISStrideGetInfo(nest->isglobal.col[i], &f, &s));
1967         if (f != nf || s != 1) {
1968           fast = PETSC_FALSE;
1969         } else {
1970           PetscCall(ISGetSize(nest->isglobal.col[i], &f));
1971           nf += f;
1972         }
1973       }
1974     }
1975     if (fast) {
1976       PetscCall(MatConvert_Nest_SeqAIJ_fast(A, newtype, reuse, newmat));
1977       PetscFunctionReturn(0);
1978     }
1979   }
1980   PetscCall(MatGetSize(A, &M, &N));
1981   PetscCall(MatGetLocalSize(A, &m, &n));
1982   PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend));
1983   if (reuse == MAT_REUSE_MATRIX) C = *newmat;
1984   else {
1985     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1986     PetscCall(MatSetType(C, newtype));
1987     PetscCall(MatSetSizes(C, m, n, M, N));
1988   }
1989   PetscCall(PetscMalloc1(2 * m, &dnnz));
1990   onnz = dnnz + m;
1991   for (k = 0; k < m; k++) {
1992     dnnz[k] = 0;
1993     onnz[k] = 0;
1994   }
1995   for (j = 0; j < nest->nc; ++j) {
1996     IS              bNis;
1997     PetscInt        bN;
1998     const PetscInt *bNindices;
1999     PetscBool       flg;
2000     /* Using global column indices and ISAllGather() is not scalable. */
2001     PetscCall(ISAllGather(nest->isglobal.col[j], &bNis));
2002     PetscCall(ISGetSize(bNis, &bN));
2003     PetscCall(ISGetIndices(bNis, &bNindices));
2004     for (i = 0; i < nest->nr; ++i) {
2005       PetscSF         bmsf;
2006       PetscSFNode    *iremote;
2007       Mat             B = nest->m[i][j], D = NULL;
2008       PetscInt        bm, *sub_dnnz, *sub_onnz, br;
2009       const PetscInt *bmindices;
2010       if (!B) continue;
2011       PetscCall(ISGetLocalSize(nest->isglobal.row[i], &bm));
2012       PetscCall(ISGetIndices(nest->isglobal.row[i], &bmindices));
2013       PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &bmsf));
2014       PetscCall(PetscMalloc1(bm, &iremote));
2015       PetscCall(PetscMalloc1(bm, &sub_dnnz));
2016       PetscCall(PetscMalloc1(bm, &sub_onnz));
2017       for (k = 0; k < bm; ++k) {
2018         sub_dnnz[k] = 0;
2019         sub_onnz[k] = 0;
2020       }
2021       PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flg));
2022       if (flg) {
2023         PetscTryMethod(B, "MatTransposeGetMat_C", (Mat, Mat *), (B, &D));
2024         PetscTryMethod(B, "MatHermitianTransposeGetMat_C", (Mat, Mat *), (B, &D));
2025         PetscCall(MatConvert(B, ((PetscObject)D)->type_name, MAT_INITIAL_MATRIX, &D));
2026         B = D;
2027       }
2028       PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQSBAIJ, MATMPISBAIJ, ""));
2029       if (flg) {
2030         if (D) PetscCall(MatConvert(D, MATBAIJ, MAT_INPLACE_MATRIX, &D));
2031         else PetscCall(MatConvert(B, MATBAIJ, MAT_INITIAL_MATRIX, &D));
2032         B = D;
2033       }
2034       /*
2035        Locate the owners for all of the locally-owned global row indices for this row block.
2036        These determine the roots of PetscSF used to communicate preallocation data to row owners.
2037        The roots correspond to the dnnz and onnz entries; thus, there are two roots per row.
2038        */
2039       PetscCall(MatGetOwnershipRange(B, &rstart, NULL));
2040       for (br = 0; br < bm; ++br) {
2041         PetscInt        row = bmindices[br], brncols, col;
2042         const PetscInt *brcols;
2043         PetscInt        rowrel   = 0; /* row's relative index on its owner rank */
2044         PetscMPIInt     rowowner = 0;
2045         PetscCall(PetscLayoutFindOwnerIndex(A->rmap, row, &rowowner, &rowrel));
2046         /* how many roots  */
2047         iremote[br].rank  = rowowner;
2048         iremote[br].index = rowrel; /* edge from bmdnnz to dnnz */
2049         /* get nonzero pattern */
2050         PetscCall(MatGetRow(B, br + rstart, &brncols, &brcols, NULL));
2051         for (k = 0; k < brncols; k++) {
2052           col = bNindices[brcols[k]];
2053           if (col >= A->cmap->range[rowowner] && col < A->cmap->range[rowowner + 1]) {
2054             sub_dnnz[br]++;
2055           } else {
2056             sub_onnz[br]++;
2057           }
2058         }
2059         PetscCall(MatRestoreRow(B, br + rstart, &brncols, &brcols, NULL));
2060       }
2061       if (D) PetscCall(MatDestroy(&D));
2062       PetscCall(ISRestoreIndices(nest->isglobal.row[i], &bmindices));
2063       /* bsf will have to take care of disposing of bedges. */
2064       PetscCall(PetscSFSetGraph(bmsf, m, bm, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
2065       PetscCall(PetscSFReduceBegin(bmsf, MPIU_INT, sub_dnnz, dnnz, MPI_SUM));
2066       PetscCall(PetscSFReduceEnd(bmsf, MPIU_INT, sub_dnnz, dnnz, MPI_SUM));
2067       PetscCall(PetscSFReduceBegin(bmsf, MPIU_INT, sub_onnz, onnz, MPI_SUM));
2068       PetscCall(PetscSFReduceEnd(bmsf, MPIU_INT, sub_onnz, onnz, MPI_SUM));
2069       PetscCall(PetscFree(sub_dnnz));
2070       PetscCall(PetscFree(sub_onnz));
2071       PetscCall(PetscSFDestroy(&bmsf));
2072     }
2073     PetscCall(ISRestoreIndices(bNis, &bNindices));
2074     PetscCall(ISDestroy(&bNis));
2075   }
2076   /* Resize preallocation if overestimated */
2077   for (i = 0; i < m; i++) {
2078     dnnz[i] = PetscMin(dnnz[i], A->cmap->n);
2079     onnz[i] = PetscMin(onnz[i], A->cmap->N - A->cmap->n);
2080   }
2081   PetscCall(MatSeqAIJSetPreallocation(C, 0, dnnz));
2082   PetscCall(MatMPIAIJSetPreallocation(C, 0, dnnz, 0, onnz));
2083   PetscCall(PetscFree(dnnz));
2084   PetscCall(MatAXPY_Dense_Nest(C, 1.0, A));
2085   if (reuse == MAT_INPLACE_MATRIX) {
2086     PetscCall(MatHeaderReplace(A, &C));
2087   } else *newmat = C;
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 PetscErrorCode MatConvert_Nest_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
2092   Mat      B;
2093   PetscInt m, n, M, N;
2094 
2095   PetscFunctionBegin;
2096   PetscCall(MatGetSize(A, &M, &N));
2097   PetscCall(MatGetLocalSize(A, &m, &n));
2098   if (reuse == MAT_REUSE_MATRIX) {
2099     B = *newmat;
2100     PetscCall(MatZeroEntries(B));
2101   } else {
2102     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), m, PETSC_DECIDE, M, N, NULL, &B));
2103   }
2104   PetscCall(MatAXPY_Dense_Nest(B, 1.0, A));
2105   if (reuse == MAT_INPLACE_MATRIX) {
2106     PetscCall(MatHeaderReplace(A, &B));
2107   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = B;
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 PetscErrorCode MatHasOperation_Nest(Mat mat, MatOperation op, PetscBool *has) {
2112   Mat_Nest    *bA = (Mat_Nest *)mat->data;
2113   MatOperation opAdd;
2114   PetscInt     i, j, nr = bA->nr, nc = bA->nc;
2115   PetscBool    flg;
2116   PetscFunctionBegin;
2117 
2118   *has = PETSC_FALSE;
2119   if (op == MATOP_MULT || op == MATOP_MULT_ADD || op == MATOP_MULT_TRANSPOSE || op == MATOP_MULT_TRANSPOSE_ADD) {
2120     opAdd = (op == MATOP_MULT || op == MATOP_MULT_ADD ? MATOP_MULT_ADD : MATOP_MULT_TRANSPOSE_ADD);
2121     for (j = 0; j < nc; j++) {
2122       for (i = 0; i < nr; i++) {
2123         if (!bA->m[i][j]) continue;
2124         PetscCall(MatHasOperation(bA->m[i][j], opAdd, &flg));
2125         if (!flg) PetscFunctionReturn(0);
2126       }
2127     }
2128   }
2129   if (((void **)mat->ops)[op]) *has = PETSC_TRUE;
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 /*MC
2134   MATNEST - MATNEST = "nest" - Matrix type consisting of nested submatrices, each stored separately.
2135 
2136   Level: intermediate
2137 
2138   Notes:
2139   This matrix type permits scalable use of `PCFIELDSPLIT` and avoids the large memory costs of extracting submatrices.
2140   It allows the use of symmetric and block formats for parts of multi-physics simulations.
2141   It is usually used with `DMCOMPOSITE` and `DMCreateMatrix()`
2142 
2143   Each of the submatrices lives on the same MPI communicator as the original nest matrix (though they can have zero
2144   rows/columns on some processes.) Thus this is not meant for cases where the submatrices live on far fewer processes
2145   than the nest matrix.
2146 
2147 .seealso: `MATNEST`, `MatCreate()`, `MatType`, `MatCreateNest()`, `MatNestSetSubMat()`, `MatNestGetSubMat()`,
2148           `VecCreateNest()`, `DMCreateMatrix()`, `DMCOMPOSITE`, `MatNestSetVecType()`, `MatNestGetLocalISs()`,
2149           `MatNestGetISs()`, `MatNestSetSubMats()`, `MatNestGetSubMats()`
2150 M*/
2151 PETSC_EXTERN PetscErrorCode MatCreate_Nest(Mat A) {
2152   Mat_Nest *s;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(PetscNewLog(A, &s));
2156   A->data = (void *)s;
2157 
2158   s->nr            = -1;
2159   s->nc            = -1;
2160   s->m             = NULL;
2161   s->splitassembly = PETSC_FALSE;
2162 
2163   PetscCall(PetscMemzero(A->ops, sizeof(*A->ops)));
2164 
2165   A->ops->mult                  = MatMult_Nest;
2166   A->ops->multadd               = MatMultAdd_Nest;
2167   A->ops->multtranspose         = MatMultTranspose_Nest;
2168   A->ops->multtransposeadd      = MatMultTransposeAdd_Nest;
2169   A->ops->transpose             = MatTranspose_Nest;
2170   A->ops->assemblybegin         = MatAssemblyBegin_Nest;
2171   A->ops->assemblyend           = MatAssemblyEnd_Nest;
2172   A->ops->zeroentries           = MatZeroEntries_Nest;
2173   A->ops->copy                  = MatCopy_Nest;
2174   A->ops->axpy                  = MatAXPY_Nest;
2175   A->ops->duplicate             = MatDuplicate_Nest;
2176   A->ops->createsubmatrix       = MatCreateSubMatrix_Nest;
2177   A->ops->destroy               = MatDestroy_Nest;
2178   A->ops->view                  = MatView_Nest;
2179   A->ops->getvecs               = NULL; /* Use VECNEST by calling MatNestSetVecType(A,VECNEST) */
2180   A->ops->getlocalsubmatrix     = MatGetLocalSubMatrix_Nest;
2181   A->ops->restorelocalsubmatrix = MatRestoreLocalSubMatrix_Nest;
2182   A->ops->getdiagonal           = MatGetDiagonal_Nest;
2183   A->ops->diagonalscale         = MatDiagonalScale_Nest;
2184   A->ops->scale                 = MatScale_Nest;
2185   A->ops->shift                 = MatShift_Nest;
2186   A->ops->diagonalset           = MatDiagonalSet_Nest;
2187   A->ops->setrandom             = MatSetRandom_Nest;
2188   A->ops->hasoperation          = MatHasOperation_Nest;
2189   A->ops->missingdiagonal       = MatMissingDiagonal_Nest;
2190 
2191   A->spptr     = NULL;
2192   A->assembled = PETSC_FALSE;
2193 
2194   /* expose Nest api's */
2195   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSubMat_C", MatNestGetSubMat_Nest));
2196   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetSubMat_C", MatNestSetSubMat_Nest));
2197   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSubMats_C", MatNestGetSubMats_Nest));
2198   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSize_C", MatNestGetSize_Nest));
2199   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetISs_C", MatNestGetISs_Nest));
2200   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetLocalISs_C", MatNestGetLocalISs_Nest));
2201   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetVecType_C", MatNestSetVecType_Nest));
2202   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetSubMats_C", MatNestSetSubMats_Nest));
2203   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_mpiaij_C", MatConvert_Nest_AIJ));
2204   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_seqaij_C", MatConvert_Nest_AIJ));
2205   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_aij_C", MatConvert_Nest_AIJ));
2206   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_is_C", MatConvert_Nest_IS));
2207   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_mpidense_C", MatConvert_Nest_Dense));
2208   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_seqdense_C", MatConvert_Nest_Dense));
2209   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_seqdense_C", MatProductSetFromOptions_Nest_Dense));
2210   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_mpidense_C", MatProductSetFromOptions_Nest_Dense));
2211   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_dense_C", MatProductSetFromOptions_Nest_Dense));
2212 
2213   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATNEST));
2214   PetscFunctionReturn(0);
2215 }
2216