xref: /petsc/src/ksp/pc/impls/hpddm/pchpddm.cxx (revision bcd4bb4a4158aa96f212e9537e87b40407faf83e)
1 #include <petscsf.h>
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/matimpl.h>
4 #include <petsc/private/petschpddm.h> /*I "petscpc.h" I*/
5 #include <petsc/private/pcimpl.h>
6 #include <petsc/private/dmimpl.h> /* this must be included after petschpddm.h so that DM_MAX_WORK_VECTORS is not defined  */
7                                   /* otherwise, it is assumed that one is compiling libhpddm_petsc => circular dependency */
8 
/* entry point into the HPDDM solver library; NOTE(review): presumably resolved when the
   companion shared library is dlopen'ed -- the loader is not in this file chunk, confirm */
static PetscErrorCode (*loadedSym)(HPDDM::Schwarz<PetscScalar> *const, IS, Mat, Mat, Mat, std::vector<Vec>, PC_HPDDM_Level **const) = nullptr;

/* NOTE(review): presumably guards one-time registration of the log events below -- the
   registration routine is not visible in this chunk, confirm */
static PetscBool PCHPDDMPackageInitialized = PETSC_FALSE;

/* log events for the main algorithmic phases, plus one setup and one solve event per level */
PetscLogEvent PC_HPDDM_Strc;
PetscLogEvent PC_HPDDM_PtAP;
PetscLogEvent PC_HPDDM_PtBP;
PetscLogEvent PC_HPDDM_Next;
PetscLogEvent PC_HPDDM_SetUp[PETSC_PCHPDDM_MAXLEVELS];
PetscLogEvent PC_HPDDM_Solve[PETSC_PCHPDDM_MAXLEVELS];

/* string tables following the PetscEnum convention: enumerator names, then the type name,
   then the common enumerator prefix, then a null sentinel */
const char *const PCHPDDMCoarseCorrectionTypes[] = {"DEFLATED", "ADDITIVE", "BALANCED", "NONE", "PCHPDDMCoarseCorrectionType", "PC_HPDDM_COARSE_CORRECTION_", nullptr};
const char *const PCHPDDMSchurPreTypes[]         = {"LEAST_SQUARES", "GENEO", "PCHPDDMSchurPreType", "PC_HPDDM_SCHUR_PRE", nullptr};
22 
23 static PetscErrorCode PCReset_HPDDM(PC pc)
24 {
25   PC_HPDDM *data = (PC_HPDDM *)pc->data;
26 
27   PetscFunctionBegin;
28   if (data->levels) {
29     for (PetscInt i = 0; i < PETSC_PCHPDDM_MAXLEVELS && data->levels[i]; ++i) {
30       PetscCall(KSPDestroy(&data->levels[i]->ksp));
31       PetscCall(PCDestroy(&data->levels[i]->pc));
32       PetscCall(PetscFree(data->levels[i]));
33     }
34     PetscCall(PetscFree(data->levels));
35   }
36   PetscCall(ISDestroy(&data->is));
37   PetscCall(MatDestroy(&data->aux));
38   PetscCall(MatDestroy(&data->B));
39   PetscCall(VecDestroy(&data->normal));
40   data->correction = PC_HPDDM_COARSE_CORRECTION_DEFLATED;
41   data->Neumann    = PETSC_BOOL3_UNKNOWN;
42   data->deflation  = PETSC_FALSE;
43   data->setup      = nullptr;
44   data->setup_ctx  = nullptr;
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 static PetscErrorCode PCDestroy_HPDDM(PC pc)
49 {
50   PC_HPDDM *data = (PC_HPDDM *)pc->data;
51 
52   PetscFunctionBegin;
53   PetscCall(PCReset_HPDDM(pc));
54   PetscCall(PetscFree(data));
55   PetscCall(PetscObjectChangeTypeName((PetscObject)pc, nullptr));
56   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetAuxiliaryMat_C", nullptr));
57   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMHasNeumannMat_C", nullptr));
58   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetRHSMat_C", nullptr));
59   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetCoarseCorrectionType_C", nullptr));
60   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetCoarseCorrectionType_C", nullptr));
61   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetSTShareSubKSP_C", nullptr));
62   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetSTShareSubKSP_C", nullptr));
63   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetDeflationMat_C", nullptr));
64   PetscCall(PetscObjectCompose((PetscObject)pc, "_PCHPDDM_Schur", nullptr));
65   PetscFunctionReturn(PETSC_SUCCESS);
66 }
67 
/* common implementation behind PCHPDDMSetAuxiliaryMat() and PCHPDDMSetDeflationMat():
   stores (references to) the local index set and auxiliary matrix in the PC context,
   resetting the PC when a previously set overlap is being redefined */
static inline PetscErrorCode PCHPDDMSetAuxiliaryMat_Private(PC pc, IS is, Mat A, PetscBool deflation)
{
  PC_HPDDM                   *data = (PC_HPDDM *)pc->data;
  PCHPDDMCoarseCorrectionType type = data->correction; /* saved because PCReset_HPDDM() restores the default correction */

  PetscFunctionBegin;
  PetscValidLogicalCollectiveBool(pc, deflation, 4);
  if (is && A) {
    PetscInt m[2];

    /* the auxiliary matrix must act on exactly the local unknowns listed in is */
    PetscCall(ISGetLocalSize(is, m));
    PetscCall(MatGetLocalSize(A, m + 1, nullptr));
    PetscCheck(m[0] == m[1], PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Inconsistent IS and Mat sizes (%" PetscInt_FMT " v. %" PetscInt_FMT ")", m[0], m[1]);
  }
  if (is) {
    /* reference taken before the potential reset so the caller's IS survives even if is == data->is */
    PetscCall(PetscObjectReference((PetscObject)is));
    if (data->is) { /* new overlap definition resets the PC */
      PetscCall(PCReset_HPDDM(pc));
      pc->setfromoptionscalled = 0;
      pc->setupcalled          = PETSC_FALSE;
      data->correction         = type; /* reinstate the correction type wiped by the reset */
    }
    PetscCall(ISDestroy(&data->is));
    data->is = is;
  }
  if (A) {
    /* take a new reference to A, dropping any previously stored auxiliary matrix */
    PetscCall(PetscObjectReference((PetscObject)A));
    PetscCall(MatDestroy(&data->aux));
    data->aux = A;
  }
  data->deflation = deflation;
  PetscFunctionReturn(PETSC_SUCCESS);
}
101 
/* extracts the local rectangular block A(is[2], is[0]) into *splitting, then zeroes the rows of
   that block whose restriction to the columns selected by is[1] vanishes
   -- NOTE(review): interpretation of the three index sets inferred from the caller
   PCHPDDMSetAuxiliaryMatNormal_Private(); confirm against the index sets it builds */
static inline PetscErrorCode PCHPDDMSplittingMatNormal_Private(Mat A, IS *is, Mat *splitting[])
{
  Mat *sub;
  IS   zero;

  PetscFunctionBegin;
  PetscCall(MatSetOption(A, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
  /* *splitting = A(is[2], is[0]) */
  PetscCall(MatCreateSubMatrices(A, 1, is + 2, is, MAT_INITIAL_MATRIX, splitting));
  /* sub = (*splitting)(is[2], is[1]); its zero rows identify the rows to eliminate */
  PetscCall(MatCreateSubMatrices(**splitting, 1, is + 2, is + 1, MAT_INITIAL_MATRIX, &sub));
  PetscCall(MatFindZeroRows(*sub, &zero));
  PetscCall(MatDestroySubMatrices(1, &sub));
  /* keep the sparsity pattern so the zeroed rows can be refilled later without reallocation */
  PetscCall(MatSetOption(**splitting, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
  PetscCall(MatZeroRowsIS(**splitting, zero, 0.0, nullptr, nullptr));
  PetscCall(ISDestroy(&zero));
  PetscFunctionReturn(PETSC_SUCCESS);
}
118 
/* builds the finest-level auxiliary matrix for normal-equation (least-squares) problems:
   either a local Gram matrix splitting^H * splitting (generic subdomain solvers) or a
   MatNormal(Hermitian) wrapper around the splitting (when PCQR is the subdomain solver).
   Takes ownership of B01 (destroyed here when non-null); on success *B holds a freshly
   created AIJ version of the normal operator and the PC's auxiliary matrix is set.
   -- NOTE(review): diagonal is assumed to hold the (1,1) block of an augmented system;
   it may be destroyed (*diagonal set to NULL) when it is numerically zero -- confirm with callers */
static inline PetscErrorCode PCHPDDMSetAuxiliaryMatNormal_Private(PC pc, Mat A, Mat N, Mat *B, const char *pcpre, Vec *diagonal = nullptr, Mat B01 = nullptr)
{
  PC_HPDDM *data         = (PC_HPDDM *)pc->data;
  Mat      *splitting[2] = {}, aux;
  Vec       d;
  IS        is[3];
  PetscReal norm;
  PetscBool flg;
  char      type[256] = {}; /* same size as in src/ksp/pc/interface/pcset.c */

  PetscFunctionBegin;
  /* *B = explicit AIJ representation of the normal operator (either converted from N or recomputed as B01^T A) */
  if (!B01) PetscCall(MatConvert(N, MATAIJ, MAT_INITIAL_MATRIX, B));
  else PetscCall(MatTransposeMatMult(B01, A, MAT_INITIAL_MATRIX, PETSC_DETERMINE, B));
  PetscCall(MatEliminateZeros(*B, PETSC_TRUE));
  /* is[0]: locally owned columns of A, then grown by one layer of overlap in *B */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, A->cmap->n, A->cmap->rstart, 1, is));
  PetscCall(MatIncreaseOverlap(*B, 1, is, 1));
  /* is[1]: positions of the originally owned columns inside the overlapping set is[0] */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, A->cmap->n, A->cmap->rstart, 1, is + 2));
  PetscCall(ISEmbed(is[0], is[2], PETSC_TRUE, is + 1));
  PetscCall(ISDestroy(is + 2));
  /* is[2]: all global rows of A */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, A->rmap->N, 0, 1, is + 2));
  PetscCall(PCHPDDMSplittingMatNormal_Private(A, is, &splitting[0]));
  if (B01) {
    PetscCall(PCHPDDMSplittingMatNormal_Private(B01, is, &splitting[1]));
    PetscCall(MatDestroy(&B01)); /* ownership of B01 ends here */
  }
  PetscCall(ISDestroy(is + 2));
  PetscCall(ISDestroy(is + 1));
  /* the auxiliary matrix shape depends on the requested level-1 subdomain solver */
  PetscCall(PetscOptionsGetString(((PetscObject)pc)->options, pcpre, "-pc_hpddm_levels_1_sub_pc_type", type, sizeof(type), nullptr));
  PetscCall(PetscStrcmp(type, PCQR, &flg));
  if (!flg) {
    /* generic solver: assemble the local Gram matrix conjugate^T * splitting explicitly */
    Mat conjugate = *splitting[splitting[1] ? 1 : 0];

    if (PetscDefined(USE_COMPLEX) && !splitting[1]) {
      /* with complex scalars, MatTransposeMatMult() needs an explicitly conjugated copy to get A^H A */
      PetscCall(MatDuplicate(*splitting[0], MAT_COPY_VALUES, &conjugate));
      PetscCall(MatConjugate(conjugate));
    }
    PetscCall(MatTransposeMatMult(conjugate, *splitting[0], MAT_INITIAL_MATRIX, PETSC_DETERMINE, &aux));
    if (PetscDefined(USE_COMPLEX) && !splitting[1]) PetscCall(MatDestroy(&conjugate));
    else if (splitting[1]) PetscCall(MatDestroySubMatrices(1, &splitting[1]));
    PetscCall(MatNorm(aux, NORM_FROBENIUS, &norm)); /* kept for the regularizing shift below */
    PetscCall(MatSetOption(aux, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
    if (diagonal) {
      PetscReal norm; /* intentionally shadows the Frobenius norm above */

      PetscCall(VecScale(*diagonal, -1.0));
      PetscCall(VecNorm(*diagonal, NORM_INFINITY, &norm));
      if (norm > PETSC_SMALL) {
        PetscSF  scatter;
        PetscInt n;

        /* gather the (negated) diagonal onto the overlapping local numbering and add it to aux */
        PetscCall(ISGetLocalSize(*is, &n));
        PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pc), n, PETSC_DECIDE, &d));
        PetscCall(VecScatterCreate(*diagonal, *is, d, nullptr, &scatter));
        PetscCall(VecScatterBegin(scatter, *diagonal, d, INSERT_VALUES, SCATTER_FORWARD));
        PetscCall(VecScatterEnd(scatter, *diagonal, d, INSERT_VALUES, SCATTER_FORWARD));
        PetscCall(PetscSFDestroy(&scatter)); /* VecScatter is a PetscSF, so this is the matching destroy */
        PetscCall(MatDiagonalSet(aux, d, ADD_VALUES));
        PetscCall(VecDestroy(&d));
      } else PetscCall(VecDestroy(diagonal)); /* numerically zero diagonal: discard it and tell the caller */
    }
    /* no diagonal supplied: shift by a small multiple of ||aux||_F to keep the Gram matrix definite */
    if (!diagonal) PetscCall(MatShift(aux, PETSC_SMALL * norm));
    PetscCall(MatEliminateZeros(aux, PETSC_TRUE));
  } else {
    PetscBool flg;

    /* PCQR factors the rectangular splitting directly, so the Gram matrix is kept implicit */
    PetscCheck(!splitting[1], PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot use PCQR when A01 != A10^T");
    if (diagonal) {
      PetscCall(VecNorm(*diagonal, NORM_INFINITY, &norm));
      PetscCheck(norm < PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Nonzero diagonal A11 block");
      PetscCall(VecDestroy(diagonal));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)N, MATNORMAL, &flg));
    if (flg) PetscCall(MatCreateNormal(*splitting[0], &aux));
    else PetscCall(MatCreateNormalHermitian(*splitting[0], &aux));
  }
  PetscCall(MatDestroySubMatrices(1, &splitting[0]));
  /* hand the overlap IS and the auxiliary matrix over to the PC (it takes its own references) */
  PetscCall(PCHPDDMSetAuxiliaryMat(pc, *is, aux, nullptr, nullptr));
  data->Neumann = PETSC_BOOL3_TRUE;
  PetscCall(ISDestroy(is));
  PetscCall(MatDestroy(&aux));
  PetscFunctionReturn(PETSC_SUCCESS);
}
201 
202 static PetscErrorCode PCHPDDMSetAuxiliaryMat_HPDDM(PC pc, IS is, Mat A, PetscErrorCode (*setup)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *), void *setup_ctx)
203 {
204   PC_HPDDM *data = (PC_HPDDM *)pc->data;
205 
206   PetscFunctionBegin;
207   PetscCall(PCHPDDMSetAuxiliaryMat_Private(pc, is, A, PETSC_FALSE));
208   if (setup) {
209     data->setup     = setup;
210     data->setup_ctx = setup_ctx;
211   }
212   PetscFunctionReturn(PETSC_SUCCESS);
213 }
214 
215 /*@C
216   PCHPDDMSetAuxiliaryMat - Sets the auxiliary matrix used by `PCHPDDM` for the concurrent GenEO problems at the finest level.
217 
218   Input Parameters:
219 + pc    - preconditioner context
220 . is    - index set of the local auxiliary, e.g., Neumann, matrix
221 . A     - auxiliary sequential matrix
222 . setup - function for generating the auxiliary matrix entries, may be `NULL`
223 - ctx   - context for `setup`, may be `NULL`
224 
225   Calling sequence of `setup`:
226 + J   - matrix whose values are to be set
227 . t   - time
228 . X   - linearization point
229 . X_t - time-derivative of the linearization point
230 . s   - step
231 . ovl - index set of the local auxiliary, e.g., Neumann, matrix
232 - ctx - context for `setup`, may be `NULL`
233 
234   Level: intermediate
235 
236   Note:
237   As an example, in a finite element context with nonoverlapping subdomains plus (overlapping) ghost elements, this could be the unassembled (Neumann)
238   local overlapping operator. As opposed to the assembled (Dirichlet) local overlapping operator obtained by summing neighborhood contributions
239   at the interface of ghost elements.
240 
241   Fortran Notes:
242   Only `PETSC_NULL_FUNCTION` is supported for `setup` and `ctx` is never accessed
243 
244 .seealso: [](ch_ksp), `PCHPDDM`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCHPDDMSetRHSMat()`, `MATIS`
245 @*/
PetscErrorCode PCHPDDMSetAuxiliaryMat(PC pc, IS is, Mat A, PetscErrorCode (*setup)(Mat J, PetscReal t, Vec X, Vec X_t, PetscReal s, IS ovl, void *ctx), void *ctx)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* is and A are individually optional; size consistency is checked in the implementation */
  if (is) PetscValidHeaderSpecific(is, IS_CLASSID, 2);
  if (A) PetscValidHeaderSpecific(A, MAT_CLASSID, 3);
  /* dispatch to the composed method; silently a no-op if pc is not a PCHPDDM */
  PetscTryMethod(pc, "PCHPDDMSetAuxiliaryMat_C", (PC, IS, Mat, PetscErrorCode (*)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *), void *), (pc, is, A, setup, ctx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
255 
256 static PetscErrorCode PCHPDDMHasNeumannMat_HPDDM(PC pc, PetscBool has)
257 {
258   PC_HPDDM *data = (PC_HPDDM *)pc->data;
259 
260   PetscFunctionBegin;
261   data->Neumann = PetscBoolToBool3(has);
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 /*@
266   PCHPDDMHasNeumannMat - Informs `PCHPDDM` that the `Mat` passed to `PCHPDDMSetAuxiliaryMat()` is the local Neumann matrix.
267 
268   Input Parameters:
269 + pc  - preconditioner context
270 - has - Boolean value
271 
272   Level: intermediate
273 
274   Notes:
275   This may be used to bypass a call to `MatCreateSubMatrices()` and to `MatConvert()` for `MATSBAIJ` matrices.
276 
  If a function composed with the `DMCreateNeumannOverlap_C` name is available in the `DM` attached to the Pmat, or the Amat, or the `PC`, the flag is internally set to `PETSC_TRUE`. Its default value is otherwise `PETSC_FALSE`.
278 
279 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMSetAuxiliaryMat()`
280 @*/
PetscErrorCode PCHPDDMHasNeumannMat(PC pc, PetscBool has)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* dispatch to the composed method; silently a no-op if pc is not a PCHPDDM */
  PetscTryMethod(pc, "PCHPDDMHasNeumannMat_C", (PC, PetscBool), (pc, has));
  PetscFunctionReturn(PETSC_SUCCESS);
}
288 
289 static PetscErrorCode PCHPDDMSetRHSMat_HPDDM(PC pc, Mat B)
290 {
291   PC_HPDDM *data = (PC_HPDDM *)pc->data;
292 
293   PetscFunctionBegin;
294   PetscCall(PetscObjectReference((PetscObject)B));
295   PetscCall(MatDestroy(&data->B));
296   data->B = B;
297   PetscFunctionReturn(PETSC_SUCCESS);
298 }
299 
300 /*@
301   PCHPDDMSetRHSMat - Sets the right-hand side matrix used by `PCHPDDM` for the concurrent GenEO problems at the finest level.
302 
303   Input Parameters:
304 + pc - preconditioner context
305 - B  - right-hand side sequential matrix
306 
307   Level: advanced
308 
309   Note:
310   Must be used in conjunction with `PCHPDDMSetAuxiliaryMat`(N), so that Nv = lambda Bv is solved using `EPSSetOperators`(N, B).
311   It is assumed that N and `B` are provided using the same numbering. This provides a means to try more advanced methods such as GenEO-II or H-GenEO.
312 
313 .seealso: [](ch_ksp), `PCHPDDMSetAuxiliaryMat()`, `PCHPDDM`
314 @*/
PetscErrorCode PCHPDDMSetRHSMat(PC pc, Mat B)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
  /* a NULL B is accepted and ignored; otherwise dispatch to the composed method
     (silently a no-op if pc is not a PCHPDDM) */
  if (B) {
    PetscValidHeaderSpecific(B, MAT_CLASSID, 2);
    PetscTryMethod(pc, "PCHPDDMSetRHSMat_C", (PC, Mat), (pc, B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
325 
/* processes the -pc_hpddm_* options-database entries: allocates the level array, walks the
   levels reading per-level coarsening options until no further coarsening is requested, then
   handles the coarsest-level and global options */
static PetscErrorCode PCSetFromOptions_HPDDM(PC pc, PetscOptionItems PetscOptionsObject)
{
  PC_HPDDM                   *data   = (PC_HPDDM *)pc->data;
  PC_HPDDM_Level            **levels = data->levels;
  char                        prefix[256], deprecated[256];
  int                         i = 1; /* 1-based level counter, matches the option names */
  PetscMPIInt                 size, previous;
  PetscInt                    n, overlap = 1;
  PCHPDDMCoarseCorrectionType type;
  PetscBool                   flg = PETSC_TRUE, set;

  PetscFunctionBegin;
  if (!data->levels) {
    PetscCall(PetscCalloc1(PETSC_PCHPDDM_MAXLEVELS, &levels));
    data->levels = levels;
  }
  PetscOptionsHeadBegin(PetscOptionsObject, "PCHPDDM options");
  PetscCall(PetscOptionsBoundedInt("-pc_hpddm_harmonic_overlap", "Overlap prior to computing local harmonic extensions", "PCHPDDM", overlap, &overlap, &set, 1));
  if (!set) overlap = -1; /* -1 encodes "harmonic overlap not requested" below */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
  previous = size; /* number of processes of the previously configured level */
  while (i < PETSC_PCHPDDM_MAXLEVELS) {
    PetscInt p = 1;

    if (!data->levels[i - 1]) PetscCall(PetscNew(data->levels + i - 1));
    data->levels[i - 1]->parent = data;
    /* if the previous level has a single process, it is not possible to coarsen further */
    if (previous == 1 || !flg) break;
    data->levels[i - 1]->nu        = 0;
    data->levels[i - 1]->threshold = -1.0; /* -1.0 encodes "no threshold supplied" */
    PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_nev", i));
    PetscCall(PetscOptionsBoundedInt(prefix, "Local number of deflation vectors computed by SLEPc", "EPSSetDimensions", data->levels[i - 1]->nu, &data->levels[i - 1]->nu, nullptr, 0));
    /* ..._eps_threshold was renamed ..._eps_threshold_absolute in 3.24; accept both */
    PetscCall(PetscSNPrintf(deprecated, sizeof(deprecated), "-pc_hpddm_levels_%d_eps_threshold", i));
    PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_absolute", i));
    PetscCall(PetscOptionsDeprecated(deprecated, prefix, "3.24", nullptr));
    PetscCall(PetscOptionsReal(prefix, "Local absolute threshold for selecting deflation vectors returned by SLEPc", "PCHPDDM", data->levels[i - 1]->threshold, &data->levels[i - 1]->threshold, nullptr));
    if (i == 1) {
      /* harmonic overlap switches level 1 to an SVD-based selection, which is exclusive with the absolute EPS threshold */
      PetscCheck(overlap == -1 || PetscAbsReal(data->levels[i - 1]->threshold + static_cast<PetscReal>(1.0)) < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply both -pc_hpddm_levels_1_eps_threshold_absolute and -pc_hpddm_harmonic_overlap");
      if (overlap != -1) {
        PetscInt  nsv    = 0;
        PetscBool set[2] = {PETSC_FALSE, PETSC_FALSE}; /* intentionally shadows the outer set */

        PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_svd_nsv", i));
        PetscCall(PetscOptionsBoundedInt(prefix, "Local number of deflation vectors computed by SLEPc", "SVDSetDimensions", nsv, &nsv, nullptr, 0));
        PetscCheck(data->levels[0]->nu == 0 || nsv == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply both -pc_hpddm_levels_1_eps_nev and -pc_hpddm_levels_1_svd_nsv");
        if (data->levels[0]->nu == 0) { /* -eps_nev has not been used, so nu is 0 */
          data->levels[0]->nu = nsv;    /* nu may still be 0 if -svd_nsv has not been used */
          PetscCall(PetscSNPrintf(deprecated, sizeof(deprecated), "-pc_hpddm_levels_%d_svd_relative_threshold", i));
          PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_svd_threshold_relative", i));
          PetscCall(PetscOptionsDeprecated(deprecated, prefix, "3.24", nullptr));
          PetscCall(PetscOptionsReal(prefix, "Local relative threshold for selecting deflation vectors returned by SLEPc", "PCHPDDM", data->levels[0]->threshold, &data->levels[0]->threshold, set)); /* cache whether this option has been used or not to error out in case of exclusive options being used simultaneously later on */
        }
        if (data->levels[0]->nu == 0 || nsv == 0) { /* if neither -eps_nev nor -svd_nsv has been used */
          PetscCall(PetscSNPrintf(deprecated, sizeof(deprecated), "-pc_hpddm_levels_%d_eps_relative_threshold", i));
          PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_relative", i));
          PetscCall(PetscOptionsDeprecated(deprecated, prefix, "3.24", nullptr));
          PetscCall(PetscOptionsReal(prefix, "Local relative threshold for selecting deflation vectors returned by SLEPc", "PCHPDDM", data->levels[0]->threshold, &data->levels[0]->threshold, set + 1));
          PetscCheck(!set[0] || !set[1], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot supply both -pc_hpddm_levels_1_eps_threshold_relative and -pc_hpddm_levels_1_svd_threshold_relative");
        }
        PetscCheck(data->levels[0]->nu || PetscAbsReal(data->levels[i - 1]->threshold + static_cast<PetscReal>(1.0)) > PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Need to supply at least one of 1) -pc_hpddm_levels_1_eps_nev, 2) -pc_hpddm_levels_1_svd_nsv, 3) -pc_hpddm_levels_1_eps_threshold_relative, 4) -pc_hpddm_levels_1_svd_threshold_relative (for nonsymmetric matrices, only option 2 and option 4 are appropriate)");
      }
      PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_1_st_share_sub_ksp"));
      PetscCall(PetscOptionsBool(prefix, "Shared KSP between SLEPc ST and the fine-level subdomain solver", "PCHPDDMSetSTShareSubKSP", PETSC_FALSE, &data->share, nullptr));
    }
    /* if there is no prescribed coarsening, just break out of the loop */
    if (data->levels[i - 1]->threshold <= PetscReal() && data->levels[i - 1]->nu <= 0 && !(data->deflation && i == 1)) break;
    else {
      ++i;
      PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_nev", i));
      PetscCall(PetscOptionsHasName(PetscOptionsObject->options, PetscOptionsObject->prefix, prefix, &flg));
      if (!flg) {
        PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_eps_threshold_absolute", i));
        PetscCall(PetscOptionsHasName(PetscOptionsObject->options, PetscOptionsObject->prefix, prefix, &flg));
      }
      if (flg) {
        /* if there are coarsening options for the next level, then register it  */
        /* otherwise, don't to avoid having both options levels_N_p and coarse_p */
        PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_levels_%d_p", i));
        PetscCall(PetscOptionsRangeInt(prefix, "Number of processes used to assemble the coarse operator at this level", "PCHPDDM", p, &p, &flg, 1, PetscMax(1, previous / 2)));
        previous = p;
      }
    }
  }
  data->N = i; /* total number of levels, including the coarsest one */
  n       = 1;
  if (i > 1) {
    PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_coarse_p"));
    PetscCall(PetscOptionsRangeInt(prefix, "Number of processes used to assemble the coarsest operator", "PCHPDDM", n, &n, nullptr, 1, PetscMax(1, previous / 2)));
#if PetscDefined(HAVE_MUMPS)
    /* -mat_mumps_use_omp_threads only makes sense with the MUMPS coarse solver and enough idle processes */
    PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "pc_hpddm_coarse_"));
    PetscCall(PetscOptionsHasName(PetscOptionsObject->options, prefix, "-mat_mumps_use_omp_threads", &flg));
    if (flg) {
      char type[64]; /* same size as in src/ksp/pc/impls/factor/factimpl.c */

      PetscCall(PetscStrncpy(type, n > 1 && PetscDefined(HAVE_MUMPS) ? MATSOLVERMUMPS : MATSOLVERPETSC, sizeof(type))); /* default solver for a MatMPIAIJ or a MatSeqAIJ */
      PetscCall(PetscOptionsGetString(PetscOptionsObject->options, prefix, "-pc_factor_mat_solver_type", type, sizeof(type), nullptr));
      PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-%smat_mumps_use_omp_threads and -%spc_factor_mat_solver_type != %s", prefix, prefix, MATSOLVERMUMPS);
      size = n; /* repurposed: number of coarse MPI processes */
      n    = -1;
      PetscCall(PetscOptionsGetInt(PetscOptionsObject->options, prefix, "-mat_mumps_use_omp_threads", &n, nullptr));
      PetscCheck(n >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Need to specify a positive integer for -%smat_mumps_use_omp_threads", prefix);
      PetscCheck(n * size <= previous, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%d MPI process%s x %d OpenMP thread%s greater than %d available MPI process%s for the coarsest operator", (int)size, size > 1 ? "es" : "", (int)n, n > 1 ? "s" : "", (int)previous, previous > 1 ? "es" : "");
    }
#endif
    PetscCall(PetscOptionsEnum("-pc_hpddm_coarse_correction", "Type of coarse correction applied each iteration", "PCHPDDMSetCoarseCorrectionType", PCHPDDMCoarseCorrectionTypes, (PetscEnum)data->correction, (PetscEnum *)&type, &flg));
    if (flg) PetscCall(PCHPDDMSetCoarseCorrectionType(pc, type));
    PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_has_neumann"));
    PetscCall(PetscOptionsBool(prefix, "Is the auxiliary Mat the local Neumann matrix?", "PCHPDDMHasNeumannMat", PetscBool3ToBool(data->Neumann), &flg, &set));
    if (set) data->Neumann = PetscBoolToBool3(flg);
    data->log_separate = PETSC_FALSE;
    if (PetscDefined(USE_LOG)) {
      PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-pc_hpddm_log_separate"));
      PetscCall(PetscOptionsBool(prefix, "Log events level by level instead of inside PCSetUp()/KSPSolve()", nullptr, data->log_separate, &data->log_separate, nullptr));
    }
  }
  PetscOptionsHeadEnd();
  /* free level slots left over from a previous, deeper configuration */
  while (i < PETSC_PCHPDDM_MAXLEVELS && data->levels[i]) PetscCall(PetscFree(data->levels[i++]));
  PetscFunctionReturn(PETSC_SUCCESS);
}
446 
447 template <bool transpose>
448 static PetscErrorCode PCApply_HPDDM(PC pc, Vec x, Vec y)
449 {
450   PC_HPDDM *data = (PC_HPDDM *)pc->data;
451 
452   PetscFunctionBegin;
453   PetscCall(PetscCitationsRegister(HPDDMCitation, &HPDDMCite));
454   PetscCheck(data->levels[0]->ksp, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No KSP attached to PCHPDDM");
455   if (data->log_separate) PetscCall(PetscLogEventBegin(PC_HPDDM_Solve[0], data->levels[0]->ksp, nullptr, nullptr, nullptr)); /* coarser-level events are directly triggered in HPDDM */
456   if (!transpose) PetscCall(KSPSolve(data->levels[0]->ksp, x, y));
457   else PetscCall(KSPSolveTranspose(data->levels[0]->ksp, x, y));
458   if (data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_Solve[0], data->levels[0]->ksp, nullptr, nullptr, nullptr));
459   PetscFunctionReturn(PETSC_SUCCESS);
460 }
461 
462 template <bool transpose>
463 static PetscErrorCode PCMatApply_HPDDM(PC pc, Mat X, Mat Y)
464 {
465   PC_HPDDM *data = (PC_HPDDM *)pc->data;
466 
467   PetscFunctionBegin;
468   PetscCall(PetscCitationsRegister(HPDDMCitation, &HPDDMCite));
469   PetscCheck(data->levels[0]->ksp, PETSC_COMM_SELF, PETSC_ERR_PLIB, "No KSP attached to PCHPDDM");
470   if (!transpose) PetscCall(KSPMatSolve(data->levels[0]->ksp, X, Y));
471   else PetscCall(KSPMatSolveTranspose(data->levels[0]->ksp, X, Y));
472   PetscFunctionReturn(PETSC_SUCCESS);
473 }
474 
475 /*@
476   PCHPDDMGetComplexities - Computes the grid and operator complexities.
477 
478   Collective
479 
480   Input Parameter:
481 . pc - preconditioner context
482 
483   Output Parameters:
484 + gc - grid complexity $ \sum_i m_i / m_1 $
485 - oc - operator complexity $ \sum_i nnz_i / nnz_1 $
486 
487   Level: advanced
488 
489 .seealso: [](ch_ksp), `PCMGGetGridComplexity()`, `PCHPDDM`, `PCHYPRE`, `PCGAMG`
490 @*/
PetscErrorCode PCHPDDMGetComplexities(PC pc, PetscReal *gc, PetscReal *oc)
{
  PC_HPDDM      *data = (PC_HPDDM *)pc->data;
  MatInfo        info;
  /* accumulate[0]: sum of level sizes, accumulate[1]: sum of level nonzeros;
     m1/nnz1 start at 1.0 so the divisions below cannot be by zero */
  PetscLogDouble accumulate[2]{}, nnz1 = 1.0, m1 = 1.0;

  PetscFunctionBegin;
  if (gc) {
    PetscAssertPointer(gc, 2);
    *gc = 0;
  }
  if (oc) {
    PetscAssertPointer(oc, 3);
    *oc = 0;
  }
  for (PetscInt n = 0; n < data->N; ++n) {
    if (data->levels[n]->ksp) {
      Mat       P, A = nullptr;
      PetscInt  m;
      PetscBool flg = PETSC_FALSE;

      PetscCall(KSPGetOperators(data->levels[n]->ksp, nullptr, &P));
      PetscCall(MatGetSize(P, &m, nullptr));
      accumulate[0] += m;
      if (n == 0) {
        /* the finest operator may be stored implicitly (normal or Schur-complement form);
           convert or flag it so its nonzeros can be estimated */
        PetscCall(PetscObjectTypeCompareAny((PetscObject)P, &flg, MATNORMAL, MATNORMALHERMITIAN, ""));
        if (flg) {
          PetscCall(MatConvert(P, MATAIJ, MAT_INITIAL_MATRIX, &A));
          P = A; /* A != nullptr marks that P is now a temporary copy */
        } else {
          PetscCall(PetscObjectTypeCompare((PetscObject)P, MATSCHURCOMPLEMENT, &flg));
          PetscCall(PetscObjectReference((PetscObject)P)); /* balances the MatDestroy() below */
        }
      }
      if (!A && flg) accumulate[1] += m * m; /* assumption that a MATSCHURCOMPLEMENT is dense if stored explicitly */
      else if (P->ops->getinfo) {
        PetscCall(MatGetInfo(P, MAT_GLOBAL_SUM, &info));
        accumulate[1] += info.nz_used;
      }
      if (n == 0) {
        /* record the finest-level size and nonzeros, used as denominators of the complexities */
        m1 = m;
        if (!A && flg) nnz1 = m * m;
        else if (P->ops->getinfo) nnz1 = info.nz_used; /* info was just filled in the branch above */
        PetscCall(MatDestroy(&P));
      }
    }
  }
  /* only process #0 has access to the full hierarchy by construction, so broadcast to ensure consistent outputs */
  PetscCallMPI(MPI_Bcast(accumulate, 2, MPIU_PETSCLOGDOUBLE, 0, PetscObjectComm((PetscObject)pc)));
  if (gc) *gc = static_cast<PetscReal>(accumulate[0] / m1);
  if (oc) *oc = static_cast<PetscReal>(accumulate[1] / nnz1);
  PetscFunctionReturn(PETSC_SUCCESS);
}
544 
545 static PetscErrorCode PCView_HPDDM(PC pc, PetscViewer viewer)
546 {
547   PC_HPDDM         *data = (PC_HPDDM *)pc->data;
548   PetscViewer       subviewer;
549   PetscViewerFormat format;
550   PetscSubcomm      subcomm;
551   PetscReal         oc, gc;
552   PetscInt          tabs;
553   PetscMPIInt       size, color, rank;
554   PetscBool         flg;
555   const char       *name;
556 
557   PetscFunctionBegin;
558   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &flg));
559   if (flg) {
560     PetscCall(PetscViewerASCIIPrintf(viewer, "level%s: %" PetscInt_FMT "\n", data->N > 1 ? "s" : "", data->N));
561     PetscCall(PCHPDDMGetComplexities(pc, &gc, &oc));
562     if (data->N > 1) {
563       if (!data->deflation) {
564         PetscCall(PetscViewerASCIIPrintf(viewer, "Neumann matrix attached? %s\n", PetscBools[PetscBool3ToBool(data->Neumann)]));
565         PetscCall(PetscViewerASCIIPrintf(viewer, "shared subdomain KSP between SLEPc and PETSc? %s\n", PetscBools[data->share]));
566       } else PetscCall(PetscViewerASCIIPrintf(viewer, "user-supplied deflation matrix\n"));
567       PetscCall(PetscViewerASCIIPrintf(viewer, "coarse correction: %s\n", PCHPDDMCoarseCorrectionTypes[data->correction]));
568       PetscCall(PetscViewerASCIIPrintf(viewer, "on process #0, value%s (+ threshold%s if available) for selecting deflation vectors:", data->N > 2 ? "s" : "", data->N > 2 ? "s" : ""));
569       PetscCall(PetscViewerASCIIGetTab(viewer, &tabs));
570       PetscCall(PetscViewerASCIISetTab(viewer, 0));
571       for (PetscInt i = 1; i < data->N; ++i) {
572         PetscCall(PetscViewerASCIIPrintf(viewer, " %" PetscInt_FMT, data->levels[i - 1]->nu));
573         if (data->levels[i - 1]->threshold > static_cast<PetscReal>(-0.1)) PetscCall(PetscViewerASCIIPrintf(viewer, " (%g)", (double)data->levels[i - 1]->threshold));
574       }
575       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
576       PetscCall(PetscViewerASCIISetTab(viewer, tabs));
577     }
578     PetscCall(PetscViewerASCIIPrintf(viewer, "grid and operator complexities: %g %g\n", (double)gc, (double)oc));
579     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
580     if (data->levels[0]->ksp) {
581       PetscCall(KSPView(data->levels[0]->ksp, viewer));
582       if (data->levels[0]->pc) PetscCall(PCView(data->levels[0]->pc, viewer));
583       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
584       for (PetscInt i = 1; i < data->N; ++i) {
585         if (data->levels[i]->ksp) color = 1;
586         else color = 0;
587         PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)pc), &subcomm));
588         PetscCall(PetscSubcommSetNumber(subcomm, PetscMin(size, 2)));
589         PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
590         PetscCall(PetscViewerASCIIPushTab(viewer));
591         PetscCall(PetscViewerGetSubViewer(viewer, PetscSubcommChild(subcomm), &subviewer));
592         if (color == 1) {
593           PetscCall(KSPView(data->levels[i]->ksp, subviewer));
594           if (data->levels[i]->pc) PetscCall(PCView(data->levels[i]->pc, subviewer));
595           PetscCall(PetscViewerFlush(subviewer));
596         }
597         PetscCall(PetscViewerRestoreSubViewer(viewer, PetscSubcommChild(subcomm), &subviewer));
598         PetscCall(PetscViewerASCIIPopTab(viewer));
599         PetscCall(PetscSubcommDestroy(&subcomm));
600       }
601     }
602     PetscCall(PetscViewerGetFormat(viewer, &format));
603     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
604       PetscCall(PetscViewerFileGetName(viewer, &name));
605       if (name) {
606         Mat             aux[2];
607         IS              is;
608         const PetscInt *indices;
609         PetscInt        m, n, sizes[5] = {pc->mat->rmap->n, pc->mat->cmap->n, pc->mat->rmap->N, pc->mat->cmap->N, 0};
610         char           *tmp;
611         std::string     prefix, suffix;
612         size_t          pos;
613 
614         PetscCall(PetscStrstr(name, ".", &tmp));
615         if (tmp) {
616           pos    = std::distance(const_cast<char *>(name), tmp);
617           prefix = std::string(name, pos);
618           suffix = std::string(name + pos + 1);
619         } else prefix = name;
620         if (data->aux) {
621           PetscCall(MatGetSize(data->aux, &m, &n));
622           PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), aux));
623           PetscCall(MatSetSizes(aux[0], m, n, PETSC_DETERMINE, PETSC_DETERMINE));
624           PetscCall(PetscObjectBaseTypeCompare((PetscObject)data->aux, MATSEQAIJ, &flg));
625           if (flg) PetscCall(MatSetType(aux[0], MATMPIAIJ));
626           else {
627             PetscCall(PetscObjectBaseTypeCompare((PetscObject)data->aux, MATSEQBAIJ, &flg));
628             if (flg) PetscCall(MatSetType(aux[0], MATMPIBAIJ));
629             else {
630               PetscCall(PetscObjectBaseTypeCompare((PetscObject)data->aux, MATSEQSBAIJ, &flg));
631               PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "MatType of auxiliary Mat (%s) is not any of the following: MATSEQAIJ, MATSEQBAIJ, or MATSEQSBAIJ", ((PetscObject)data->aux)->type_name);
632               PetscCall(MatSetType(aux[0], MATMPISBAIJ));
633             }
634           }
635           PetscCall(MatSetBlockSizesFromMats(aux[0], data->aux, data->aux));
636           PetscCall(MatAssemblyBegin(aux[0], MAT_FINAL_ASSEMBLY));
637           PetscCall(MatAssemblyEnd(aux[0], MAT_FINAL_ASSEMBLY));
638           PetscCall(MatGetDiagonalBlock(aux[0], aux + 1));
639           PetscCall(MatCopy(data->aux, aux[1], DIFFERENT_NONZERO_PATTERN));
640           PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)pc), std::string(prefix + "_aux_" + std::to_string(size) + (tmp ? ("." + suffix) : "")).c_str(), FILE_MODE_WRITE, &subviewer));
641           PetscCall(MatView(aux[0], subviewer));
642           PetscCall(PetscViewerDestroy(&subviewer));
643           PetscCall(MatDestroy(aux));
644         }
645         if (data->is) {
646           PetscCall(ISGetIndices(data->is, &indices));
647           PetscCall(ISGetSize(data->is, sizes + 4));
648           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), sizes[4], indices, PETSC_USE_POINTER, &is));
649           PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)pc), std::string(prefix + "_is_" + std::to_string(size) + (tmp ? ("." + suffix) : "")).c_str(), FILE_MODE_WRITE, &subviewer));
650           PetscCall(ISView(is, subviewer));
651           PetscCall(PetscViewerDestroy(&subviewer));
652           PetscCall(ISDestroy(&is));
653           PetscCall(ISRestoreIndices(data->is, &indices));
654         }
655         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), PETSC_STATIC_ARRAY_LENGTH(sizes), sizes, PETSC_USE_POINTER, &is));
656         PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)pc), std::string(prefix + "_sizes_" + std::to_string(size) + (tmp ? ("." + suffix) : "")).c_str(), FILE_MODE_WRITE, &subviewer));
657         PetscCall(ISView(is, subviewer));
658         PetscCall(PetscViewerDestroy(&subviewer));
659         PetscCall(ISDestroy(&is));
660       }
661     }
662   }
663   PetscFunctionReturn(PETSC_SUCCESS);
664 }
665 
/*
  PCPreSolve_HPDDM - Hook run at the beginning of each KSPSolve() with a PCHPDDM.

  Two tasks:
  1. if the outer KSP is KSPLSQR and data->normal has not been allocated yet, create that
     temporary Vec, which is used in PCApply_HPDDMShell() for coarse grid corrections of
     the normal equations;
  2. if the outer KSP is of a type known to require a symmetric (or Hermitian) PC
     (CG-like methods), error out when the selected PCHPDDMCoarseCorrectionType or
     PCASMType is known to be nonsymmetric, unless the user explicitly set that option
     on the command line (in which case the configuration is assumed to be deliberate).
*/
static PetscErrorCode PCPreSolve_HPDDM(PC pc, KSP ksp, Vec, Vec)
{
  PC_HPDDM *data = (PC_HPDDM *)pc->data;
  Mat       A;
  PetscBool flg;

  PetscFunctionBegin;
  if (ksp) {
    PetscCall(PetscObjectTypeCompare((PetscObject)ksp, KSPLSQR, &flg));
    if (flg && !data->normal) {
      PetscCall(KSPGetOperators(ksp, &A, nullptr));
      PetscCall(MatCreateVecs(A, nullptr, &data->normal)); /* temporary Vec used in PCApply_HPDDMShell() for coarse grid corrections */
    } else if (!flg) {
      /* after this cascade, flg is PETSC_TRUE iff the KSPType requires a symmetric PC */
      PetscCall(PetscObjectTypeCompareAny((PetscObject)ksp, &flg, KSPCG, KSPGROPPCG, KSPPIPECG, KSPPIPECGRR, KSPPIPELCG, KSPPIPEPRCG, KSPPIPECG2, KSPSTCG, KSPFCG, KSPPIPEFCG, KSPMINRES, KSPNASH, KSPSYMMLQ, ""));
      if (!flg) {
        PetscCall(PetscObjectTypeCompare((PetscObject)ksp, KSPHPDDM, &flg));
        if (flg) {
          KSPHPDDMType type;

          /* the CG-like variants of KSPHPDDM also require a symmetric PC */
          PetscCall(KSPHPDDMGetType(ksp, &type));
          flg = (type == KSP_HPDDM_TYPE_CG || type == KSP_HPDDM_TYPE_BCG || type == KSP_HPDDM_TYPE_BFBCG ? PETSC_TRUE : PETSC_FALSE);
        }
      }
    }
    if (flg) {
      if (data->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED) {
        /* the deflated coarse correction is nonsymmetric: tolerate it only if explicitly requested by the user */
        PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, ((PetscObject)pc)->prefix, "-pc_hpddm_coarse_correction", &flg));
        PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_INCOMP, "PCHPDDMCoarseCorrectionType %s is known to be not symmetric, but KSPType %s requires a symmetric PC, if you insist on using this configuration, use the additional option -%spc_hpddm_coarse_correction %s, or alternatively, switch to a symmetric PCHPDDMCoarseCorrectionType such as %s",
                   PCHPDDMCoarseCorrectionTypes[data->correction], ((PetscObject)ksp)->type_name, ((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "", PCHPDDMCoarseCorrectionTypes[data->correction], PCHPDDMCoarseCorrectionTypes[PC_HPDDM_COARSE_CORRECTION_BALANCED]);
      }
      for (PetscInt n = 0; n < data->N; ++n) {
        if (data->levels[n]->pc) {
          PetscCall(PetscObjectTypeCompare((PetscObject)data->levels[n]->pc, PCASM, &flg));
          if (flg) {
            PCASMType type;

            PetscCall(PCASMGetType(data->levels[n]->pc, &type));
            if (type == PC_ASM_RESTRICT || type == PC_ASM_INTERPOLATE) {
              /* restricted/interpolated additive Schwarz is nonsymmetric: tolerate it only if explicitly requested by the user */
              PetscCall(PetscOptionsHasName(((PetscObject)data->levels[n]->pc)->options, ((PetscObject)data->levels[n]->pc)->prefix, "-pc_asm_type", &flg));
              PetscCheck(flg, PetscObjectComm((PetscObject)data->levels[n]->pc), PETSC_ERR_ARG_INCOMP, "PCASMType %s is known to be not symmetric, but KSPType %s requires a symmetric PC, if you insist on using this configuration, use the additional option -%spc_asm_type %s, or alternatively, switch to a symmetric PCASMType such as %s", PCASMTypes[type],
                         ((PetscObject)ksp)->type_name, ((PetscObject)data->levels[n]->pc)->prefix, PCASMTypes[type], PCASMTypes[PC_ASM_BASIC]);
            }
          }
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
715 
/*
  PCSetUp_HPDDMShell - Sets up the PCSHELL wrapping one PCHPDDM level: forwards the
  options prefix and the operators of the level KSP to the inner PC (the smoother),
  and lazily allocates the work Vecs used by PCApply_HPDDMShell().
*/
static PetscErrorCode PCSetUp_HPDDMShell(PC pc)
{
  PC_HPDDM_Level *ctx;
  Mat             A, P;
  Vec             x;
  const char     *pcpre;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  PetscCall(KSPGetOptionsPrefix(ctx->ksp, &pcpre));
  PetscCall(KSPGetOperators(ctx->ksp, &A, &P));
  /* smoother */
  PetscCall(PCSetOptionsPrefix(ctx->pc, pcpre));
  PetscCall(PCSetOperators(ctx->pc, A, P));
  if (!ctx->v[0]) {
    /* ctx->v[0]: one work Vec duplicated from D; ctx->v[1]: two work Vecs matching the row layout of A */
    PetscCall(VecDuplicateVecs(ctx->D, 1, &ctx->v[0]));
    /* NOTE(review): D seems to be kept only when PetscScalar == PetscReal — confirm why it is discarded otherwise */
    if (!std::is_same<PetscScalar, PetscReal>::value) PetscCall(VecDestroy(&ctx->D));
    PetscCall(MatCreateVecs(A, &x, nullptr));
    PetscCall(VecDuplicateVecs(x, 2, &ctx->v[1]));
    PetscCall(VecDestroy(&x));
  }
  std::fill_n(ctx->V, 3, nullptr); /* work Mats for PCMatApply_HPDDMShell(), (re)created lazily in PCHPDDMMatApply_Private() */
  PetscFunctionReturn(PETSC_SUCCESS);
}
740 
/*
  PCHPDDMDeflate_Private (Vec overload) - Applies the coarse correction y = Q x
  (or y = Q^T x when transpose is true) to a single Vec, where Q is assembled by
  HPDDM. The input is scattered to HPDDM numbering, deflated in place, then
  scattered back to PETSc numbering.
*/
template <bool transpose = false, class Type = Vec, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
static inline PetscErrorCode PCHPDDMDeflate_Private(PC pc, Type x, Type y)
{
  PC_HPDDM_Level *ctx;
  PetscScalar    *out;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  /* going from PETSc to HPDDM numbering */
  PetscCall(VecScatterBegin(ctx->scatter, x, ctx->v[0][0], INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(ctx->scatter, x, ctx->v[0][0], INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecGetArrayWrite(ctx->v[0][0], &out));
  PetscCallCXX(ctx->P->deflation<false, transpose>(nullptr, out, 1)); /* y = Q x */
  PetscCall(VecRestoreArrayWrite(ctx->v[0][0], &out));
  /* going from HPDDM to PETSc numbering */
  PetscCall(VecScatterBegin(ctx->scatter, ctx->v[0][0], y, INSERT_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(ctx->scatter, ctx->v[0][0], y, INSERT_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
760 
/*
  PCHPDDMDeflate_Private (Mat overload) - Same as the Vec overload, but for a block
  of N vectors stored as columns of a dense Mat: Y = Q X (or Y = Q^T X when
  transpose is true). Columns are scattered one by one through ctx->V[0], the whole
  block is deflated in a single call, then scattered back.
*/
template <bool transpose = false, class Type = Mat, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
static inline PetscErrorCode PCHPDDMDeflate_Private(PC pc, Type X, Type Y)
{
  PC_HPDDM_Level *ctx;
  Vec             vX, vY, vC;
  PetscScalar    *out;
  PetscInt        N;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  PetscCall(MatGetSize(X, nullptr, &N));
  /* going from PETSc to HPDDM numbering */
  for (PetscInt i = 0; i < N; ++i) {
    PetscCall(MatDenseGetColumnVecRead(X, i, &vX));
    PetscCall(MatDenseGetColumnVecWrite(ctx->V[0], i, &vC));
    PetscCall(VecScatterBegin(ctx->scatter, vX, vC, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(ctx->scatter, vX, vC, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(MatDenseRestoreColumnVecWrite(ctx->V[0], i, &vC));
    PetscCall(MatDenseRestoreColumnVecRead(X, i, &vX));
  }
  PetscCall(MatDenseGetArrayWrite(ctx->V[0], &out));
  PetscCallCXX(ctx->P->deflation<false, transpose>(nullptr, out, N)); /* Y = Q X */
  PetscCall(MatDenseRestoreArrayWrite(ctx->V[0], &out));
  /* going from HPDDM to PETSc numbering */
  for (PetscInt i = 0; i < N; ++i) {
    PetscCall(MatDenseGetColumnVecRead(ctx->V[0], i, &vC));
    PetscCall(MatDenseGetColumnVecWrite(Y, i, &vY));
    PetscCall(VecScatterBegin(ctx->scatter, vC, vY, INSERT_VALUES, SCATTER_REVERSE));
    PetscCall(VecScatterEnd(ctx->scatter, vC, vY, INSERT_VALUES, SCATTER_REVERSE));
    PetscCall(MatDenseRestoreColumnVecWrite(Y, i, &vY));
    PetscCall(MatDenseRestoreColumnVecRead(ctx->V[0], i, &vC));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
795 
796 /*
797      PCApply_HPDDMShell - Applies a (2) deflated, (1) additive, (3) balanced, or (4) no coarse correction. In what follows, E = Z Pmat Z^T and Q = Z^T E^-1 Z.
798 
799 .vb
800    (1) y =                  Pmat^-1              x + Q x,
801    (2) y =                  Pmat^-1 (I - Amat Q) x + Q x (default),
802    (3) y = (I - Q^T Amat^T) Pmat^-1 (I - Amat Q) x + Q x,
803    (4) y =                  Pmat^-1              x      .
804 .ve
805 
806    Input Parameters:
807 +     pc - preconditioner context
808 -     x - input vector
809 
810    Output Parameter:
811 .     y - output vector
812 
813    Notes:
     The options of Pmat^-1 = pc(Pmat) are prefixed by -pc_hpddm_levels_1_pc_. Z is a tall-and-skinny matrix assembled by HPDDM. The number of processes on which (Z Pmat Z^T) is aggregated is set via -pc_hpddm_coarse_p.
815      The options of (Z Pmat Z^T)^-1 = ksp(Z Pmat Z^T) are prefixed by -pc_hpddm_coarse_ (`KSPPREONLY` and `PCCHOLESKY` by default), unless a multilevel correction is turned on, in which case, this function is called recursively at each level except the coarsest one.
816      (1) and (2) visit the "next" level (in terms of coarsening) once per application, while (3) visits it twice, so it is asymptotically twice costlier. (2) is not symmetric even if both Amat and Pmat are symmetric.
817 
818    Level: advanced
819 
820    Developer Note:
821    Since this is not an actual manual page the material below should be moved to an appropriate manual page with the appropriate context, i.e. explaining when it is used and how
822    to trigger it. Likely the manual page is `PCHPDDM`
823 
824 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMCoarseCorrectionType`
825 */
static PetscErrorCode PCApply_HPDDMShell(PC pc, Vec x, Vec y)
{
  PC_HPDDM_Level *ctx;
  Mat             A;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
  PetscCall(KSPGetOperators(ctx->ksp, &A, nullptr));
  if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCApply(ctx->pc, x, y)); /* y = M^-1 x */
  else {
    PetscCall(PCHPDDMDeflate_Private(pc, x, y)); /* y = Q x */
    if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED || ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
      if (!ctx->parent->normal || ctx != ctx->parent->levels[0]) PetscCall(MatMult(A, y, ctx->v[1][0])); /* y = A Q x */
      else {
        /* KSPLSQR and finest level: the operator of the normal equations is A^T A, applied in two steps */
        PetscCall(MatMult(A, y, ctx->parent->normal));                              /* y = A Q x                 */
        PetscCall(MatMultHermitianTranspose(A, ctx->parent->normal, ctx->v[1][0])); /* y = A^T A Q x             */
      }
      PetscCall(VecWAXPY(ctx->v[1][1], -1.0, ctx->v[1][0], x)); /* y = (I - A Q) x                               */
      PetscCall(PCApply(ctx->pc, ctx->v[1][1], ctx->v[1][0]));  /* y = M^-1 (I - A Q) x                          */
      if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
        if (!ctx->parent->normal || ctx != ctx->parent->levels[0]) PetscCall(MatMultHermitianTranspose(A, ctx->v[1][0], ctx->v[1][1])); /* z = A^T y */
        else {
          /* KSPLSQR and finest level: apply the transpose of the normal equations operator in two steps */
          PetscCall(MatMult(A, ctx->v[1][0], ctx->parent->normal));
          PetscCall(MatMultHermitianTranspose(A, ctx->parent->normal, ctx->v[1][1])); /* z = A^T A y             */
        }
        PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->v[1][1], ctx->v[1][1])); /* z = Q^T z                    */
        PetscCall(VecAXPBYPCZ(y, -1.0, 1.0, 1.0, ctx->v[1][1], ctx->v[1][0]));   /* y = (I - Q^T A^T) y + Q x    */
      } else PetscCall(VecAXPY(y, 1.0, ctx->v[1][0]));                           /* y = M^-1 (I - A Q) x + Q x   */
    } else {
      PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
      PetscCall(PCApply(ctx->pc, x, ctx->v[1][0]));
      PetscCall(VecAXPY(y, 1.0, ctx->v[1][0])); /* y = M^-1 x + Q x */
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
864 
/*
  PCHPDDMMatApply_Private - Common setup for PCMatApply_HPDDMShell() and
  PCMatApplyTranspose_HPDDMShell(): (re)allocates the three work Mats ctx->V[0..2]
  when the number of right-hand sides N changes, and (re)binds the symbolic
  MatProduct objects cached on the operator A under the "_HPDDM_MatProduct" key
  (this cache is shared with KSPHPDDM, which may have replaced the Mats).

  On output, *reset is set to PETSC_TRUE when ctx->V[1] had an array placed into it
  (aliasing ctx->V[0]'s storage); the caller must then call MatDenseResetArray() on
  ctx->V[1] once done.
*/
template <bool transpose>
static PetscErrorCode PCHPDDMMatApply_Private(PC_HPDDM_Level *ctx, Mat Y, PetscBool *reset)
{
  Mat            A, *ptr;
  PetscScalar   *array;
  PetscInt       m, M, N, prev = 0;
  PetscContainer container = nullptr;

  PetscFunctionBegin;
  PetscCall(KSPGetOperators(ctx->ksp, &A, nullptr));
  PetscCall(MatGetSize(Y, nullptr, &N));
  PetscCall(PetscObjectQuery((PetscObject)A, "_HPDDM_MatProduct", (PetscObject *)&container));
  if (container) { /* MatProduct container already attached */
    PetscCall(PetscContainerGetPointer(container, (void **)&ptr));
    if (ptr[1] != ctx->V[2]) /* Mat has changed or may have been set first in KSPHPDDM */
      for (m = 0; m < 2; ++m) {
        /* adopt the Mats stored in the container, dropping the stale local ones */
        PetscCall(MatDestroy(ctx->V + m + 1));
        ctx->V[m + 1] = ptr[m];
        PetscCall(PetscObjectReference((PetscObject)ctx->V[m + 1]));
      }
  }
  if (ctx->V[1]) PetscCall(MatGetSize(ctx->V[1], nullptr, &prev));
  if (N != prev || !ctx->V[0]) {
    /* block width changed (or first call): rebuild the work Mats */
    PetscCall(MatDestroy(ctx->V));
    PetscCall(VecGetLocalSize(ctx->v[0][0], &m));
    PetscCall(MatCreateDense(PetscObjectComm((PetscObject)Y), m, PETSC_DECIDE, PETSC_DECIDE, N, nullptr, ctx->V));
    if (N != prev) {
      PetscCall(MatDestroy(ctx->V + 1));
      PetscCall(MatDestroy(ctx->V + 2));
      PetscCall(MatGetLocalSize(Y, &m, nullptr));
      PetscCall(MatGetSize(Y, &M, nullptr));
      /* when not balanced, ctx->V[1] shares the storage of ctx->V[0] */
      if (ctx->parent->correction != PC_HPDDM_COARSE_CORRECTION_BALANCED) PetscCall(MatDenseGetArrayWrite(ctx->V[0], &array));
      else array = nullptr;
      PetscCall(MatCreateDense(PetscObjectComm((PetscObject)Y), m, PETSC_DECIDE, M, N, array, ctx->V + 1));
      if (ctx->parent->correction != PC_HPDDM_COARSE_CORRECTION_BALANCED) PetscCall(MatDenseRestoreArrayWrite(ctx->V[0], &array));
      PetscCall(MatCreateDense(PetscObjectComm((PetscObject)Y), m, PETSC_DECIDE, M, N, nullptr, ctx->V + 2));
      /* symbolic product ctx->V[1] = A Y (or A^T ctx->V[2] when transposed) */
      PetscCall(MatProductCreateWithMat(A, !transpose ? Y : ctx->V[2], nullptr, ctx->V[1]));
      PetscCall(MatProductSetType(ctx->V[1], !transpose ? MATPRODUCT_AB : MATPRODUCT_AtB));
      PetscCall(MatProductSetFromOptions(ctx->V[1]));
      PetscCall(MatProductSymbolic(ctx->V[1]));
      if (!container) PetscCall(PetscObjectContainerCompose((PetscObject)A, "_HPDDM_MatProduct", ctx->V + 1, nullptr)); /* no MatProduct container attached, create one to be queried in KSPHPDDM or at the next call to PCMatApply() */
      else PetscCall(PetscContainerSetPointer(container, ctx->V + 1));                                                  /* need to compose B and D from MatProductCreateWithMat(A, B, NULL, D), which are stored in the contiguous array ctx->V */
    }
    if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
      /* extra symbolic product needed by the balanced correction: ctx->V[2] = A^T ctx->V[1] (or A Y when transposed) */
      PetscCall(MatProductCreateWithMat(A, !transpose ? ctx->V[1] : Y, nullptr, ctx->V[2]));
      PetscCall(MatProductSetType(ctx->V[2], !transpose ? MATPRODUCT_AtB : MATPRODUCT_AB));
      PetscCall(MatProductSetFromOptions(ctx->V[2]));
      PetscCall(MatProductSymbolic(ctx->V[2]));
    }
    PetscCallCXX(ctx->P->start(N)); /* notify HPDDM of the new block width */
  }
  if (N == prev || container) { /* when MatProduct container is attached, always need to MatProductReplaceMats() since KSPHPDDM may have replaced the Mat as well */
    PetscCall(MatProductReplaceMats(nullptr, !transpose ? Y : ctx->V[2], nullptr, ctx->V[1]));
    if (container && ctx->parent->correction != PC_HPDDM_COARSE_CORRECTION_BALANCED) {
      PetscCall(MatDenseGetArrayWrite(ctx->V[0], &array));
      PetscCall(MatDensePlaceArray(ctx->V[1], array));
      PetscCall(MatDenseRestoreArrayWrite(ctx->V[0], &array));
      *reset = PETSC_TRUE; /* caller must MatDenseResetArray(ctx->V[1]) */
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
927 
928 /*
929      PCMatApply_HPDDMShell - Variant of PCApply_HPDDMShell() for blocks of vectors.
930 
931    Input Parameters:
932 +     pc - preconditioner context
933 -     X - block of input vectors
934 
935    Output Parameter:
936 .     Y - block of output vectors
937 
938    Level: advanced
939 
940 .seealso: [](ch_ksp), `PCHPDDM`, `PCApply_HPDDMShell()`, `PCHPDDMCoarseCorrectionType`
941 */
static PetscErrorCode PCMatApply_HPDDMShell(PC pc, Mat X, Mat Y)
{
  PC_HPDDM_Level *ctx;
  PetscBool       reset = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
  if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCMatApply(ctx->pc, X, Y));
  else {
    PetscCall(PCHPDDMMatApply_Private<false>(ctx, Y, &reset)); /* (re)allocate work Mats and symbolic products */
    PetscCall(PCHPDDMDeflate_Private(pc, X, Y));               /* Y = Q X */
    if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED || ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
      PetscCall(MatProductNumeric(ctx->V[1]));                    /* V[1] = A Q X                     */
      PetscCall(MatCopy(ctx->V[1], ctx->V[2], SAME_NONZERO_PATTERN));
      PetscCall(MatAXPY(ctx->V[2], -1.0, X, SAME_NONZERO_PATTERN)); /* V[2] = (A Q - I) X             */
      PetscCall(PCMatApply(ctx->pc, ctx->V[2], ctx->V[1]));         /* V[1] = M^-1 (A Q - I) X        */
      if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
        PetscCall(MatProductNumeric(ctx->V[2]));                    /* V[2] = A^T V[1]                */
        PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->V[2], ctx->V[2]));      /* V[2] = Q^T A^T V[1] */
        PetscCall(MatAXPY(ctx->V[1], -1.0, ctx->V[2], SAME_NONZERO_PATTERN));   /* V[1] = (I - Q^T A^T) V[1] */
      }
      PetscCall(MatAXPY(Y, -1.0, ctx->V[1], SAME_NONZERO_PATTERN)); /* Y = Q X + (...) M^-1 (I - A Q) X */
    } else {
      PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
      PetscCall(PCMatApply(ctx->pc, X, ctx->V[1]));
      PetscCall(MatAXPY(Y, 1.0, ctx->V[1], SAME_NONZERO_PATTERN)); /* Y = M^-1 X + Q X */
    }
    if (reset) PetscCall(MatDenseResetArray(ctx->V[1])); /* V[1] had V[0]'s array placed in PCHPDDMMatApply_Private() */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
974 
975 /*
976      PCApplyTranspose_HPDDMShell - Applies the transpose of a (2) deflated, (1) additive, (3) balanced, or (4) no coarse correction. In what follows, E = Z Pmat Z^T and Q = Z^T E^-1 Z.
977 
978 .vb
979    (1) y =                  Pmat^-T              x + Q^T x,
980    (2) y = (I - Q^T Amat^T) Pmat^-T              x + Q^T x (default),
981    (3) y = (I - Q^T Amat^T) Pmat^-T (I - Amat Q) x + Q^T x,
982    (4) y =                  Pmat^-T              x        .
983 .ve
984 
985    Input Parameters:
986 +     pc - preconditioner context
987 -     x - input vector
988 
989    Output Parameter:
990 .     y - output vector
991 
992    Level: advanced
993 
994    Developer Note:
995    Since this is not an actual manual page the material below should be moved to an appropriate manual page with the appropriate context, i.e. explaining when it is used and how
996    to trigger it. Likely the manual page is `PCHPDDM`
997 
998 .seealso: [](ch_ksp), `PCHPDDM`, `PCApply_HPDDMShell()`, `PCHPDDMCoarseCorrectionType`
999 */
static PetscErrorCode PCApplyTranspose_HPDDMShell(PC pc, Vec x, Vec y)
{
  PC_HPDDM_Level *ctx;
  Mat             A;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
  /* the KSPLSQR normal-equations path of PCApply_HPDDMShell() has no transposed counterpart */
  PetscCheck(!ctx->parent->normal, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not implemented for the normal equations");
  PetscCall(KSPGetOperators(ctx->ksp, &A, nullptr));
  if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCApplyTranspose(ctx->pc, x, y)); /* y = M^-T x */
  else {
    PetscCall(PCHPDDMDeflate_Private<true>(pc, x, y)); /* y = Q^T x */
    if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED || ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
      if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
        /* TODO: checking whether Q^T = Q would make it possible to skip this coarse correction */
        PetscCall(PCHPDDMDeflate_Private(pc, x, ctx->v[1][1]));                /* y = Q x                     */
        PetscCall(MatMult(A, ctx->v[1][1], ctx->v[1][0]));                     /* y = A Q x                   */
        PetscCall(VecWAXPY(ctx->v[1][1], -1.0, ctx->v[1][0], x));              /* y = (I - A Q) x             */
        PetscCall(PCApplyTranspose(ctx->pc, ctx->v[1][1], ctx->v[1][0]));      /* y = M^-T (I - A Q) x        */
      } else PetscCall(PCApplyTranspose(ctx->pc, x, ctx->v[1][0]));            /* y = M^-T x                  */
      PetscCall(MatMultHermitianTranspose(A, ctx->v[1][0], ctx->v[1][1]));     /* z = A^T y                   */
      PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->v[1][1], ctx->v[1][1])); /* z = Q^T z                   */
      PetscCall(VecAXPBYPCZ(y, -1.0, 1.0, 1.0, ctx->v[1][1], ctx->v[1][0]));   /* y = (I - Q^T A^T) y + Q^T x */
    } else {
      PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
      PetscCall(PCApplyTranspose(ctx->pc, x, ctx->v[1][0]));
      PetscCall(VecAXPY(y, 1.0, ctx->v[1][0])); /* y = M^-T x + Q^T x */
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1032 
1033 /*
1034      PCMatApplyTranspose_HPDDMShell - Variant of PCApplyTranspose_HPDDMShell() for blocks of vectors.
1035 
1036    Input Parameters:
1037 +     pc - preconditioner context
1038 -     X - block of input vectors
1039 
1040    Output Parameter:
1041 .     Y - block of output vectors
1042 
1043    Level: advanced
1044 
1045 .seealso: [](ch_ksp), `PCHPDDM`, `PCApplyTranspose_HPDDMShell()`, `PCHPDDMCoarseCorrectionType`
1046 */
static PetscErrorCode PCMatApplyTranspose_HPDDMShell(PC pc, Mat X, Mat Y)
{
  PC_HPDDM_Level *ctx;
  PetscScalar    *array;
  PetscBool       reset = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  PetscCheck(ctx->P, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with no HPDDM object");
  if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_NONE) PetscCall(PCMatApplyTranspose(ctx->pc, X, Y));
  else if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_BALANCED) {
    /* similar code as in PCMatApply_HPDDMShell() with an extra call to PCHPDDMDeflate_Private<true>() */
    PetscCall(PCHPDDMMatApply_Private<false>(ctx, Y, &reset));
    PetscCall(PCHPDDMDeflate_Private(pc, X, Y));
    PetscCall(MatProductNumeric(ctx->V[1]));
    PetscCall(MatCopy(ctx->V[1], ctx->V[2], SAME_NONZERO_PATTERN));
    PetscCall(MatAXPY(ctx->V[2], -1.0, X, SAME_NONZERO_PATTERN));
    PetscCall(PCMatApplyTranspose(ctx->pc, ctx->V[2], ctx->V[1]));
    PetscCall(MatProductNumeric(ctx->V[2]));
    PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->V[2], ctx->V[2]));
    PetscCall(MatAXPY(ctx->V[1], -1.0, ctx->V[2], SAME_NONZERO_PATTERN));
    PetscCall(PCHPDDMDeflate_Private<true>(pc, X, Y)); /* TODO: checking whether Q^T = Q would make it possible to skip this coarse correction */
    PetscCall(MatAXPY(Y, -1.0, ctx->V[1], SAME_NONZERO_PATTERN));
  } else {
    PetscCall(PCHPDDMMatApply_Private<true>(ctx, Y, &reset));
    PetscCall(PCHPDDMDeflate_Private<true>(pc, X, Y)); /* Y = Q^T X */
    if (ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED) {
      PetscCall(PCMatApplyTranspose(ctx->pc, X, ctx->V[2]));
      PetscCall(MatAXPY(Y, 1.0, ctx->V[2], SAME_NONZERO_PATTERN));
      PetscCall(MatProductNumeric(ctx->V[1]));
      /* ctx->V[0] and ctx->V[1] memory regions overlap, so need to copy to ctx->V[2] and switch array */
      PetscCall(MatCopy(ctx->V[1], ctx->V[2], SAME_NONZERO_PATTERN));
      if (reset) PetscCall(MatDenseResetArray(ctx->V[1]));
      PetscCall(MatDenseGetArrayWrite(ctx->V[2], &array));
      PetscCall(MatDensePlaceArray(ctx->V[1], array));
      PetscCall(MatDenseRestoreArrayWrite(ctx->V[2], &array));
      reset = PETSC_TRUE; /* ctx->V[1] now borrows ctx->V[2]'s array, reset below */
      PetscCall(PCHPDDMDeflate_Private<true>(pc, ctx->V[1], ctx->V[1]));
      PetscCall(MatAXPY(Y, -1.0, ctx->V[1], SAME_NONZERO_PATTERN));
    } else {
      PetscCheck(ctx->parent->correction == PC_HPDDM_COARSE_CORRECTION_ADDITIVE, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "PCSHELL from PCHPDDM called with an unknown PCHPDDMCoarseCorrectionType %d", ctx->parent->correction);
      PetscCall(PCMatApplyTranspose(ctx->pc, X, ctx->V[1]));
      PetscCall(MatAXPY(Y, 1.0, ctx->V[1], SAME_NONZERO_PATTERN)); /* Y = M^-T X + Q^T X */
    }
    if (reset) PetscCall(MatDenseResetArray(ctx->V[1]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1095 
/*
  PCDestroy_HPDDMShell - Releases everything owned by the PCSHELL context of one
  PCHPDDM level: the HPDDM object, the work Vecs and Mats, the cached MatProduct
  container, the scatter, and the inner PC.
*/
static PetscErrorCode PCDestroy_HPDDMShell(PC pc)
{
  PC_HPDDM_Level *ctx;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  PetscCall(HPDDM::Schwarz<PetscScalar>::destroy(ctx, PETSC_TRUE));
  PetscCall(VecDestroyVecs(1, &ctx->v[0]));
  PetscCall(VecDestroyVecs(2, &ctx->v[1]));
  /* drop the MatProduct cache attached to the operator before destroying the Mats it points to */
  PetscCall(PetscObjectCompose((PetscObject)ctx->pc->mat, "_HPDDM_MatProduct", nullptr));
  PetscCall(MatDestroy(ctx->V));
  PetscCall(MatDestroy(ctx->V + 1));
  PetscCall(MatDestroy(ctx->V + 2));
  PetscCall(VecDestroy(&ctx->D));
  PetscCall(PetscSFDestroy(&ctx->scatter));
  PetscCall(PCDestroy(&ctx->pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1114 
1115 template <class Type, bool T = false, typename std::enable_if<std::is_same<Type, Vec>::value>::type * = nullptr>
1116 static inline PetscErrorCode PCApply_Schur_Private(std::tuple<KSP, IS, Vec[2]> *p, PC factor, Type x, Type y)
1117 {
1118   PetscFunctionBegin;
1119   PetscCall(VecISCopy(std::get<2>(*p)[0], std::get<1>(*p), SCATTER_FORWARD, x));
1120   if (!T) PetscCall(PCApply(factor, std::get<2>(*p)[0], std::get<2>(*p)[1]));
1121   else PetscCall(PCApplyTranspose(factor, std::get<2>(*p)[0], std::get<2>(*p)[1]));
1122   PetscCall(VecISCopy(std::get<2>(*p)[1], std::get<1>(*p), SCATTER_REVERSE, y));
1123   PetscFunctionReturn(PETSC_SUCCESS);
1124 }
1125 
/*
  PCApply_Schur_Private (Mat overload) - Same as the Vec overload, but for a block of
  vectors: the columns of X are restricted to the unknowns selected by the IS into a
  temporary dense Mat, the factored PC is applied in one block call, and the result is
  prolongated column by column into Y. The transpose template parameter is unused here
  (PCMatApply() is always called).
*/
template <class Type, bool = false, typename std::enable_if<std::is_same<Type, Mat>::value>::type * = nullptr>
static inline PetscErrorCode PCApply_Schur_Private(std::tuple<KSP, IS, Vec[2]> *p, PC factor, Type X, Type Y)
{
  Mat B[2];
  Vec x, y;

  PetscFunctionBegin;
  /* B[0]: restricted input block, B[1]: restricted output block */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, factor->mat->rmap->n, X->cmap->n, nullptr, B));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, factor->mat->rmap->n, X->cmap->n, nullptr, B + 1));
  for (PetscInt i = 0; i < X->cmap->n; ++i) {
    PetscCall(MatDenseGetColumnVecRead(X, i, &x));
    PetscCall(MatDenseGetColumnVecWrite(B[0], i, &y));
    PetscCall(VecISCopy(y, std::get<1>(*p), SCATTER_FORWARD, x));
    PetscCall(MatDenseRestoreColumnVecWrite(B[0], i, &y));
    PetscCall(MatDenseRestoreColumnVecRead(X, i, &x));
  }
  PetscCall(PCMatApply(factor, B[0], B[1]));
  PetscCall(MatDestroy(B)); /* B[0] no longer needed */
  for (PetscInt i = 0; i < X->cmap->n; ++i) {
    PetscCall(MatDenseGetColumnVecRead(B[1], i, &x));
    PetscCall(MatDenseGetColumnVecWrite(Y, i, &y));
    PetscCall(VecISCopy(x, std::get<1>(*p), SCATTER_REVERSE, y));
    PetscCall(MatDenseRestoreColumnVecWrite(Y, i, &y));
    PetscCall(MatDenseRestoreColumnVecRead(B[1], i, &x));
  }
  PetscCall(MatDestroy(B + 1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1154 
/*
  PCApply_Schur - PCSHELL apply (or transposed apply when T is true) for a PCHPDDM
  Schur complement preconditioner. The factored PC of the inner KSP is temporarily
  switched into its "interior solve" mode (MUMPS ICNTL(26) = 0, or MKL Pardiso
  iparm 70 — NOTE(review): see the respective solver manuals to confirm the exact
  semantics of these flags), the restricted solve is carried out by
  PCApply_Schur_Private(), and the solver mode is restored afterwards.
*/
template <class Type = Vec, bool T = false>
static PetscErrorCode PCApply_Schur(PC pc, Type x, Type y)
{
  PC                           factor;
  Mat                          A;
  MatSolverType                type;
  PetscBool                    flg;
  std::tuple<KSP, IS, Vec[2]> *p;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &p));
  PetscCall(KSPGetPC(std::get<0>(*p), &factor));
  PetscCall(PCFactorGetMatSolverType(factor, &type));
  PetscCall(PCFactorGetMatrix(factor, &A));
  PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
  if (flg) {
    PetscCheck(PetscDefined(HAVE_MUMPS), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent MatSolverType");
    PetscCall(MatMumpsSetIcntl(A, 26, 0));
  } else {
    /* only MUMPS and MKL Pardiso are supported here */
    PetscCall(PetscStrcmp(type, MATSOLVERMKL_PARDISO, &flg));
    PetscCheck(flg && PetscDefined(HAVE_MKL_PARDISO), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent MatSolverType");
    flg = PETSC_FALSE; /* reset so the MUMPS-specific restore below is skipped */
#if PetscDefined(HAVE_MKL_PARDISO)
    PetscCall(MatMkl_PardisoSetCntl(A, 70, 1));
#endif
  }
  PetscCall(PCApply_Schur_Private<Type, T>(p, factor, x, y));
  if (flg) {
    PetscCall(MatMumpsSetIcntl(A, 26, -1)); /* restore the default solve mode */
  } else {
#if PetscDefined(HAVE_MKL_PARDISO)
    PetscCall(MatMkl_PardisoSetCntl(A, 70, 0));
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1191 
1192 static PetscErrorCode PCDestroy_Schur(PC pc)
1193 {
1194   std::tuple<KSP, IS, Vec[2]> *p;
1195 
1196   PetscFunctionBegin;
1197   PetscCall(PCShellGetContext(pc, &p));
1198   PetscCall(ISDestroy(&std::get<1>(*p)));
1199   PetscCall(VecDestroy(std::get<2>(*p)));
1200   PetscCall(VecDestroy(std::get<2>(*p) + 1));
1201   PetscCall(PetscFree(p));
1202   PetscFunctionReturn(PETSC_SUCCESS);
1203 }
1204 
/* Solve (or Hermitian-transpose solve, when transpose == true) with the KSP of a */
/* given level, in place on the raw array rhs, which holds mu right-hand sides    */
/* stored contiguously and is overwritten with the solution(s)                    */
template <bool transpose>
static PetscErrorCode PCHPDDMSolve_Private(const PC_HPDDM_Level *ctx, PetscScalar *rhs, const unsigned short &mu)
{
  Mat      B, X;
  PetscInt n, N, j = 0;

  PetscFunctionBegin;
  PetscCall(KSPGetOperators(ctx->ksp, &B, nullptr));
  PetscCall(MatGetLocalSize(B, &n, nullptr));
  PetscCall(MatGetSize(B, &N, nullptr));
  if (ctx->parent->log_separate) {
    /* position of this level in the hierarchy, to log into the matching event */
    j = std::distance(ctx->parent->levels, std::find(ctx->parent->levels, ctx->parent->levels + ctx->parent->N, ctx));
    PetscCall(PetscLogEventBegin(PC_HPDDM_Solve[j], ctx->ksp, nullptr, nullptr, nullptr));
  }
  if (mu == 1) {
    if (!ctx->ksp->vec_rhs) {
      /* lazily create the work Vec; vec_rhs has no array of its own since it will borrow rhs below */
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)ctx->ksp), 1, n, N, nullptr, &ctx->ksp->vec_rhs));
      PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)ctx->ksp), n, N, &ctx->ksp->vec_sol));
    }
    PetscCall(VecPlaceArray(ctx->ksp->vec_rhs, rhs));
    if (!transpose) PetscCall(KSPSolve(ctx->ksp, nullptr, nullptr));
    else {
      /* A^-H b = conj(A^-T conj(b)) */
      PetscCall(VecConjugate(ctx->ksp->vec_rhs));
      PetscCall(KSPSolveTranspose(ctx->ksp, nullptr, nullptr)); /* TODO: missing KSPSolveHermitianTranspose() */
      PetscCall(VecConjugate(ctx->ksp->vec_sol));
    }
    PetscCall(VecCopy(ctx->ksp->vec_sol, ctx->ksp->vec_rhs)); /* copy the solution back into rhs, which backs vec_rhs */
    PetscCall(VecResetArray(ctx->ksp->vec_rhs));
  } else {
    /* multiple right-hand sides: wrap rhs in a dense Mat without copying */
    PetscCall(MatCreateDense(PetscObjectComm((PetscObject)ctx->ksp), n, PETSC_DECIDE, N, mu, rhs, &B));
    PetscCall(MatCreateDense(PetscObjectComm((PetscObject)ctx->ksp), n, PETSC_DECIDE, N, mu, nullptr, &X));
    if (!transpose) PetscCall(KSPMatSolve(ctx->ksp, B, X));
    else {
      PetscCall(MatConjugate(B));
      PetscCall(KSPMatSolveTranspose(ctx->ksp, B, X)); /* TODO: missing KSPMatSolveHermitianTranspose() */
      PetscCall(MatConjugate(X));
    }
    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); /* copy the solution back into rhs, which backs B */
    PetscCall(MatDestroy(&X));
    PetscCall(MatDestroy(&B));
  }
  if (ctx->parent->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_Solve[j], ctx->ksp, nullptr, nullptr, nullptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1249 
1250 static PetscErrorCode PCHPDDMSetUpNeumannOverlap_Private(PC pc)
1251 {
1252   PC_HPDDM *data = (PC_HPDDM *)pc->data;
1253 
1254   PetscFunctionBegin;
1255   if (data->setup) {
1256     Mat       P;
1257     Vec       x, xt = nullptr;
1258     PetscReal t = 0.0, s = 0.0;
1259 
1260     PetscCall(PCGetOperators(pc, nullptr, &P));
1261     PetscCall(PetscObjectQuery((PetscObject)P, "__SNES_latest_X", (PetscObject *)&x));
1262     PetscCallBack("PCHPDDM Neumann callback", (*data->setup)(data->aux, t, x, xt, s, data->is, data->setup_ctx));
1263   }
1264   PetscFunctionReturn(PETSC_SUCCESS);
1265 }
1266 
/* MatCreateSubMatrices() override installed by                                     */
/* PCHPDDMCommunicationAvoidingPCASM_Private(): instead of actually extracting a    */
/* submatrix, hand back (a copy of, or a reference to) the Mat previously composed  */
/* with the input Mat, thus avoiding any communication                              */
static PetscErrorCode PCHPDDMCreateSubMatrices_Private(Mat mat, PetscInt n, const IS *, const IS *, MatReuse scall, Mat *submat[])
{
  Mat       A;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatCreateSubMatrices() called to extract %" PetscInt_FMT " submatrices, which is different than 1", n);
  /* previously composed Mat */
  PetscCall(PetscObjectQuery((PetscObject)mat, "_PCHPDDM_SubMatrices", (PetscObject *)&A));
  PetscCheck(A, PETSC_COMM_SELF, PETSC_ERR_PLIB, "SubMatrices not found in Mat");
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSCHURCOMPLEMENT, &flg)); /* MATSCHURCOMPLEMENT has neither a MatDuplicate() nor a MatCopy() implementation */
  if (scall == MAT_INITIAL_MATRIX) {
    PetscCall(PetscCalloc1(2, submat)); /* allocate an extra Mat to avoid errors in MatDestroySubMatrices_Dummy() */
    if (!flg) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, *submat));
  } else if (!flg) PetscCall(MatCopy(A, (*submat)[0], SAME_NONZERO_PATTERN));
  if (flg) {
    /* cannot duplicate nor copy, so share the composed Mat itself (with a bumped reference count) */
    PetscCall(MatDestroy(*submat)); /* previously created Mat has to be destroyed */
    (*submat)[0] = A;
    PetscCall(PetscObjectReference((PetscObject)A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1289 
1290 static PetscErrorCode PCHPDDMCommunicationAvoidingPCASM_Private(PC pc, Mat C, PetscBool sorted)
1291 {
1292   PetscErrorCodeFn *op;
1293 
1294   PetscFunctionBegin;
1295   /* previously-composed Mat */
1296   PetscCall(PetscObjectCompose((PetscObject)pc->pmat, "_PCHPDDM_SubMatrices", (PetscObject)C));
1297   PetscCall(MatGetOperation(pc->pmat, MATOP_CREATE_SUBMATRICES, &op));
1298   /* trick suggested by Barry https://lists.mcs.anl.gov/pipermail/petsc-dev/2020-January/025491.html */
1299   PetscCall(MatSetOperation(pc->pmat, MATOP_CREATE_SUBMATRICES, (PetscErrorCodeFn *)PCHPDDMCreateSubMatrices_Private));
1300   if (sorted) PetscCall(PCASMSetSortIndices(pc, PETSC_FALSE)); /* everything is already sorted */
1301   PetscCall(PCSetFromOptions(pc));                             /* otherwise -pc_hpddm_levels_1_pc_asm_sub_mat_type is not used */
1302   PetscCall(PCSetUp(pc));
1303   /* reset MatCreateSubMatrices() */
1304   PetscCall(MatSetOperation(pc->pmat, MATOP_CREATE_SUBMATRICES, op));
1305   PetscCall(PetscObjectCompose((PetscObject)pc->pmat, "_PCHPDDM_SubMatrices", nullptr));
1306   PetscFunctionReturn(PETSC_SUCCESS);
1307 }
1308 
1309 static PetscErrorCode PCHPDDMPermute_Private(IS is, IS in_is, IS *out_is, Mat in_C, Mat *out_C, IS *p)
1310 {
1311   IS                           perm;
1312   const PetscInt              *ptr;
1313   PetscInt                    *concatenate, size, bs;
1314   std::map<PetscInt, PetscInt> order;
1315   PetscBool                    sorted;
1316 
1317   PetscFunctionBegin;
1318   PetscValidHeaderSpecific(is, IS_CLASSID, 1);
1319   PetscValidHeaderSpecific(in_C, MAT_CLASSID, 4);
1320   PetscCall(ISSorted(is, &sorted));
1321   if (!sorted) {
1322     PetscCall(ISGetLocalSize(is, &size));
1323     PetscCall(ISGetIndices(is, &ptr));
1324     PetscCall(ISGetBlockSize(is, &bs));
1325     /* MatCreateSubMatrices(), called by PCASM, follows the global numbering of Pmat */
1326     for (PetscInt n = 0; n < size; n += bs) order.insert(std::make_pair(ptr[n] / bs, n / bs));
1327     PetscCall(ISRestoreIndices(is, &ptr));
1328     size /= bs;
1329     if (out_C) {
1330       PetscCall(PetscMalloc1(size, &concatenate));
1331       for (const std::pair<const PetscInt, PetscInt> &i : order) *concatenate++ = i.second;
1332       concatenate -= size;
1333       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)in_C), bs, size, concatenate, PETSC_OWN_POINTER, &perm));
1334       PetscCall(ISSetPermutation(perm));
1335       /* permute user-provided Mat so that it matches with MatCreateSubMatrices() numbering */
1336       PetscCall(MatPermute(in_C, perm, perm, out_C));
1337       if (p) *p = perm;
1338       else PetscCall(ISDestroy(&perm)); /* no need to save the permutation */
1339     }
1340     if (out_is) {
1341       PetscCall(PetscMalloc1(size, &concatenate));
1342       for (const std::pair<const PetscInt, PetscInt> &i : order) *concatenate++ = i.first;
1343       concatenate -= size;
1344       /* permute user-provided IS so that it matches with MatCreateSubMatrices() numbering */
1345       PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)in_is), bs, size, concatenate, PETSC_OWN_POINTER, out_is));
1346     }
1347   } else { /* input IS is sorted, nothing to permute, simply duplicate inputs when needed */
1348     if (out_C) PetscCall(MatDuplicate(in_C, MAT_COPY_VALUES, out_C));
1349     if (out_is) PetscCall(ISDuplicate(in_is, out_is));
1350   }
1351   PetscFunctionReturn(PETSC_SUCCESS);
1352 }
1353 
/* Check whether A01 == A10^T (or A10^H), unwrapping virtual (Hermitian-)transpose */
/* wrappers first; when B01 is nullptr, a mismatch is a hard error, otherwise the  */
/* explicit transpose of A10 is returned in *B01 (or *B01 is left nullptr when the */
/* operators do match and no explicit transpose is needed)                         */
static PetscErrorCode PCHPDDMCheckSymmetry_Private(PC pc, Mat A01, Mat A10, Mat *B01 = nullptr)
{
  Mat       T, U = nullptr, B = nullptr;
  IS        z;
  PetscBool flg, conjugate = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATTRANSPOSEVIRTUAL, &flg));
  if (B01) *B01 = nullptr;
  if (flg) {
    /* make sure the wrapper carries no scaling/shift that would invalidate the unwrapping */
    PetscCall(MatShellGetScalingShifts(A10, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    PetscCall(MatTransposeGetMat(A10, &U));
  } else {
    PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
    if (flg) {
      PetscCall(MatShellGetScalingShifts(A10, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
      PetscCall(MatHermitianTransposeGetMat(A10, &U));
      conjugate = PETSC_TRUE;
    }
  }
  /* T is an explicit representation of A10^T (or A10^H) to be compared against A01 */
  if (U) PetscCall(MatDuplicate(U, MAT_COPY_VALUES, &T));
  else PetscCall(MatHermitianTranspose(A10, MAT_INITIAL_MATRIX, &T));
  /* likewise, unwrap A01 when it is a virtual (Hermitian-)transpose, B keeping ownership of the explicit Mat */
  PetscCall(PetscObjectTypeCompare((PetscObject)A01, MATTRANSPOSEVIRTUAL, &flg));
  if (flg) {
    PetscCall(MatShellGetScalingShifts(A01, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    PetscCall(MatTransposeGetMat(A01, &A01));
    PetscCall(MatTranspose(A01, MAT_INITIAL_MATRIX, &B));
    A01 = B;
  } else {
    PetscCall(PetscObjectTypeCompare((PetscObject)A01, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
    if (flg) {
      PetscCall(MatShellGetScalingShifts(A01, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
      PetscCall(MatHermitianTransposeGetMat(A01, &A01));
      PetscCall(MatHermitianTranspose(A01, MAT_INITIAL_MATRIX, &B));
      A01 = B;
    }
  }
  /* the comparison only makes sense when the layouts are congruent */
  PetscCall(PetscLayoutCompare(T->rmap, A01->rmap, &flg));
  if (flg) {
    PetscCall(PetscLayoutCompare(T->cmap, A01->cmap, &flg));
    if (flg) {
      PetscCall(MatFindZeroRows(A01, &z)); /* for essential boundary conditions, some implementations will */
      if (z) {                             /*  zero rows in [P00 A01] except for the diagonal of P00       */
        if (B01) PetscCall(MatDuplicate(T, MAT_COPY_VALUES, B01));
        PetscCall(MatSetOption(T, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
        PetscCall(MatZeroRowsIS(T, z, 0.0, nullptr, nullptr)); /* corresponding zero rows from A01 */
      }
      PetscCall(MatMultEqual(A01, T, 20, &flg));
      if (!B01) PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "A01 != A10^T");
      else {
        PetscCall(PetscInfo(pc, "A01 and A10^T are equal? %s\n", PetscBools[flg]));
        if (!flg) {
          /* operators differ: hand the explicit transpose (the duplicate made before zeroing rows, when applicable) back to the caller */
          if (z) PetscCall(MatDestroy(&T));
          else *B01 = T;
          flg = PETSC_TRUE;
        } else PetscCall(MatDestroy(B01)); /* operators match, the duplicate (if any) is not needed */
      }
      PetscCall(ISDestroy(&z));
    }
  }
  if (!flg) PetscCall(PetscInfo(pc, "A01 and A10^T have non-congruent layouts, cannot test for equality\n"));
  if (!B01 || !*B01) PetscCall(MatDestroy(&T));
  else if (conjugate) PetscCall(MatConjugate(T)); /* NOTE(review): when rows were zeroed (z != NULL) and the operators differ, T has been destroyed above while *B01 is set, so this would conjugate a NULL Mat — confirm this path is unreachable when A10 is a MATHERMITIANTRANSPOSEVIRTUAL */
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1420 
1421 static PetscErrorCode PCHPDDMCheckInclusion_Private(PC pc, IS is, IS is_local, PetscBool check)
1422 {
1423   IS          intersect;
1424   const char *str = "IS of the auxiliary Mat does not include all local rows of A";
1425   PetscBool   equal;
1426 
1427   PetscFunctionBegin;
1428   PetscCall(ISIntersect(is, is_local, &intersect));
1429   PetscCall(ISEqualUnsorted(is_local, intersect, &equal));
1430   PetscCall(ISDestroy(&intersect));
1431   if (check) PetscCheck(equal, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "%s", str);
1432   else if (!equal) PetscCall(PetscInfo(pc, "%s\n", str));
1433   PetscFunctionReturn(PETSC_SUCCESS);
1434 }
1435 
1436 static PetscErrorCode PCHPDDMCheckMatStructure_Private(PC pc, Mat A, Mat B)
1437 {
1438   Mat             X, Y;
1439   const PetscInt *i[2], *j[2];
1440   PetscBool       flg = PETSC_TRUE;
1441 
1442   PetscFunctionBegin;
1443   PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &X)); /* no common way to compare sparsity pattern, so just convert to MATSEQAIJ */
1444   PetscCall(MatConvert(B, MATAIJ, MAT_INITIAL_MATRIX, &Y)); /* the second Mat (B = Neumann) should have a SUBSET_NONZERO_PATTERN MatStructure of the first one (A = Dirichlet) */
1445   PetscCall(MatSeqAIJGetCSRAndMemType(X, &i[0], &j[0], nullptr, nullptr));
1446   PetscCall(MatSeqAIJGetCSRAndMemType(Y, &i[1], &j[1], nullptr, nullptr));
1447   for (PetscInt row = 0; (row < X->rmap->n) && flg; ++row) {
1448     const PetscInt n = i[0][row + 1] - i[0][row];
1449 
1450     for (PetscInt k = i[1][row], location; k < i[1][row + 1]; ++k) {
1451       PetscCall(PetscFindInt(j[1][k], n, j[0] + i[0][row], &location));
1452       if (location < 0) {
1453         flg = PETSC_FALSE;
1454         break;
1455       }
1456     }
1457   }
1458   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1459   PetscCheck(flg, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER_INPUT, "Auxiliary Mat is supposedly the local Neumann matrix but it has a sparsity pattern which is not a subset of the one of the local assembled matrix");
1460   PetscCall(MatDestroy(&Y));
1461   PetscCall(MatDestroy(&X));
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
1465 static PetscErrorCode PCHPDDMDestroySubMatrices_Private(PetscBool flg, PetscBool algebraic, Mat *sub)
1466 {
1467   IS is;
1468 
1469   PetscFunctionBegin;
1470   if (!flg) {
1471     if (algebraic) {
1472       PetscCall(PetscObjectQuery((PetscObject)sub[0], "_PCHPDDM_Embed", (PetscObject *)&is));
1473       PetscCall(ISDestroy(&is));
1474       PetscCall(PetscObjectCompose((PetscObject)sub[0], "_PCHPDDM_Embed", nullptr));
1475       PetscCall(PetscObjectCompose((PetscObject)sub[0], "_PCHPDDM_Compact", nullptr));
1476     }
1477     PetscCall(MatDestroySubMatrices(algebraic ? 2 : 1, &sub));
1478   }
1479   PetscFunctionReturn(PETSC_SUCCESS);
1480 }
1481 
/* Build the auxiliary local matrices needed when the user supplied no Neumann     */
/* Mat: extract the overlapping rows of P, then either (block == PETSC_FALSE)     */
/* compress away the zero columns so that the local system can be expressed in    */
/* compact form, cf. [2022b], or (block == PETSC_TRUE) lump the off-diagonal      */
/* (block) row sums onto the diagonal of the overlapping diagonal block; results  */
/* are returned in *sub, with extra objects composed with (*sub)[0]               */
static PetscErrorCode PCHPDDMAlgebraicAuxiliaryMat_Private(Mat P, IS *is, Mat *sub[], PetscBool block)
{
  IS         icol[3], irow[2];
  Mat       *M, Q;
  PetscReal *ptr;
  PetscInt  *idx, p = 0, bs = P->cmap->bs;
  PetscBool  flg;

  PetscFunctionBegin;
  /* icol[2] selects all columns, so M[0] below holds the full overlapping rows of P */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, P->cmap->N, 0, 1, icol + 2));
  PetscCall(ISSetBlockSize(icol[2], bs));
  PetscCall(ISSetIdentity(icol[2]));
  PetscCall(PetscObjectTypeCompare((PetscObject)P, MATMPISBAIJ, &flg));
  if (flg) {
    /* MatCreateSubMatrices() does not handle MATMPISBAIJ properly when iscol != isrow, so convert first to MATMPIBAIJ */
    PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &Q));
    std::swap(P, Q);
  }
  PetscCall(MatCreateSubMatrices(P, 1, is, icol + 2, MAT_INITIAL_MATRIX, &M));
  if (flg) {
    /* restore the caller's P and drop the temporary conversion */
    std::swap(P, Q);
    PetscCall(MatDestroy(&Q));
  }
  PetscCall(ISDestroy(icol + 2));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, M[0]->rmap->N, 0, 1, irow));
  PetscCall(ISSetBlockSize(irow[0], bs));
  PetscCall(ISSetIdentity(irow[0]));
  if (!block) {
    PetscCall(PetscMalloc2(P->cmap->N, &ptr, P->cmap->N / bs, &idx));
    PetscCall(MatGetColumnNorms(M[0], NORM_INFINITY, ptr));
    /* check for nonzero columns so that M[0] may be expressed in compact form */
    for (PetscInt n = 0; n < P->cmap->N; n += bs) {
      if (std::find_if(ptr + n, ptr + n + bs, [](PetscReal v) { return v > PETSC_MACHINE_EPSILON; }) != ptr + n + bs) idx[p++] = n / bs;
    }
    PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, p, idx, PETSC_USE_POINTER, icol + 1));
    PetscCall(ISSetInfo(icol[1], IS_SORTED, IS_GLOBAL, PETSC_TRUE, PETSC_TRUE));
    PetscCall(ISEmbed(*is, icol[1], PETSC_FALSE, icol + 2));
    irow[1] = irow[0];
    /* first Mat will be used in PCASM (if it is used as a PC on this level) and as the left-hand side of GenEO */
    icol[0] = is[0];
    PetscCall(MatCreateSubMatrices(M[0], 2, irow, icol, MAT_INITIAL_MATRIX, sub));
    PetscCall(ISDestroy(icol + 1));
    PetscCall(PetscFree2(ptr, idx));
    /* IS used to go back and forth between the augmented and the original local linear system, see eq. (3.4) of [2022b] */
    PetscCall(PetscObjectCompose((PetscObject)(*sub)[0], "_PCHPDDM_Embed", (PetscObject)icol[2]));
    /* Mat used in eq. (3.1) of [2022b] */
    PetscCall(PetscObjectCompose((PetscObject)(*sub)[0], "_PCHPDDM_Compact", (PetscObject)(*sub)[1]));
  } else {
    Mat aux;

    PetscCall(MatSetOption(M[0], MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
    /* diagonal block of the overlapping rows */
    PetscCall(MatCreateSubMatrices(M[0], 1, irow, is, MAT_INITIAL_MATRIX, sub));
    PetscCall(MatDuplicate((*sub)[0], MAT_COPY_VALUES, &aux));
    PetscCall(MatSetOption(aux, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
    if (bs == 1) { /* scalar case */
      Vec sum[2];

      PetscCall(MatCreateVecs(aux, sum, sum + 1));
      PetscCall(MatGetRowSum(M[0], sum[0]));
      PetscCall(MatGetRowSum(aux, sum[1]));
      /* off-diagonal block row sum (full rows - diagonal block rows) */
      PetscCall(VecAXPY(sum[0], -1.0, sum[1]));
      /* subdomain matrix plus off-diagonal block row sum */
      PetscCall(MatDiagonalSet(aux, sum[0], ADD_VALUES));
      PetscCall(VecDestroy(sum));
      PetscCall(VecDestroy(sum + 1));
    } else { /* vectorial case */
      /* TODO: missing MatGetValuesBlocked(), so the code below is     */
      /* an extension of the scalar case for when bs > 1, but it could */
      /* be more efficient by avoiding all these MatMatMult()          */
      Mat          sum[2], ones;
      PetscScalar *ptr;

      /* ones is a tall-skinny dense Mat with a 1.0 on each of the bs diagonals, so  */
      /* that M[0] * ones yields the bs x bs block row sums of M[0], stacked by row  */
      PetscCall(PetscCalloc1(M[0]->cmap->n * bs, &ptr));
      PetscCall(MatCreateDense(PETSC_COMM_SELF, M[0]->cmap->n, bs, M[0]->cmap->n, bs, ptr, &ones));
      for (PetscInt n = 0; n < M[0]->cmap->n; n += bs) {
        for (p = 0; p < bs; ++p) ptr[n + p * (M[0]->cmap->n + 1)] = 1.0;
      }
      PetscCall(MatMatMult(M[0], ones, MAT_INITIAL_MATRIX, PETSC_CURRENT, sum));
      PetscCall(MatDestroy(&ones));
      /* same array reused for the (shorter) aux counterpart, with an adjusted leading dimension */
      PetscCall(MatCreateDense(PETSC_COMM_SELF, aux->cmap->n, bs, aux->cmap->n, bs, ptr, &ones));
      PetscCall(MatDenseSetLDA(ones, M[0]->cmap->n));
      PetscCall(MatMatMult(aux, ones, MAT_INITIAL_MATRIX, PETSC_CURRENT, sum + 1));
      PetscCall(MatDestroy(&ones));
      PetscCall(PetscFree(ptr));
      /* off-diagonal block row sum (full rows - diagonal block rows) */
      PetscCall(MatAXPY(sum[0], -1.0, sum[1], SAME_NONZERO_PATTERN));
      PetscCall(MatDestroy(sum + 1));
      /* re-order values to be consistent with MatSetValuesBlocked()           */
      /* equivalent to MatTranspose() which does not truly handle              */
      /* MAT_INPLACE_MATRIX in the rectangular case, as it calls PetscMalloc() */
      PetscCall(MatDenseGetArrayWrite(sum[0], &ptr));
      HPDDM::Wrapper<PetscScalar>::imatcopy<'T'>(bs, sum[0]->rmap->n, ptr, sum[0]->rmap->n, bs);
      /* subdomain matrix plus off-diagonal block row sum */
      for (PetscInt n = 0; n < aux->cmap->n / bs; ++n) PetscCall(MatSetValuesBlocked(aux, 1, &n, 1, &n, ptr + n * bs * bs, ADD_VALUES));
      PetscCall(MatAssemblyBegin(aux, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(aux, MAT_FINAL_ASSEMBLY));
      PetscCall(MatDenseRestoreArrayWrite(sum[0], &ptr));
      PetscCall(MatDestroy(sum));
    }
    PetscCall(MatSetOption(aux, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
    /* left-hand side of GenEO, with the same sparsity pattern as PCASM subdomain solvers  */
    PetscCall(PetscObjectCompose((PetscObject)(*sub)[0], "_PCHPDDM_Neumann_Mat", (PetscObject)aux));
  }
  PetscCall(ISDestroy(irow));
  PetscCall(MatDestroySubMatrices(1, &M));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1591 
/* Apply the inner PC stored in the PCSHELL context on the part of x associated   */
/* with the Schur complement (second IS of the MATNEST), scattering to and from   */
/* the work Vec of the context; with MUMPS and an attached Schur complement, the  */
/* solve is restricted to the Schur complement via ICNTL(26)                      */
static PetscErrorCode PCApply_Nest(PC pc, Vec x, Vec y)
{
  Mat                    A;
  MatSolverType          type;
  IS                     is[2];
  PetscBool              flg;
  std::pair<PC, Vec[2]> *p;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &p));
  if (p->second[0]) { /* in case of a centralized Schur complement, some processes may have no local operator */
    PetscCall(PCGetOperators(p->first, &A, nullptr));
    PetscCall(MatNestGetISs(A, is, nullptr));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)p->first, &flg, PCLU, PCCHOLESKY, ""));
    if (flg) { /* partial solve currently only makes sense with exact factorizations */
      PetscCall(PCFactorGetMatSolverType(p->first, &type));
      PetscCall(PCFactorGetMatrix(p->first, &A));
      if (A->schur) {
        PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
        if (flg) PetscCall(MatMumpsSetIcntl(A, 26, 1)); /* reduction/condensation phase followed by Schur complement solve */
      } else flg = PETSC_FALSE; /* no Schur complement attached, apply the PC as usual */
    }
    PetscCall(VecISCopy(p->second[0], is[1], SCATTER_FORWARD, x)); /* assign the RHS associated to the Schur complement */
    PetscCall(PCApply(p->first, p->second[0], p->second[1]));
    PetscCall(VecISCopy(p->second[1], is[1], SCATTER_REVERSE, y)); /* retrieve the partial solution associated to the Schur complement */
    if (flg) PetscCall(MatMumpsSetIcntl(A, 26, -1));               /* default ICNTL(26) value in PETSc */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1621 
1622 static PetscErrorCode PCView_Nest(PC pc, PetscViewer viewer)
1623 {
1624   std::pair<PC, Vec[2]> *p;
1625 
1626   PetscFunctionBegin;
1627   PetscCall(PCShellGetContext(pc, &p));
1628   PetscCall(PCView(p->first, viewer));
1629   PetscFunctionReturn(PETSC_SUCCESS);
1630 }
1631 
1632 static PetscErrorCode PCDestroy_Nest(PC pc)
1633 {
1634   std::pair<PC, Vec[2]> *p;
1635 
1636   PetscFunctionBegin;
1637   PetscCall(PCShellGetContext(pc, &p));
1638   PetscCall(VecDestroy(p->second));
1639   PetscCall(VecDestroy(p->second + 1));
1640   PetscCall(PCDestroy(&p->first));
1641   PetscCall(PetscFree(p));
1642   PetscFunctionReturn(PETSC_SUCCESS);
1643 }
1644 
1645 template <bool T = false>
1646 static PetscErrorCode MatMult_Schur(Mat A, Vec x, Vec y)
1647 {
1648   std::tuple<Mat, PetscSF, Vec[2]> *ctx;
1649 
1650   PetscFunctionBegin;
1651   PetscCall(MatShellGetContext(A, &ctx));
1652   PetscCall(VecScatterBegin(std::get<1>(*ctx), x, std::get<2>(*ctx)[0], INSERT_VALUES, SCATTER_FORWARD)); /* local Vec with overlap */
1653   PetscCall(VecScatterEnd(std::get<1>(*ctx), x, std::get<2>(*ctx)[0], INSERT_VALUES, SCATTER_FORWARD));
1654   if (!T) PetscCall(MatMult(std::get<0>(*ctx), std::get<2>(*ctx)[0], std::get<2>(*ctx)[1])); /* local Schur complement */
1655   else PetscCall(MatMultTranspose(std::get<0>(*ctx), std::get<2>(*ctx)[0], std::get<2>(*ctx)[1]));
1656   PetscCall(VecSet(y, 0.0));
1657   PetscCall(VecScatterBegin(std::get<1>(*ctx), std::get<2>(*ctx)[1], y, ADD_VALUES, SCATTER_REVERSE)); /* global Vec with summed up contributions on the overlap */
1658   PetscCall(VecScatterEnd(std::get<1>(*ctx), std::get<2>(*ctx)[1], y, ADD_VALUES, SCATTER_REVERSE));
1659   PetscFunctionReturn(PETSC_SUCCESS);
1660 }
1661 
1662 static PetscErrorCode MatDestroy_Schur(Mat A)
1663 {
1664   std::tuple<Mat, PetscSF, Vec[2]> *ctx;
1665 
1666   PetscFunctionBegin;
1667   PetscCall(MatShellGetContext(A, &ctx));
1668   PetscCall(VecDestroy(std::get<2>(*ctx)));
1669   PetscCall(VecDestroy(std::get<2>(*ctx) + 1));
1670   PetscCall(PetscFree(ctx));
1671   PetscFunctionReturn(PETSC_SUCCESS);
1672 }
1673 
/* y = (M_S^-1 A_10 Q_0 A_01 - I) x with left (or default) preconditioning, or    */
/* y = (A_10 Q_0 A_01 M_S^-1 - I) x with right preconditioning, where Q_0 is the  */
/* coarse correction of the A00 block; this is the preconditioned eq. (24) of     */
/* https://hal.science/hal-02343808v6/document (with a sign flip)                 */
static PetscErrorCode MatMult_SchurCorrection(Mat A, Vec x, Vec y)
{
  PC                                         pc;
  std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx;

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &ctx));
  pc = ((PC_HPDDM *)std::get<0>(*ctx)[0]->data)->levels[0]->ksp->pc;
  if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) {             /* Q_0 is the coarse correction associated to the A00 block from PCFIELDSPLIT */
    PetscCall(MatMult(std::get<1>(*ctx)[0], x, std::get<3>(*ctx)[1]));                    /*     A_01 x                 */
    PetscCall(PCHPDDMDeflate_Private(pc, std::get<3>(*ctx)[1], std::get<3>(*ctx)[1]));    /*     Q_0 A_01 x             */
    PetscCall(MatMult(std::get<1>(*ctx)[1], std::get<3>(*ctx)[1], std::get<3>(*ctx)[0])); /*     A_10 Q_0 A_01 x        */
    PetscCall(PCApply(std::get<0>(*ctx)[1], std::get<3>(*ctx)[0], y));                    /* y = M_S^-1 A_10 Q_0 A_01 x */
  } else {
    PetscCall(PCApply(std::get<0>(*ctx)[1], x, std::get<3>(*ctx)[0]));                    /*     M_S^-1 x               */
    PetscCall(MatMult(std::get<1>(*ctx)[0], std::get<3>(*ctx)[0], std::get<3>(*ctx)[1])); /*     A_01 M_S^-1 x          */
    PetscCall(PCHPDDMDeflate_Private(pc, std::get<3>(*ctx)[1], std::get<3>(*ctx)[1]));    /*     Q_0 A_01 M_S^-1 x      */
    PetscCall(MatMult(std::get<1>(*ctx)[1], std::get<3>(*ctx)[1], y));                    /* y = A_10 Q_0 A_01 M_S^-1 x */
  }
  PetscCall(VecAXPY(y, -1.0, x)); /* y -= x, preconditioned eq. (24) of https://hal.science/hal-02343808v6/document (with a sign flip) */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1696 
1697 static PetscErrorCode MatView_SchurCorrection(Mat A, PetscViewer viewer)
1698 {
1699   PetscBool                                  ascii;
1700   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx;
1701 
1702   PetscFunctionBegin;
1703   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &ascii));
1704   if (ascii) {
1705     PetscCall(MatShellGetContext(A, &ctx));
1706     PetscCall(PetscViewerASCIIPrintf(viewer, "action of %s\n", std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT ? "(I - M_S^-1 A_10 Q_0 A_01)" : "(I - A_10 Q_0 A_01 M_S^-1)"));
1707     PetscCall(PCView(std::get<0>(*ctx)[1], viewer)); /* no need to PCView(Q_0) since it will be done by PCFIELDSPLIT */
1708   }
1709   PetscFunctionReturn(PETSC_SUCCESS);
1710 }
1711 
1712 static PetscErrorCode MatDestroy_SchurCorrection(Mat A)
1713 {
1714   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx;
1715 
1716   PetscFunctionBegin;
1717   PetscCall(MatShellGetContext(A, &ctx));
1718   PetscCall(VecDestroy(std::get<3>(*ctx)));
1719   PetscCall(VecDestroy(std::get<3>(*ctx) + 1));
1720   PetscCall(VecDestroy(std::get<3>(*ctx) + 2));
1721   PetscCall(PCDestroy(std::get<0>(*ctx) + 1));
1722   PetscCall(PetscFree(ctx));
1723   PetscFunctionReturn(PETSC_SUCCESS);
1724 }
1725 
/* Post-solve hook: flip the sign of the solution after the inner solve */
static PetscErrorCode PCPostSolve_SchurPreLeastSquares(PC, KSP, Vec, Vec x)
{
  PetscFunctionBegin;
  PetscCall(VecScale(x, -1.0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1732 
/* Pre-solve hook: with left (or default) preconditioning, replace the right-hand */
/* side b by M_S^-1 b; the original b is stashed in the third work Vec through a  */
/* header swap and put back by KSPPostSolve_SchurCorrection()                     */
static PetscErrorCode KSPPreSolve_SchurCorrection(KSP, Vec b, Vec, void *context)
{
  std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx = reinterpret_cast<std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *>(context);

  PetscFunctionBegin;
  if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) {
    PetscCall(PCApply(std::get<0>(*ctx)[1], b, std::get<3>(*ctx)[2]));
    std::swap(*b, *std::get<3>(*ctx)[2]); /* replace b by M^-1 b, but need to keep a copy of the original RHS, so swap it with the work Vec */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1744 
/* Post-solve hook, mirror of KSPPreSolve_SchurCorrection(): with left (or        */
/* default) preconditioning, swap back the original right-hand side; with right   */
/* preconditioning, apply M_S^-1 to the computed solution                         */
static PetscErrorCode KSPPostSolve_SchurCorrection(KSP, Vec b, Vec x, void *context)
{
  std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx = reinterpret_cast<std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *>(context);

  PetscFunctionBegin;
  if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) std::swap(*b, *std::get<3>(*ctx)[2]); /* put back the original RHS where it belongs */
  else {
    PetscCall(PCApply(std::get<0>(*ctx)[1], x, std::get<3>(*ctx)[2]));
    PetscCall(VecCopy(std::get<3>(*ctx)[2], x)); /* replace x by M^-1 x */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1757 
1758 static PetscErrorCode MatMult_Harmonic(Mat, Vec, Vec);
1759 static PetscErrorCode MatMultTranspose_Harmonic(Mat, Vec, Vec);
1760 static PetscErrorCode MatProduct_AB_Harmonic(Mat, Mat, Mat, void *);
1761 static PetscErrorCode MatProduct_AtB_Harmonic(Mat, Mat, Mat, void *);
1762 static PetscErrorCode MatDestroy_Harmonic(Mat);
1763 
1764 static PetscErrorCode PCSetUp_HPDDM(PC pc)
1765 {
1766   PC_HPDDM                                  *data = (PC_HPDDM *)pc->data;
1767   PC                                         inner;
1768   KSP                                       *ksp;
1769   Mat                                       *sub, A, P, N, C = nullptr, uaux = nullptr, weighted, subA[2], S;
1770   Vec                                        xin, v;
1771   std::vector<Vec>                           initial;
1772   IS                                         is[1], loc, uis = data->is, unsorted = nullptr;
1773   ISLocalToGlobalMapping                     l2g;
1774   char                                       prefix[256];
1775   const char                                *pcpre;
1776   const PetscScalar *const                  *ev;
1777   PetscInt                                   n, requested = data->N, reused = 0, overlap = -1;
1778   MatStructure                               structure  = UNKNOWN_NONZERO_PATTERN;
1779   PetscBool                                  subdomains = PETSC_FALSE, flg = PETSC_FALSE, ismatis, swap = PETSC_FALSE, algebraic = PETSC_FALSE, block = PETSC_FALSE;
1780   DM                                         dm;
1781   std::tuple<PC[2], Mat[2], PCSide, Vec[3]> *ctx = nullptr;
1782 #if PetscDefined(USE_DEBUG)
1783   IS  dis  = nullptr;
1784   Mat daux = nullptr;
1785 #endif
1786 
1787   PetscFunctionBegin;
1788   PetscCheck(data->levels && data->levels[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not a single level allocated");
1789   PetscCall(PCGetOptionsPrefix(pc, &pcpre));
1790   PetscCall(PCGetOperators(pc, &A, &P));
1791   if (!data->levels[0]->ksp) {
1792     PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &data->levels[0]->ksp));
1793     PetscCall(KSPSetNestLevel(data->levels[0]->ksp, pc->kspnestlevel));
1794     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_%s_", pcpre ? pcpre : "", data->N > 1 ? "levels_1" : "coarse"));
1795     PetscCall(KSPSetOptionsPrefix(data->levels[0]->ksp, prefix));
1796     PetscCall(KSPSetType(data->levels[0]->ksp, KSPPREONLY));
1797   } else if (data->levels[0]->ksp->pc && data->levels[0]->ksp->pc->setupcalled && data->levels[0]->ksp->pc->reusepreconditioner) {
1798     /* if the fine-level PCSHELL exists, its setup has succeeded, and one wants to reuse it, */
1799     /* then just propagate the appropriate flag to the coarser levels                        */
1800     for (n = 0; n < PETSC_PCHPDDM_MAXLEVELS && data->levels[n]; ++n) {
1801       /* the following KSP and PC may be NULL for some processes, hence the check            */
1802       if (data->levels[n]->ksp) PetscCall(KSPSetReusePreconditioner(data->levels[n]->ksp, PETSC_TRUE));
1803       if (data->levels[n]->pc) PetscCall(PCSetReusePreconditioner(data->levels[n]->pc, PETSC_TRUE));
1804     }
1805     /* early bail out because there is nothing to do */
1806     PetscFunctionReturn(PETSC_SUCCESS);
1807   } else {
1808     /* reset coarser levels */
1809     for (n = 1; n < PETSC_PCHPDDM_MAXLEVELS && data->levels[n]; ++n) {
1810       if (data->levels[n]->ksp && data->levels[n]->ksp->pc && data->levels[n]->ksp->pc->setupcalled && data->levels[n]->ksp->pc->reusepreconditioner && n < data->N) {
1811         reused = data->N - n;
1812         break;
1813       }
1814       PetscCall(KSPDestroy(&data->levels[n]->ksp));
1815       PetscCall(PCDestroy(&data->levels[n]->pc));
1816     }
1817     /* check if some coarser levels are being reused */
1818     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reused, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
1819     const int *addr = data->levels[0]->P ? data->levels[0]->P->getAddrLocal() : &HPDDM::i__0;
1820 
1821     if (addr != &HPDDM::i__0 && reused != data->N - 1) {
1822       /* reuse previously computed eigenvectors */
1823       ev = data->levels[0]->P->getVectors();
1824       if (ev) {
1825         initial.reserve(*addr);
1826         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, data->levels[0]->P->getDof(), ev[0], &xin));
1827         for (n = 0; n < *addr; ++n) {
1828           PetscCall(VecDuplicate(xin, &v));
1829           PetscCall(VecPlaceArray(xin, ev[n]));
1830           PetscCall(VecCopy(xin, v));
1831           initial.emplace_back(v);
1832           PetscCall(VecResetArray(xin));
1833         }
1834         PetscCall(VecDestroy(&xin));
1835       }
1836     }
1837   }
1838   data->N -= reused;
1839   PetscCall(KSPSetOperators(data->levels[0]->ksp, A, P));
1840 
1841   PetscCall(PetscObjectTypeCompare((PetscObject)P, MATIS, &ismatis));
1842   if (!data->is && !ismatis) {
1843     PetscErrorCode (*create)(DM, IS *, Mat *, PetscErrorCode (**)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *), void **) = nullptr;
1844     PetscErrorCode (*usetup)(Mat, PetscReal, Vec, Vec, PetscReal, IS, void *)                                                = nullptr;
1845     void *uctx                                                                                                               = nullptr;
1846 
1847     /* first see if we can get the data from the DM */
1848     PetscCall(MatGetDM(P, &dm));
1849     if (!dm) PetscCall(MatGetDM(A, &dm));
1850     if (!dm) PetscCall(PCGetDM(pc, &dm));
1851     if (dm) { /* this is the hook for DMPLEX for which the auxiliary Mat is the local Neumann matrix */
1852       PetscCall(PetscObjectQueryFunction((PetscObject)dm, "DMCreateNeumannOverlap_C", &create));
1853       if (create) {
1854         PetscCall((*create)(dm, &uis, &uaux, &usetup, &uctx));
1855         if (data->Neumann == PETSC_BOOL3_UNKNOWN) data->Neumann = PETSC_BOOL3_TRUE; /* set the value only if it was not already provided by the user */
1856       }
1857     }
1858     if (!create) {
1859       if (!uis) {
1860         PetscCall(PetscObjectQuery((PetscObject)pc, "_PCHPDDM_Neumann_IS", (PetscObject *)&uis));
1861         PetscCall(PetscObjectReference((PetscObject)uis));
1862       }
1863       if (!uaux) {
1864         PetscCall(PetscObjectQuery((PetscObject)pc, "_PCHPDDM_Neumann_Mat", (PetscObject *)&uaux));
1865         PetscCall(PetscObjectReference((PetscObject)uaux));
1866       }
1867       /* look inside the Pmat instead of the PC, needed for MatSchurComplementComputeExplicitOperator() */
1868       if (!uis) {
1869         PetscCall(PetscObjectQuery((PetscObject)P, "_PCHPDDM_Neumann_IS", (PetscObject *)&uis));
1870         PetscCall(PetscObjectReference((PetscObject)uis));
1871       }
1872       if (!uaux) {
1873         PetscCall(PetscObjectQuery((PetscObject)P, "_PCHPDDM_Neumann_Mat", (PetscObject *)&uaux));
1874         PetscCall(PetscObjectReference((PetscObject)uaux));
1875       }
1876     }
1877     PetscCall(PCHPDDMSetAuxiliaryMat(pc, uis, uaux, usetup, uctx));
1878     PetscCall(MatDestroy(&uaux));
1879     PetscCall(ISDestroy(&uis));
1880   }
1881 
1882   if (!ismatis) {
1883     PetscCall(PCHPDDMSetUpNeumannOverlap_Private(pc));
1884     PetscCall(PetscOptionsGetBool(((PetscObject)pc)->options, pcpre, "-pc_hpddm_block_splitting", &block, nullptr));
1885     PetscCall(PetscOptionsGetInt(((PetscObject)pc)->options, pcpre, "-pc_hpddm_harmonic_overlap", &overlap, nullptr));
1886     PetscCall(PetscObjectTypeCompare((PetscObject)P, MATSCHURCOMPLEMENT, &flg));
1887     if (data->is || flg) {
1888       if (block || overlap != -1) {
1889         PetscCall(ISDestroy(&data->is));
1890         PetscCall(MatDestroy(&data->aux));
1891       } else if (flg) {
1892         PCHPDDMSchurPreType type = PC_HPDDM_SCHUR_PRE_GENEO;
1893 
1894         PetscCall(PetscOptionsGetEnum(((PetscObject)pc)->options, pcpre, "-pc_hpddm_schur_precondition", PCHPDDMSchurPreTypes, (PetscEnum *)&type, &flg));
1895         if (type == PC_HPDDM_SCHUR_PRE_LEAST_SQUARES) {
1896           PetscCall(ISDestroy(&data->is)); /* destroy any previously user-set objects since they will be set automatically */
1897           PetscCall(MatDestroy(&data->aux));
1898         } else if (type == PC_HPDDM_SCHUR_PRE_GENEO) {
1899           PetscContainer container = nullptr;
1900 
1901           PetscCall(PetscObjectQuery((PetscObject)pc, "_PCHPDDM_Schur", (PetscObject *)&container));
1902           if (!container) { /* first call to PCSetUp() on the PC associated to the Schur complement */
1903             PC_HPDDM       *data_00;
1904             KSP             ksp, inner_ksp;
1905             PC              pc_00;
1906             Mat             A11 = nullptr;
1907             Vec             d   = nullptr;
1908             PetscReal       norm;
1909             const PetscInt *ranges;
1910             PetscMPIInt     size;
1911             char           *prefix;
1912 
1913             PetscCall(MatSchurComplementGetKSP(P, &ksp));
1914             PetscCall(KSPGetPC(ksp, &pc_00));
1915             PetscCall(PetscObjectTypeCompare((PetscObject)pc_00, PCHPDDM, &flg));
1916             PetscCheck(flg, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s and -%spc_type %s (!= %s)", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type], ((PetscObject)pc_00)->prefix ? ((PetscObject)pc_00)->prefix : "",
1917                        ((PetscObject)pc_00)->type_name, PCHPDDM);
1918             data_00 = (PC_HPDDM *)pc_00->data;
1919             PetscCheck(data_00->N == 2, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s and %" PetscInt_FMT " level%s instead of 2 for the A00 block -%s", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type],
1920                        data_00->N, data_00->N > 1 ? "s" : "", ((PetscObject)pc_00)->prefix);
1921             PetscCheck(data_00->levels[0]->pc, PetscObjectComm((PetscObject)P), PETSC_ERR_ORDER, "PC of the first block%s not setup yet", ((PetscObject)pc_00)->prefix ? std::string(std::string(" (") + ((PetscObject)pc_00)->prefix + std::string(")")).c_str() : "");
1922             PetscCall(PetscObjectTypeCompare((PetscObject)data_00->levels[0]->pc, PCASM, &flg));
1923             PetscCheck(flg, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s and -%spc_type %s (!= %s)", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type], ((PetscObject)data_00->levels[0]->pc)->prefix,
1924                        ((PetscObject)data_00->levels[0]->pc)->type_name, PCASM);
1925             PetscCall(PetscNew(&ctx)); /* context to pass data around for the inner-most PC, which will be a proper PCHPDDM (or a dummy variable if the Schur complement is centralized on a single process)  */
1926             PetscCall(MatSchurComplementGetSubMatrices(P, nullptr, nullptr, nullptr, nullptr, &A11));
1927             PetscCall(MatGetOwnershipRanges(A11, &ranges));
1928             PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A11), &size));
1929             flg = PetscBool(std::find_if(ranges, ranges + size + 1, [&](PetscInt v) { return v != ranges[0] && v != ranges[size]; }) == ranges + size + 1); /* are all local matrices but one of dimension 0 (centralized Schur complement)? */
1930             if (!flg) {
1931               if (PetscDefined(USE_DEBUG) || !data->is) {
1932                 Mat A01, A10, B = nullptr, C = nullptr, *sub;
1933 
1934                 PetscCall(MatSchurComplementGetSubMatrices(P, &A, nullptr, &A01, &A10, nullptr));
1935                 PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATTRANSPOSEVIRTUAL, &flg));
1936                 if (flg) {
1937                   PetscCall(MatTransposeGetMat(A10, &C));
1938                   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &B));
1939                 } else {
1940                   PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
1941                   if (flg) {
1942                     PetscCall(MatHermitianTransposeGetMat(A10, &C));
1943                     PetscCall(MatHermitianTranspose(C, MAT_INITIAL_MATRIX, &B));
1944                   }
1945                 }
1946                 if (flg)
1947                   PetscCall(MatShellGetScalingShifts(A10, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1948                 if (!B) {
1949                   B = A10;
1950                   PetscCall(PetscObjectReference((PetscObject)B));
1951                 } else if (!data->is) {
1952                   PetscCall(PetscObjectTypeCompareAny((PetscObject)A01, &flg, MATTRANSPOSEVIRTUAL, MATHERMITIANTRANSPOSEVIRTUAL, ""));
1953                   if (!flg) C = A01;
1954                   else
1955                     PetscCall(MatShellGetScalingShifts(A01, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
1956                 }
1957                 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->rmap->N, 0, 1, &uis));
1958                 PetscCall(ISSetIdentity(uis));
1959                 if (!data->is) {
1960                   if (C) PetscCall(PetscObjectReference((PetscObject)C));
1961                   else PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &C));
1962                   PetscCall(ISDuplicate(data_00->is, is));
1963                   PetscCall(MatIncreaseOverlap(A, 1, is, 1));
1964                   PetscCall(MatSetOption(C, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
1965                   PetscCall(MatCreateSubMatrices(C, 1, is, &uis, MAT_INITIAL_MATRIX, &sub));
1966                   PetscCall(MatDestroy(&C));
1967                   PetscCall(MatTranspose(sub[0], MAT_INITIAL_MATRIX, &C));
1968                   PetscCall(MatDestroySubMatrices(1, &sub));
1969                   PetscCall(MatFindNonzeroRows(C, &data->is));
1970                   PetscCheck(data->is, PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "No empty row, which likely means that some rows of A_10 are dense");
1971                   PetscCall(MatDestroy(&C));
1972                   PetscCall(ISDestroy(is));
1973                   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)data->is), A11->rmap->n, A11->rmap->rstart, 1, &loc));
1974                   if (PetscDefined(USE_DEBUG)) PetscCall(PCHPDDMCheckInclusion_Private(pc, data->is, loc, PETSC_FALSE));
1975                   PetscCall(ISExpand(data->is, loc, is));
1976                   PetscCall(ISDestroy(&loc));
1977                   PetscCall(ISDestroy(&data->is));
1978                   data->is = is[0];
1979                   is[0]    = nullptr;
1980                 }
1981                 if (PetscDefined(USE_DEBUG)) {
1982                   PetscCall(PCHPDDMCheckSymmetry_Private(pc, A01, A10));
1983                   PetscCall(MatCreateSubMatrices(B, 1, &uis, &data_00->is, MAT_INITIAL_MATRIX, &sub)); /* expensive check since all processes fetch all rows (but only some columns) of the constraint matrix */
1984                   PetscCall(ISDestroy(&uis));
1985                   PetscCall(ISDuplicate(data->is, &uis));
1986                   PetscCall(ISSort(uis));
1987                   PetscCall(ISComplement(uis, 0, B->rmap->N, is));
1988                   PetscCall(MatDuplicate(sub[0], MAT_COPY_VALUES, &C));
1989                   PetscCall(MatZeroRowsIS(C, is[0], 0.0, nullptr, nullptr));
1990                   PetscCall(ISDestroy(is));
1991                   PetscCall(MatMultEqual(sub[0], C, 20, &flg));
1992                   PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "The image of A_10 (R_i^p)^T from the local primal (e.g., velocity) space to the full dual (e.g., pressure) space is not restricted to the local dual space: A_10 (R_i^p)^T != R_i^d (R_i^d)^T A_10 (R_i^p)^T"); /* cf. eq. (9) of https://hal.science/hal-02343808v6/document */
1993                   PetscCall(MatDestroy(&C));
1994                   PetscCall(MatDestroySubMatrices(1, &sub));
1995                 }
1996                 PetscCall(ISDestroy(&uis));
1997                 PetscCall(MatDestroy(&B));
1998               }
1999               flg = PETSC_FALSE;
2000               if (!data->aux) {
2001                 Mat D;
2002 
2003                 PetscCall(MatCreateVecs(A11, &d, nullptr));
2004                 PetscCall(MatGetDiagonal(A11, d));
2005                 PetscCall(PetscObjectTypeCompareAny((PetscObject)A11, &flg, MATDIAGONAL, MATCONSTANTDIAGONAL, ""));
2006                 if (!flg) {
2007                   PetscCall(MatCreateDiagonal(d, &D));
2008                   PetscCall(MatMultEqual(A11, D, 20, &flg));
2009                   PetscCall(MatDestroy(&D));
2010                 }
2011                 if (flg) PetscCall(PetscInfo(pc, "A11 block is likely diagonal so the PC will build an auxiliary Mat (which was not initially provided by the user)\n"));
2012               }
2013               if ((PetscDefined(USE_DEBUG) || (data->Neumann != PETSC_BOOL3_TRUE && !flg)) && A11) {
2014                 PetscCall(MatNorm(A11, NORM_INFINITY, &norm));
2015                 if (data->Neumann != PETSC_BOOL3_TRUE && !flg) {
2016                   PetscCheck(norm < PETSC_MACHINE_EPSILON * static_cast<PetscReal>(10.0), PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition geneo and -%spc_hpddm_has_neumann != true with a nonzero or non-diagonal A11 block", pcpre ? pcpre : "", pcpre ? pcpre : "");
2017                   PetscCall(PetscInfo(pc, "A11 block is likely zero so the PC will build an auxiliary Mat (which was%s initially provided by the user)\n", data->aux ? "" : " not"));
2018                   PetscCall(MatDestroy(&data->aux));
2019                   flg = PETSC_TRUE;
2020                 }
2021               }
2022               if (!data->aux) { /* if A11 is near zero, e.g., Stokes equation, or diagonal, build an auxiliary (Neumann) Mat which is a (possibly slightly shifted) diagonal weighted by the inverse of the multiplicity */
2023                 PetscSF            scatter;
2024                 const PetscScalar *read;
2025                 PetscScalar       *write, *diagonal = nullptr;
2026 
2027                 PetscCall(MatDestroy(&data->aux));
2028                 PetscCall(ISGetLocalSize(data->is, &n));
2029                 PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)P), n, PETSC_DECIDE, &xin));
2030                 PetscCall(VecDuplicate(xin, &v));
2031                 PetscCall(VecScatterCreate(xin, data->is, v, nullptr, &scatter));
2032                 PetscCall(VecSet(v, 1.0));
2033                 PetscCall(VecSet(xin, 1.0));
2034                 PetscCall(VecScatterBegin(scatter, v, xin, ADD_VALUES, SCATTER_REVERSE));
2035                 PetscCall(VecScatterEnd(scatter, v, xin, ADD_VALUES, SCATTER_REVERSE)); /* v has the multiplicity of all unknowns on the overlap */
2036                 PetscCall(PetscSFDestroy(&scatter));
2037                 if (d) {
2038                   PetscCall(VecScatterCreate(d, data->is, v, nullptr, &scatter));
2039                   PetscCall(VecScatterBegin(scatter, d, v, INSERT_VALUES, SCATTER_FORWARD));
2040                   PetscCall(VecScatterEnd(scatter, d, v, INSERT_VALUES, SCATTER_FORWARD));
2041                   PetscCall(PetscSFDestroy(&scatter));
2042                   PetscCall(VecDestroy(&d));
2043                   PetscCall(PetscMalloc1(n, &diagonal));
2044                   PetscCall(VecGetArrayRead(v, &read));
2045                   PetscCallCXX(std::copy_n(read, n, diagonal));
2046                   PetscCall(VecRestoreArrayRead(v, &read));
2047                 }
2048                 PetscCall(VecDestroy(&v));
2049                 PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &v));
2050                 PetscCall(VecGetArrayRead(xin, &read));
2051                 PetscCall(VecGetArrayWrite(v, &write));
2052                 for (PetscInt i = 0; i < n; ++i) write[i] = (!diagonal || std::abs(diagonal[i]) < PETSC_MACHINE_EPSILON) ? PETSC_SMALL / (static_cast<PetscReal>(1000.0) * read[i]) : diagonal[i] / read[i];
2053                 PetscCall(PetscFree(diagonal));
2054                 PetscCall(VecRestoreArrayRead(xin, &read));
2055                 PetscCall(VecRestoreArrayWrite(v, &write));
2056                 PetscCall(VecDestroy(&xin));
2057                 PetscCall(MatCreateDiagonal(v, &data->aux));
2058                 PetscCall(VecDestroy(&v));
2059               }
2060               uis  = data->is;
2061               uaux = data->aux;
2062               PetscCall(PetscObjectReference((PetscObject)uis));
2063               PetscCall(PetscObjectReference((PetscObject)uaux));
2064               PetscCall(PetscStrallocpy(pcpre, &prefix));
2065               PetscCall(PCSetOptionsPrefix(pc, nullptr));
2066               PetscCall(PCSetType(pc, PCKSP));                                    /* replace the PC associated to the Schur complement by PCKSP */
2067               PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &inner_ksp)); /* new KSP that will be attached to the previously set PC */
2068               PetscCall(PetscObjectGetTabLevel((PetscObject)pc, &n));
2069               PetscCall(PetscObjectSetTabLevel((PetscObject)inner_ksp, n + 2));
2070               PetscCall(KSPSetOperators(inner_ksp, pc->mat, pc->pmat));
2071               PetscCall(KSPSetOptionsPrefix(inner_ksp, std::string(std::string(prefix) + "pc_hpddm_").c_str()));
2072               PetscCall(KSPSetSkipPCSetFromOptions(inner_ksp, PETSC_TRUE));
2073               PetscCall(KSPSetFromOptions(inner_ksp));
2074               PetscCall(KSPGetPC(inner_ksp, &inner));
2075               PetscCall(PCSetOptionsPrefix(inner, nullptr));
2076               PetscCall(PCSetType(inner, PCNONE)); /* no preconditioner since the action of M^-1 A or A M^-1 will be computed by the Amat */
2077               PetscCall(PCKSPSetKSP(pc, inner_ksp));
2078               std::get<0>(*ctx)[0] = pc_00; /* for coarse correction on the primal (e.g., velocity) space */
2079               PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &std::get<0>(*ctx)[1]));
2080               PetscCall(PCSetOptionsPrefix(pc, prefix)); /* both PC share the same prefix so that the outer PC can be reset with PCSetFromOptions() */
2081               PetscCall(PCSetOptionsPrefix(std::get<0>(*ctx)[1], prefix));
2082               PetscCall(PetscFree(prefix));
2083               PetscCall(PCSetOperators(std::get<0>(*ctx)[1], pc->mat, pc->pmat));
2084               PetscCall(PCSetType(std::get<0>(*ctx)[1], PCHPDDM));
2085               PetscCall(PCHPDDMSetAuxiliaryMat(std::get<0>(*ctx)[1], uis, uaux, nullptr, nullptr)); /* transfer ownership of the auxiliary inputs from the inner (PCKSP) to the inner-most (PCHPDDM) PC */
2086               if (flg) static_cast<PC_HPDDM *>(std::get<0>(*ctx)[1]->data)->Neumann = PETSC_BOOL3_TRUE;
2087               else if (PetscDefined(USE_DEBUG) && norm > PETSC_MACHINE_EPSILON * static_cast<PetscReal>(10.0)) {
2088                 /* no check when A11 is near zero */
2089                 PetscCall(MatCreateSubMatrices(A11, 1, &uis, &uis, MAT_INITIAL_MATRIX, &sub));
2090                 PetscCall(PCHPDDMCheckMatStructure_Private(pc, sub[0], uaux));
2091                 PetscCall(MatDestroySubMatrices(1, &sub));
2092               }
2093               PetscCall(PCSetFromOptions(std::get<0>(*ctx)[1]));
2094               PetscCall(PetscObjectDereference((PetscObject)uis));
2095               PetscCall(PetscObjectDereference((PetscObject)uaux));
2096               PetscCall(MatCreateShell(PetscObjectComm((PetscObject)pc), inner->mat->rmap->n, inner->mat->cmap->n, inner->mat->rmap->N, inner->mat->cmap->N, ctx, &S)); /* MatShell computing the action of M^-1 A or A M^-1 */
2097               PetscCall(MatShellSetOperation(S, MATOP_MULT, (PetscErrorCodeFn *)MatMult_SchurCorrection));
2098               PetscCall(MatShellSetOperation(S, MATOP_VIEW, (PetscErrorCodeFn *)MatView_SchurCorrection));
2099               PetscCall(MatShellSetOperation(S, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_SchurCorrection));
2100               PetscCall(KSPGetPCSide(inner_ksp, &(std::get<2>(*ctx))));
2101               if (std::get<2>(*ctx) == PC_LEFT || std::get<2>(*ctx) == PC_SIDE_DEFAULT) {
2102                 PetscCall(KSPSetPreSolve(inner_ksp, KSPPreSolve_SchurCorrection, ctx));
2103               } else { /* no support for PC_SYMMETRIC */
2104                 PetscCheck(std::get<2>(*ctx) == PC_RIGHT, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "PCSide %s (!= %s or %s or %s)", PCSides[std::get<2>(*ctx)], PCSides[PC_SIDE_DEFAULT], PCSides[PC_LEFT], PCSides[PC_RIGHT]);
2105               }
2106               PetscCall(KSPSetPostSolve(inner_ksp, KSPPostSolve_SchurCorrection, ctx));
2107               PetscCall(PetscObjectContainerCompose((PetscObject)std::get<0>(*ctx)[1], "_PCHPDDM_Schur", ctx, nullptr));
2108               PetscCall(PCSetUp(std::get<0>(*ctx)[1]));
2109               PetscCall(KSPSetOperators(inner_ksp, S, S));
2110               PetscCall(MatCreateVecs(std::get<1>(*ctx)[0], std::get<3>(*ctx), std::get<3>(*ctx) + 1));
2111               PetscCall(VecDuplicate(std::get<3>(*ctx)[0], std::get<3>(*ctx) + 2));
2112               PetscCall(PetscObjectDereference((PetscObject)inner_ksp));
2113               PetscCall(PetscObjectDereference((PetscObject)S));
2114             } else {
2115               std::get<0>(*ctx)[0] = pc_00;
2116               PetscCall(PetscObjectContainerCompose((PetscObject)pc, "_PCHPDDM_Schur", ctx, nullptr));
2117               PetscCall(ISCreateStride(PetscObjectComm((PetscObject)data_00->is), A11->rmap->n, A11->rmap->rstart, 1, &data->is)); /* dummy variables in the case of a centralized Schur complement */
2118               PetscCall(MatGetDiagonalBlock(A11, &data->aux));
2119               PetscCall(PetscObjectReference((PetscObject)data->aux));
2120               PetscCall(PCSetUp(pc));
2121             }
2122             for (std::vector<Vec>::iterator it = initial.begin(); it != initial.end(); ++it) PetscCall(VecDestroy(&*it));
2123             PetscFunctionReturn(PETSC_SUCCESS);
2124           } else { /* second call to PCSetUp() on the PC associated to the Schur complement, retrieve previously set context */
2125             PetscCall(PetscContainerGetPointer(container, (void **)&ctx));
2126           }
2127         }
2128       }
2129     }
2130     if (!data->is && data->N > 1) {
2131       char type[256] = {}; /* same size as in src/ksp/pc/interface/pcset.c */
2132 
2133       PetscCall(PetscObjectTypeCompareAny((PetscObject)P, &flg, MATNORMAL, MATNORMALHERMITIAN, ""));
2134       if (flg || (A->rmap->N != A->cmap->N && P->rmap->N == P->cmap->N && P->rmap->N == A->cmap->N)) {
2135         Mat B;
2136 
2137         PetscCall(PCHPDDMSetAuxiliaryMatNormal_Private(pc, A, P, &B, pcpre));
2138         if (data->correction == PC_HPDDM_COARSE_CORRECTION_DEFLATED) data->correction = PC_HPDDM_COARSE_CORRECTION_BALANCED;
2139         PetscCall(MatDestroy(&B));
2140       } else {
2141         PetscCall(PetscObjectTypeCompare((PetscObject)P, MATSCHURCOMPLEMENT, &flg));
2142         if (flg) {
2143           Mat                 A00, P00, A01, A10, A11, B, N;
2144           PCHPDDMSchurPreType type = PC_HPDDM_SCHUR_PRE_LEAST_SQUARES;
2145 
2146           PetscCall(MatSchurComplementGetSubMatrices(P, &A00, &P00, &A01, &A10, &A11));
2147           PetscCall(PetscOptionsGetEnum(((PetscObject)pc)->options, pcpre, "-pc_hpddm_schur_precondition", PCHPDDMSchurPreTypes, (PetscEnum *)&type, &flg));
2148           if (type == PC_HPDDM_SCHUR_PRE_LEAST_SQUARES) {
2149             Mat                        B01;
2150             Vec                        diagonal = nullptr;
2151             const PetscScalar         *array;
2152             MatSchurComplementAinvType type;
2153 
2154             PetscCall(PCHPDDMCheckSymmetry_Private(pc, A01, A10, &B01));
2155             if (A11) {
2156               PetscCall(MatCreateVecs(A11, &diagonal, nullptr));
2157               PetscCall(MatGetDiagonal(A11, diagonal));
2158             }
2159             PetscCall(MatCreateVecs(P00, &v, nullptr));
2160             PetscCall(MatSchurComplementGetAinvType(P, &type));
2161             PetscCheck(type == MAT_SCHUR_COMPLEMENT_AINV_DIAG || type == MAT_SCHUR_COMPLEMENT_AINV_LUMP || type == MAT_SCHUR_COMPLEMENT_AINV_BLOCK_DIAG, PetscObjectComm((PetscObject)P), PETSC_ERR_SUP, "-%smat_schur_complement_ainv_type %s",
2162                        ((PetscObject)P)->prefix ? ((PetscObject)P)->prefix : "", MatSchurComplementAinvTypes[type]);
2163             if (type != MAT_SCHUR_COMPLEMENT_AINV_BLOCK_DIAG) {
2164               if (type == MAT_SCHUR_COMPLEMENT_AINV_LUMP) {
2165                 PetscCall(MatGetRowSum(P00, v));
2166                 if (A00 == P00) PetscCall(PetscObjectReference((PetscObject)A00));
2167                 PetscCall(MatDestroy(&P00));
2168                 PetscCall(VecGetArrayRead(v, &array));
2169                 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A00), A00->rmap->n, A00->cmap->n, A00->rmap->N, A00->cmap->N, 1, nullptr, 0, nullptr, &P00));
2170                 PetscCall(MatSetOption(P00, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
2171                 for (n = A00->rmap->rstart; n < A00->rmap->rend; ++n) PetscCall(MatSetValue(P00, n, n, array[n - A00->rmap->rstart], INSERT_VALUES));
2172                 PetscCall(MatAssemblyBegin(P00, MAT_FINAL_ASSEMBLY));
2173                 PetscCall(MatAssemblyEnd(P00, MAT_FINAL_ASSEMBLY));
2174                 PetscCall(VecRestoreArrayRead(v, &array));
2175                 PetscCall(MatSchurComplementUpdateSubMatrices(P, A00, P00, A01, A10, A11)); /* replace P00 by diag(sum of each row of P00) */
2176                 PetscCall(MatDestroy(&P00));
2177               } else PetscCall(MatGetDiagonal(P00, v));
2178               PetscCall(VecReciprocal(v)); /* inv(diag(P00))       */
2179               PetscCall(VecSqrtAbs(v));    /* sqrt(inv(diag(P00))) */
2180               PetscCall(MatDuplicate(A01, MAT_COPY_VALUES, &B));
2181               PetscCall(MatDiagonalScale(B, v, nullptr));
2182               if (B01) PetscCall(MatDiagonalScale(B01, v, nullptr));
2183             } else {
2184               Mat     D00;
2185               MatType type;
2186 
2187               PetscCall(MatCreate(PetscObjectComm((PetscObject)A00), &D00));
2188               PetscCall(MatSetType(D00, MATAIJ));
2189               PetscCall(MatSetOptionsPrefix(D00, ((PetscObject)A00)->prefix));
2190               PetscCall(MatAppendOptionsPrefix(D00, "block_diagonal_"));
2191               PetscCall(MatSetFromOptions(D00));                          /* for setting -mat_block_size dynamically */
2192               PetscCall(MatConvert(A00, MATAIJ, MAT_INITIAL_MATRIX, &B)); /* not all MatTypes have a MatInvertBlockDiagonal() implementation, plus one may want to use a different block size than the one of A00 */
2193               PetscCall(MatSetBlockSizesFromMats(B, D00, D00));
2194               PetscCall(MatInvertBlockDiagonalMat(B, D00));
2195               PetscCall(MatDestroy(&B));
2196               PetscCall(MatGetType(A01, &type));                            /* cache MatType */
2197               PetscCall(MatConvert(A01, MATAIJ, MAT_INPLACE_MATRIX, &A01)); /* MatProduct is not versatile enough to fallback gracefully if no implementation found, so MatConvert() */
2198               PetscCall(MatMatMult(D00, A01, MAT_INITIAL_MATRIX, PETSC_CURRENT, &B));
2199               PetscCall(MatDestroy(&D00));
2200               PetscCall(MatConvert(A01, type, MAT_INPLACE_MATRIX, &A01)); /* reset to previous MatType */
2201               PetscCall(MatConvert(B, type, MAT_INPLACE_MATRIX, &B));
2202               if (!B01) { /* symmetric case */
2203                 B01 = A01;
2204                 PetscCall(PetscObjectReference((PetscObject)B01));
2205               }
2206             }
2207             if (B01 && B01 != A01) PetscCall(MatSetBlockSizesFromMats(B01, A01, A01)); /* TODO: remove this line once Firedrake is fixed */
2208             PetscCall(VecDestroy(&v));
2209             PetscCall(MatCreateNormalHermitian(B, &N));
2210             PetscCall(PCHPDDMSetAuxiliaryMatNormal_Private(pc, B, N, &P, pcpre, &diagonal, B01));
2211             PetscCall(PetscObjectTypeCompare((PetscObject)data->aux, MATSEQAIJ, &flg));
2212             if (!flg) {
2213               PetscCall(MatDestroy(&P));
2214               P = N;
2215               PetscCall(PetscObjectReference((PetscObject)P));
2216             }
2217             if (diagonal) {
2218               PetscCall(MatSetOption(P, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); /* may have missing diagonal entries */
2219               PetscCall(MatDiagonalSet(P, diagonal, ADD_VALUES));
2220               PetscCall(PCSetOperators(pc, P, P)); /* replace P by A01^T inv(diag(P00)) A01 - diag(P11) */
2221               PetscCall(VecDestroy(&diagonal));
2222             } else PetscCall(PCSetOperators(pc, B01 ? P : N, P));  /* replace P by A01^T inv(diag(P00)) A01                         */
2223             pc->ops->postsolve = PCPostSolve_SchurPreLeastSquares; /*  PCFIELDSPLIT expect a KSP for (P11 - A10 inv(diag(P00)) A01) */
2224             PetscCall(MatDestroy(&N));                             /*  but a PC for (A10 inv(diag(P00)) A10 - P11) is setup instead */
2225             PetscCall(MatDestroy(&P));                             /*  so the sign of the solution must be flipped                  */
2226             PetscCall(MatDestroy(&B));
2227           } else
2228             PetscCheck(type != PC_HPDDM_SCHUR_PRE_GENEO, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition %s without a prior call to PCHPDDMSetAuxiliaryMat() on the A11 block%s%s", pcpre ? pcpre : "", PCHPDDMSchurPreTypes[type], pcpre ? " -" : "", pcpre ? pcpre : "");
2229           for (std::vector<Vec>::iterator it = initial.begin(); it != initial.end(); ++it) PetscCall(VecDestroy(&*it));
2230           PetscFunctionReturn(PETSC_SUCCESS);
2231         } else {
2232           PetscCall(PetscOptionsGetString(((PetscObject)pc)->options, pcpre, "-pc_hpddm_levels_1_st_pc_type", type, sizeof(type), nullptr));
2233           PetscCall(PetscStrcmp(type, PCMAT, &algebraic));
2234           PetscCheck(!algebraic || !block, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_levels_1_st_pc_type mat and -pc_hpddm_block_splitting");
2235           if (overlap != -1) {
2236             PetscCheck(!block && !algebraic, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_%s and -pc_hpddm_harmonic_overlap", block ? "block_splitting" : "levels_1_st_pc_type mat");
2237             PetscCheck(overlap >= 1, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_WRONG, "-pc_hpddm_harmonic_overlap %" PetscInt_FMT " < 1", overlap);
2238           }
2239           if (block || overlap != -1) algebraic = PETSC_TRUE;
2240           if (algebraic) {
2241             PetscCall(ISCreateStride(PETSC_COMM_SELF, P->rmap->n, P->rmap->rstart, 1, &data->is));
2242             PetscCall(MatIncreaseOverlap(P, 1, &data->is, 1));
2243             PetscCall(ISSort(data->is));
2244           } else
2245             PetscCall(PetscInfo(pc, "Cannot assemble a fully-algebraic coarse operator with an assembled Pmat and -%spc_hpddm_levels_1_st_pc_type != mat and -%spc_hpddm_block_splitting != true and -%spc_hpddm_harmonic_overlap < 1\n", pcpre ? pcpre : "", pcpre ? pcpre : "", pcpre ? pcpre : ""));
2246         }
2247       }
2248     }
2249   }
2250 #if PetscDefined(USE_DEBUG)
2251   if (data->is) PetscCall(ISDuplicate(data->is, &dis));
2252   if (data->aux) PetscCall(MatDuplicate(data->aux, MAT_COPY_VALUES, &daux));
2253 #endif
2254   if (data->is || (ismatis && data->N > 1)) {
2255     if (ismatis) {
2256       std::initializer_list<std::string> list = {MATSEQBAIJ, MATSEQSBAIJ};
2257       PetscCall(MatISGetLocalMat(P, &N));
2258       std::initializer_list<std::string>::const_iterator it = std::find(list.begin(), list.end(), ((PetscObject)N)->type_name);
2259       PetscCall(MatISRestoreLocalMat(P, &N));
2260       switch (std::distance(list.begin(), it)) {
2261       case 0:
2262         PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &C));
2263         break;
2264       case 1:
2265         /* MatCreateSubMatrices() does not work with MATSBAIJ and unsorted ISes, so convert to MPIBAIJ */
2266         PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &C));
2267         break;
2268       default:
2269         PetscCall(MatConvert(P, MATMPIAIJ, MAT_INITIAL_MATRIX, &C));
2270       }
2271       PetscCall(MatISGetLocalToGlobalMapping(P, &l2g, nullptr));
2272       PetscCall(PetscObjectReference((PetscObject)P));
2273       PetscCall(KSPSetOperators(data->levels[0]->ksp, A, C));
2274       std::swap(C, P);
2275       PetscCall(ISLocalToGlobalMappingGetSize(l2g, &n));
2276       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &loc));
2277       PetscCall(ISLocalToGlobalMappingApplyIS(l2g, loc, &is[0]));
2278       PetscCall(ISDestroy(&loc));
2279       /* the auxiliary Mat is _not_ the local Neumann matrix                                */
2280       /* it is the local Neumann matrix augmented (with zeros) through MatIncreaseOverlap() */
2281       data->Neumann = PETSC_BOOL3_FALSE;
2282       structure     = SAME_NONZERO_PATTERN;
2283     } else {
2284       is[0] = data->is;
2285       if (algebraic || ctx) subdomains = PETSC_TRUE;
2286       PetscCall(PetscOptionsGetBool(((PetscObject)pc)->options, pcpre, "-pc_hpddm_define_subdomains", &subdomains, nullptr));
2287       if (ctx) PetscCheck(subdomains, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition geneo and -%spc_hpddm_define_subdomains false", pcpre, pcpre);
2288       if (PetscBool3ToBool(data->Neumann)) {
2289         PetscCheck(!block, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_block_splitting and -pc_hpddm_has_neumann");
2290         PetscCheck(overlap == -1, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_harmonic_overlap %" PetscInt_FMT " and -pc_hpddm_has_neumann", overlap);
2291         PetscCheck(!algebraic, PetscObjectComm((PetscObject)P), PETSC_ERR_ARG_INCOMP, "-pc_hpddm_levels_1_st_pc_type mat and -pc_hpddm_has_neumann");
2292       }
2293       if (PetscBool3ToBool(data->Neumann) || block) structure = SAME_NONZERO_PATTERN;
2294       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)data->is), P->rmap->n, P->rmap->rstart, 1, &loc));
2295     }
2296     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_", pcpre ? pcpre : ""));
2297     PetscCall(PetscOptionsGetEnum(((PetscObject)pc)->options, prefix, "-st_matstructure", MatStructures, (PetscEnum *)&structure, &flg)); /* if not user-provided, force its value when possible */
2298     if (!flg && structure == SAME_NONZERO_PATTERN) { /* cannot call STSetMatStructure() yet, insert the appropriate option in the database, parsed by STSetFromOptions() */
2299       PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "-%spc_hpddm_levels_1_st_matstructure", pcpre ? pcpre : ""));
2300       PetscCall(PetscOptionsSetValue(((PetscObject)pc)->options, prefix, MatStructures[structure]));
2301     }
2302     flg = PETSC_FALSE;
2303     if (data->share) {
2304       data->share = PETSC_FALSE; /* will be reset to PETSC_TRUE if none of the conditions below are true */
2305       if (!subdomains) PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since -%spc_hpddm_define_subdomains is not true\n", pcpre ? pcpre : ""));
2306       else if (data->deflation) PetscCall(PetscInfo(pc, "Nothing to share since PCHPDDMSetDeflationMat() has been called\n"));
2307       else if (ismatis) PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc with a Pmat of type MATIS\n"));
2308       else if (!algebraic && structure != SAME_NONZERO_PATTERN)
2309         PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since -%spc_hpddm_levels_1_st_matstructure %s (!= %s)\n", pcpre ? pcpre : "", MatStructures[structure], MatStructures[SAME_NONZERO_PATTERN]));
2310       else data->share = PETSC_TRUE;
2311     }
2312     if (!ismatis) {
2313       if (data->share || (!PetscBool3ToBool(data->Neumann) && subdomains)) PetscCall(ISDuplicate(is[0], &unsorted));
2314       else unsorted = is[0];
2315     }
2316     if ((ctx || data->N > 1) && (data->aux || ismatis || algebraic)) {
2317       PetscCheck(loadedSym, PETSC_COMM_SELF, PETSC_ERR_PLIB, "HPDDM library not loaded, cannot use more than one level");
2318       PetscCall(MatSetOption(P, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
2319       if (ismatis) {
2320         /* needed by HPDDM (currently) so that the partition of unity is 0 on subdomain interfaces */
2321         PetscCall(MatIncreaseOverlap(P, 1, is, 1));
2322         PetscCall(ISDestroy(&data->is));
2323         data->is = is[0];
2324       } else {
2325         if (PetscDefined(USE_DEBUG)) PetscCall(PCHPDDMCheckInclusion_Private(pc, data->is, loc, PETSC_TRUE));
2326         if (!ctx && overlap == -1) PetscCall(PetscObjectComposeFunction((PetscObject)pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_Private_C", PCHPDDMAlgebraicAuxiliaryMat_Private));
2327         if (!PetscBool3ToBool(data->Neumann) && (!algebraic || overlap != -1)) {
2328           PetscCall(PetscObjectTypeCompare((PetscObject)P, MATMPISBAIJ, &flg));
2329           if (flg) {
2330             /* maybe better to ISSort(is[0]), MatCreateSubMatrices(), and then MatPermute() */
2331             /* but there is no MatPermute_SeqSBAIJ(), so as before, just use MATMPIBAIJ     */
2332             PetscCall(MatConvert(P, MATMPIBAIJ, MAT_INITIAL_MATRIX, &uaux));
2333             flg = PETSC_FALSE;
2334           }
2335         }
2336       }
2337       if (algebraic && overlap == -1) {
2338         PetscUseMethod(pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_Private_C", (Mat, IS *, Mat *[], PetscBool), (P, is, &sub, block));
2339         if (block) {
2340           PetscCall(PetscObjectQuery((PetscObject)sub[0], "_PCHPDDM_Neumann_Mat", (PetscObject *)&data->aux));
2341           PetscCall(PetscObjectCompose((PetscObject)sub[0], "_PCHPDDM_Neumann_Mat", nullptr));
2342         }
2343       } else if (!uaux || overlap != -1) {
2344         if (!ctx) {
2345           if (PetscBool3ToBool(data->Neumann)) sub = &data->aux;
2346           else {
2347             PetscBool flg;
2348             if (overlap != -1) {
2349               Harmonic              h;
2350               Mat                   A0, *a;                    /* with an SVD: [ A_00  A_01       ] */
2351               IS                    ov[2], rows, cols, stride; /*              [ A_10  A_11  A_12 ] */
2352               const PetscInt       *i[2], bs = P->cmap->bs;    /* with a GEVP: [ A_00  A_01       ] */
2353               PetscInt              n[2], location;            /*              [ A_10  A_11  A_12 ] */
2354               std::vector<PetscInt> v[2];                      /*              [       A_21  A_22 ] */
2355 
2356               do {
2357                 PetscCall(ISDuplicate(data->is, ov));
2358                 if (overlap > 1) PetscCall(MatIncreaseOverlap(P, 1, ov, overlap - 1));
2359                 PetscCall(ISDuplicate(ov[0], ov + 1));
2360                 PetscCall(MatIncreaseOverlap(P, 1, ov + 1, 1));
2361                 PetscCall(ISGetLocalSize(ov[0], n));
2362                 PetscCall(ISGetLocalSize(ov[1], n + 1));
2363                 flg = PetscBool(n[0] == n[1] && n[0] != P->rmap->n);
2364                 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
2365                 if (flg) {
2366                   PetscCall(ISDestroy(ov));
2367                   PetscCall(ISDestroy(ov + 1));
2368                   PetscCheck(--overlap, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "No oversampling possible");
2369                   PetscCall(PetscInfo(pc, "Supplied -%spc_hpddm_harmonic_overlap parameter is too large, it has been decreased to %" PetscInt_FMT "\n", pcpre ? pcpre : "", overlap));
2370                 } else break;
2371               } while (1);
2372               PetscCall(PetscNew(&h));
2373               h->ksp = nullptr;
2374               PetscCall(PetscCalloc1(2, &h->A));
2375               PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, prefix, "-eps_nev", &flg));
2376               if (!flg) {
2377                 PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, prefix, "-svd_nsv", &flg));
2378                 if (!flg) PetscCall(PetscOptionsHasName(((PetscObject)pc)->options, prefix, "-svd_threshold_relative", &flg));
2379               } else flg = PETSC_FALSE;
2380               PetscCall(ISSort(ov[0]));
2381               if (!flg) PetscCall(ISSort(ov[1]));
2382               PetscCall(PetscCalloc1(5, &h->is));
2383               PetscCall(MatCreateSubMatrices(uaux ? uaux : P, 1, ov + !flg, ov + 1, MAT_INITIAL_MATRIX, &a)); /* submatrix from above, either square (!flg) or rectangular (flg) */
2384               for (PetscInt j = 0; j < 2; ++j) PetscCall(ISGetIndices(ov[j], i + j));
2385               v[1].reserve((n[1] - n[0]) / bs);
2386               for (PetscInt j = 0; j < n[1]; j += bs) { /* indices of the (2,2) block */
2387                 PetscCall(ISLocate(ov[0], i[1][j], &location));
2388                 if (location < 0) v[1].emplace_back(j / bs);
2389               }
2390               if (!flg) {
2391                 h->A[1] = a[0];
2392                 PetscCall(PetscObjectReference((PetscObject)h->A[1]));
2393                 v[0].reserve((n[0] - P->rmap->n) / bs);
2394                 for (PetscInt j = 0; j < n[1]; j += bs) { /* row indices of the (1,2) block */
2395                   PetscCall(ISLocate(loc, i[1][j], &location));
2396                   if (location < 0) {
2397                     PetscCall(ISLocate(ov[0], i[1][j], &location));
2398                     if (location >= 0) v[0].emplace_back(j / bs);
2399                   }
2400                 }
2401                 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[0].size(), v[0].data(), PETSC_USE_POINTER, &rows));
2402                 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[1].size(), v[1].data(), PETSC_COPY_VALUES, h->is + 4));
2403                 PetscCall(MatCreateSubMatrix(a[0], rows, h->is[4], MAT_INITIAL_MATRIX, h->A)); /* A_12 submatrix from above */
2404                 PetscCall(ISDestroy(&rows));
2405                 if (uaux) PetscCall(MatConvert(a[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, a)); /* initial Pmat was MATSBAIJ, convert back to the same format since the rectangular A_12 submatrix has been created */
2406                 PetscCall(ISEmbed(ov[0], ov[1], PETSC_TRUE, &rows));
2407                 PetscCall(MatCreateSubMatrix(a[0], rows, cols = rows, MAT_INITIAL_MATRIX, &A0)); /* [ A_00  A_01 ; A_10  A_11 ] submatrix from above */
2408                 PetscCall(ISDestroy(&rows));
2409                 v[0].clear();
2410                 PetscCall(ISEmbed(loc, ov[1], PETSC_TRUE, h->is + 3));
2411                 PetscCall(ISEmbed(data->is, ov[1], PETSC_TRUE, h->is + 2));
2412               }
2413               v[0].reserve((n[0] - P->rmap->n) / bs);
2414               for (PetscInt j = 0; j < n[0]; j += bs) {
2415                 PetscCall(ISLocate(loc, i[0][j], &location));
2416                 if (location < 0) v[0].emplace_back(j / bs);
2417               }
2418               PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[0].size(), v[0].data(), PETSC_USE_POINTER, &rows));
2419               for (PetscInt j = 0; j < 2; ++j) PetscCall(ISRestoreIndices(ov[j], i + j));
2420               if (flg) {
2421                 PetscCall(ISCreateStride(PETSC_COMM_SELF, a[0]->rmap->n, 0, 1, &stride));
2422                 PetscCall(ISEmbed(ov[0], ov[1], PETSC_TRUE, &cols));
2423                 PetscCall(MatCreateSubMatrix(a[0], stride, cols, MAT_INITIAL_MATRIX, &A0)); /* [ A_00  A_01 ; A_10  A_11 ] submatrix from above */
2424                 PetscCall(ISDestroy(&cols));
2425                 PetscCall(ISDestroy(&stride));
2426                 if (uaux) { /* initial Pmat was MATSBAIJ, convert back to the same format since this submatrix is square */
2427                   PetscCall(MatSetOption(A0, MAT_SYMMETRIC, PETSC_TRUE));
2428                   PetscCall(MatConvert(A0, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A0));
2429                 }
2430                 PetscCall(ISEmbed(loc, data->is, PETSC_TRUE, h->is + 2));
2431                 PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, v[1].size(), v[1].data(), PETSC_USE_POINTER, &cols));
2432                 PetscCall(MatCreateSubMatrix(a[0], rows, cols, MAT_INITIAL_MATRIX, h->A)); /* A_12 submatrix from above */
2433                 PetscCall(ISDestroy(&cols));
2434               }
2435               PetscCall(ISCreateStride(PETSC_COMM_SELF, A0->rmap->n, 0, 1, &stride));
2436               PetscCall(ISEmbed(rows, stride, PETSC_TRUE, h->is));
2437               PetscCall(ISDestroy(&stride));
2438               PetscCall(ISDestroy(&rows));
2439               PetscCall(ISEmbed(loc, ov[0], PETSC_TRUE, h->is + 1));
2440               if (subdomains) {
2441                 if (!data->levels[0]->pc) {
2442                   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &data->levels[0]->pc));
2443                   PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_", pcpre ? pcpre : ""));
2444                   PetscCall(PCSetOptionsPrefix(data->levels[0]->pc, prefix));
2445                   PetscCall(PCSetOperators(data->levels[0]->pc, A, P));
2446                 }
2447                 PetscCall(PCSetType(data->levels[0]->pc, PCASM));
2448                 if (!data->levels[0]->pc->setupcalled) PetscCall(PCASMSetLocalSubdomains(data->levels[0]->pc, 1, ov + !flg, &loc));
2449                 PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(data->levels[0]->pc, flg ? A0 : a[0], PETSC_TRUE));
2450                 if (!flg) ++overlap;
2451                 if (data->share) {
2452                   PetscInt n = -1;
2453                   PetscTryMethod(data->levels[0]->pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (data->levels[0]->pc, &n, nullptr, &ksp));
2454                   PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", n);
2455                   if (flg) {
2456                     h->ksp = ksp[0];
2457                     PetscCall(PetscObjectReference((PetscObject)h->ksp));
2458                   }
2459                 }
2460               }
2461               if (!h->ksp) {
2462                 PetscBool share = data->share;
2463 
2464                 PetscCall(KSPCreate(PETSC_COMM_SELF, &h->ksp));
2465                 PetscCall(KSPSetType(h->ksp, KSPPREONLY));
2466                 PetscCall(KSPSetOperators(h->ksp, A0, A0));
2467                 do {
2468                   if (!data->share) {
2469                     share = PETSC_FALSE;
2470                     PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_%s", pcpre ? pcpre : "", flg ? "svd_" : "eps_"));
2471                     PetscCall(KSPSetOptionsPrefix(h->ksp, prefix));
2472                     PetscCall(KSPSetFromOptions(h->ksp));
2473                   } else {
2474                     MatSolverType type;
2475 
2476                     PetscCall(PetscObjectTypeCompareAny((PetscObject)ksp[0]->pc, &data->share, PCLU, PCCHOLESKY, ""));
2477                     if (data->share) {
2478                       PetscCall(PCFactorGetMatSolverType(ksp[0]->pc, &type));
2479                       if (!type) {
2480                         if (PetscDefined(HAVE_MUMPS)) PetscCall(PCFactorSetMatSolverType(ksp[0]->pc, MATSOLVERMUMPS));
2481                         else if (PetscDefined(HAVE_MKL_PARDISO)) PetscCall(PCFactorSetMatSolverType(ksp[0]->pc, MATSOLVERMKL_PARDISO));
2482                         else data->share = PETSC_FALSE;
2483                         if (data->share) PetscCall(PCSetFromOptions(ksp[0]->pc));
2484                       } else {
2485                         PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &data->share));
2486                         if (!data->share) PetscCall(PetscStrcmp(type, MATSOLVERMKL_PARDISO, &data->share));
2487                       }
2488                       if (data->share) {
2489                         std::tuple<KSP, IS, Vec[2]> *p;
2490 
2491                         PetscCall(PCFactorGetMatrix(ksp[0]->pc, &A));
2492                         PetscCall(MatFactorSetSchurIS(A, h->is[4]));
2493                         PetscCall(KSPSetUp(ksp[0]));
2494                         PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_eps_shell_", pcpre ? pcpre : ""));
2495                         PetscCall(KSPSetOptionsPrefix(h->ksp, prefix));
2496                         PetscCall(KSPSetFromOptions(h->ksp));
2497                         PetscCall(PCSetType(h->ksp->pc, PCSHELL));
2498                         PetscCall(PetscNew(&p));
2499                         std::get<0>(*p) = ksp[0];
2500                         PetscCall(ISEmbed(ov[0], ov[1], PETSC_TRUE, &std::get<1>(*p)));
2501                         PetscCall(MatCreateVecs(A, std::get<2>(*p), std::get<2>(*p) + 1));
2502                         PetscCall(PCShellSetContext(h->ksp->pc, p));
2503                         PetscCall(PCShellSetApply(h->ksp->pc, PCApply_Schur));
2504                         PetscCall(PCShellSetApplyTranspose(h->ksp->pc, PCApply_Schur<Vec, true>));
2505                         PetscCall(PCShellSetMatApply(h->ksp->pc, PCApply_Schur<Mat>));
2506                         PetscCall(PCShellSetDestroy(h->ksp->pc, PCDestroy_Schur));
2507                       }
2508                     }
2509                     if (!data->share) PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since neither MUMPS nor MKL PARDISO is used\n"));
2510                   }
2511                 } while (!share != !data->share); /* if data->share is initially PETSC_TRUE, but then reset to PETSC_FALSE, then go back to the beginning of the do loop */
2512               }
2513               PetscCall(ISDestroy(ov));
2514               PetscCall(ISDestroy(ov + 1));
2515               if (overlap == 1 && subdomains && flg) {
2516                 *subA = A0;
2517                 sub   = subA;
2518                 if (uaux) PetscCall(MatDestroy(&uaux));
2519               } else PetscCall(MatDestroy(&A0));
2520               PetscCall(MatCreateShell(PETSC_COMM_SELF, P->rmap->n, n[1] - n[0], P->rmap->n, n[1] - n[0], h, &data->aux));
2521               PetscCall(KSPSetErrorIfNotConverged(h->ksp, PETSC_TRUE)); /* bail out as early as possible to avoid (apparently) unrelated error messages */
2522               PetscCall(MatCreateVecs(h->ksp->pc->pmat, &h->v, nullptr));
2523               PetscCall(MatShellSetOperation(data->aux, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Harmonic));
2524               PetscCall(MatShellSetOperation(data->aux, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMultTranspose_Harmonic));
2525               PetscCall(MatShellSetMatProductOperation(data->aux, MATPRODUCT_AB, nullptr, MatProduct_AB_Harmonic, nullptr, MATDENSE, MATDENSE));
2526               PetscCall(MatShellSetMatProductOperation(data->aux, MATPRODUCT_AtB, nullptr, MatProduct_AtB_Harmonic, nullptr, MATDENSE, MATDENSE));
2527               PetscCall(MatShellSetOperation(data->aux, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Harmonic));
2528               PetscCall(MatDestroySubMatrices(1, &a));
2529             }
2530             if (overlap != 1 || !subdomains) {
2531               PetscCall(MatCreateSubMatrices(uaux ? uaux : P, 1, is, is, MAT_INITIAL_MATRIX, &sub));
2532               if (ismatis) {
2533                 PetscCall(MatISGetLocalMat(C, &N));
2534                 PetscCall(PetscObjectTypeCompare((PetscObject)N, MATSEQSBAIJ, &flg));
2535                 if (flg) PetscCall(MatConvert(sub[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, sub));
2536                 PetscCall(MatISRestoreLocalMat(C, &N));
2537               }
2538             }
2539             if (uaux) {
2540               PetscCall(MatDestroy(&uaux));
2541               PetscCall(MatConvert(sub[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, sub));
2542             }
2543           }
2544         }
2545       } else if (!ctx) {
2546         PetscCall(MatCreateSubMatrices(uaux, 1, is, is, MAT_INITIAL_MATRIX, &sub));
2547         PetscCall(MatDestroy(&uaux));
2548         PetscCall(MatConvert(sub[0], MATSEQSBAIJ, MAT_INPLACE_MATRIX, sub));
2549       }
2550       if (data->N > 1) {
2551         /* Vec holding the partition of unity */
2552         if (!data->levels[0]->D) {
2553           PetscCall(ISGetLocalSize(data->is, &n));
2554           PetscCall(VecCreateMPI(PETSC_COMM_SELF, n, PETSC_DETERMINE, &data->levels[0]->D));
2555         }
2556         if (data->share && overlap == -1) {
2557           Mat      D;
2558           IS       perm = nullptr;
2559           PetscInt size = -1;
2560 
2561           if (!data->levels[0]->pc) {
2562             PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "%spc_hpddm_levels_1_", pcpre ? pcpre : ""));
2563             PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &data->levels[0]->pc));
2564             PetscCall(PCSetOptionsPrefix(data->levels[0]->pc, prefix));
2565             PetscCall(PCSetOperators(data->levels[0]->pc, A, P));
2566           }
2567           PetscCall(PCSetType(data->levels[0]->pc, PCASM));
2568           if (!ctx) {
2569             if (!data->levels[0]->pc->setupcalled) {
2570               IS sorted; /* PCASM will sort the input IS, duplicate it to return an unmodified (PCHPDDM) input IS */
2571               PetscCall(ISDuplicate(is[0], &sorted));
2572               PetscCall(PCASMSetLocalSubdomains(data->levels[0]->pc, 1, &sorted, &loc));
2573               PetscCall(PetscObjectDereference((PetscObject)sorted));
2574             }
2575             PetscCall(PCSetFromOptions(data->levels[0]->pc));
2576             if (block) {
2577               PetscCall(PCHPDDMPermute_Private(unsorted, data->is, &uis, sub[0], &C, &perm));
2578               PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(data->levels[0]->pc, C, algebraic));
2579             } else PetscCall(PCSetUp(data->levels[0]->pc));
2580             PetscTryMethod(data->levels[0]->pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (data->levels[0]->pc, &size, nullptr, &ksp));
2581             if (size != 1) {
2582               data->share = PETSC_FALSE;
2583               PetscCheck(size == -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", size);
2584               PetscCall(PetscInfo(pc, "Cannot share subdomain KSP between SLEPc and PETSc since PCASMGetSubKSP() not found in fine-level PC\n"));
2585               PetscCall(ISDestroy(&unsorted));
2586               unsorted = is[0];
2587             } else {
2588               const char *matpre;
2589               PetscBool   cmp[4];
2590 
2591               if (!block && !ctx) PetscCall(PCHPDDMPermute_Private(unsorted, data->is, &uis, PetscBool3ToBool(data->Neumann) ? sub[0] : data->aux, &C, &perm));
2592               if (perm) { /* unsorted input IS */
2593                 if (!PetscBool3ToBool(data->Neumann) && !block) {
2594                   PetscCall(MatPermute(sub[0], perm, perm, &D)); /* permute since PCASM will call ISSort() */
2595                   PetscCall(MatHeaderReplace(sub[0], &D));
2596                 }
2597                 if (data->B) { /* see PCHPDDMSetRHSMat() */
2598                   PetscCall(MatPermute(data->B, perm, perm, &D));
2599                   PetscCall(MatHeaderReplace(data->B, &D));
2600                 }
2601                 PetscCall(ISDestroy(&perm));
2602               }
2603               PetscCall(KSPGetOperators(ksp[0], subA, subA + 1));
2604               PetscCall(PetscObjectReference((PetscObject)subA[0]));
2605               PetscCall(MatDuplicate(subA[1], MAT_SHARE_NONZERO_PATTERN, &D));
2606               PetscCall(MatGetOptionsPrefix(subA[1], &matpre));
2607               PetscCall(MatSetOptionsPrefix(D, matpre));
2608               PetscCall(PetscObjectTypeCompare((PetscObject)D, MATNORMAL, cmp));
2609               PetscCall(PetscObjectTypeCompare((PetscObject)C, MATNORMAL, cmp + 1));
2610               if (!cmp[0]) PetscCall(PetscObjectTypeCompare((PetscObject)D, MATNORMALHERMITIAN, cmp + 2));
2611               else cmp[2] = PETSC_FALSE;
2612               if (!cmp[1]) PetscCall(PetscObjectTypeCompare((PetscObject)C, MATNORMALHERMITIAN, cmp + 3));
2613               else cmp[3] = PETSC_FALSE;
2614               PetscCheck(cmp[0] == cmp[1] && cmp[2] == cmp[3], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_levels_1_pc_asm_sub_mat_type %s and auxiliary Mat of type %s", pcpre ? pcpre : "", ((PetscObject)D)->type_name, ((PetscObject)C)->type_name);
2615               if (!cmp[0] && !cmp[2]) {
2616                 if (!block) {
2617                   if (PetscDefined(USE_DEBUG)) PetscCall(PCHPDDMCheckMatStructure_Private(pc, D, C));
2618                   PetscCall(MatAXPY(D, 1.0, C, SUBSET_NONZERO_PATTERN));
2619                 } else {
2620                   structure = DIFFERENT_NONZERO_PATTERN;
2621                   PetscCall(MatAXPY(D, 1.0, data->aux, structure));
2622                 }
2623               } else {
2624                 Mat mat[2];
2625 
2626                 if (cmp[0]) {
2627                   PetscCall(MatNormalGetMat(D, mat));
2628                   PetscCall(MatNormalGetMat(C, mat + 1));
2629                 } else {
2630                   PetscCall(MatNormalHermitianGetMat(D, mat));
2631                   PetscCall(MatNormalHermitianGetMat(C, mat + 1));
2632                 }
2633                 PetscCall(MatAXPY(mat[0], 1.0, mat[1], SUBSET_NONZERO_PATTERN));
2634               }
2635               PetscCall(MatPropagateSymmetryOptions(C, D));
2636               PetscCall(MatDestroy(&C));
2637               C = D;
2638               /* swap pointers so that variables stay consistent throughout PCSetUp() */
2639               std::swap(C, data->aux);
2640               std::swap(uis, data->is);
2641               swap = PETSC_TRUE;
2642             }
2643           }
2644         }
2645       }
2646       if (ctx) {
2647         PC_HPDDM              *data_00 = (PC_HPDDM *)std::get<0>(*ctx)[0]->data;
2648         PC                     s;
2649         Mat                    A00, P00, A01 = nullptr, A10, A11, N, b[4];
2650         IS                     sorted, is[2], *is_00;
2651         MatSolverType          type;
2652         std::pair<PC, Vec[2]> *p;
2653 
2654         n = -1;
2655         PetscTryMethod(data_00->levels[0]->pc, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (data_00->levels[0]->pc, &n, nullptr, &ksp));
2656         PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", n);
2657         PetscCall(KSPGetOperators(ksp[0], subA, subA + 1));
2658         PetscCall(ISGetLocalSize(data_00->is, &n));
2659         if (n != subA[0]->rmap->n || n != subA[0]->cmap->n) {
2660           PetscCall(PCASMGetLocalSubdomains(data_00->levels[0]->pc, &n, &is_00, nullptr));
2661           PetscCall(ISGetLocalSize(*is_00, &n));
2662           PetscCheck(n == subA[0]->rmap->n && n == subA[0]->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "-%spc_hpddm_schur_precondition geneo and -%spc_hpddm_define_subdomains false", pcpre ? pcpre : "", ((PetscObject)pc)->prefix);
2663         } else is_00 = &data_00->is;
2664         PetscCall(PCHPDDMPermute_Private(unsorted, data->is, &uis, data->aux, &C, nullptr)); /* permute since PCASM works with a sorted IS */
2665         std::swap(C, data->aux);
2666         std::swap(uis, data->is);
2667         swap = PETSC_TRUE;
2668         PetscCall(MatSchurComplementGetSubMatrices(P, &A00, &P00, std::get<1>(*ctx), &A10, &A11));
2669         std::get<1>(*ctx)[1] = A10;
2670         PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATTRANSPOSEVIRTUAL, &flg));
2671         if (flg) PetscCall(MatTransposeGetMat(A10, &A01));
2672         else {
2673           PetscBool flg;
2674 
2675           PetscCall(PetscObjectTypeCompare((PetscObject)A10, MATHERMITIANTRANSPOSEVIRTUAL, &flg));
2676           if (flg) PetscCall(MatHermitianTransposeGetMat(A10, &A01));
2677         }
2678         PetscCall(ISDuplicate(*is_00, &sorted)); /* during setup of the PC associated to the A00 block, this IS has already been sorted, but it's put back to its original state at the end of PCSetUp_HPDDM(), which may be unsorted */
2679         PetscCall(ISSort(sorted));               /* this is to avoid changing users inputs, but it requires a new call to ISSort() here                                                                                               */
2680         if (!A01) {
2681           PetscCall(MatSetOption(A10, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
2682           PetscCall(MatCreateSubMatrices(A10, 1, &data->is, &sorted, MAT_INITIAL_MATRIX, &sub));
2683           b[2] = sub[0];
2684           PetscCall(PetscObjectReference((PetscObject)sub[0]));
2685           PetscCall(MatDestroySubMatrices(1, &sub));
2686           PetscCall(PetscObjectTypeCompare((PetscObject)std::get<1>(*ctx)[0], MATTRANSPOSEVIRTUAL, &flg));
2687           A10 = nullptr;
2688           if (flg) PetscCall(MatTransposeGetMat(std::get<1>(*ctx)[0], &A10));
2689           else {
2690             PetscBool flg;
2691 
2692             PetscCall(PetscObjectTypeCompare((PetscObject)std::get<1>(*ctx)[0], MATHERMITIANTRANSPOSEVIRTUAL, &flg));
2693             if (flg) PetscCall(MatHermitianTransposeGetMat(std::get<1>(*ctx)[0], &A10));
2694           }
2695           if (!A10) PetscCall(MatCreateSubMatrices(std::get<1>(*ctx)[0], 1, &sorted, &data->is, MAT_INITIAL_MATRIX, &sub));
2696           else {
2697             if (flg) PetscCall(MatCreateTranspose(b[2], b + 1));
2698             else PetscCall(MatCreateHermitianTranspose(b[2], b + 1));
2699           }
2700         } else {
2701           PetscCall(MatSetOption(A01, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
2702           PetscCall(MatCreateSubMatrices(A01, 1, &sorted, &data->is, MAT_INITIAL_MATRIX, &sub));
2703           if (flg) PetscCall(MatCreateTranspose(*sub, b + 2));
2704           else PetscCall(MatCreateHermitianTranspose(*sub, b + 2));
2705         }
2706         if (A01 || !A10) {
2707           b[1] = sub[0];
2708           PetscCall(PetscObjectReference((PetscObject)sub[0]));
2709         }
2710         PetscCall(MatDestroySubMatrices(1, &sub));
2711         PetscCall(ISDestroy(&sorted));
2712         b[3] = data->aux;
2713         PetscCall(MatCreateSchurComplement(subA[0], subA[1], b[1], b[2], b[3], &S));
2714         PetscCall(MatSchurComplementSetKSP(S, ksp[0]));
2715         if (data->N != 1) {
2716           PetscCall(PCASMSetType(data->levels[0]->pc, PC_ASM_NONE)); /* "Neumann--Neumann" preconditioning with overlap and a Boolean partition of unity */
2717           PetscCall(PCASMSetLocalSubdomains(data->levels[0]->pc, 1, &data->is, &loc));
2718           PetscCall(PCSetFromOptions(data->levels[0]->pc)); /* action of eq. (15) of https://hal.science/hal-02343808v6/document (with a sign flip) */
2719           s = data->levels[0]->pc;
2720         } else {
2721           is[0] = data->is;
2722           PetscCall(PetscObjectReference((PetscObject)is[0]));
2723           PetscCall(PetscObjectReference((PetscObject)b[3]));
2724           PetscCall(PCSetType(pc, PCASM));                          /* change the type of the current PC */
2725           data = nullptr;                                           /* destroyed in the previous PCSetType(), so reset to NULL to avoid any faulty use */
2726           PetscCall(PCAppendOptionsPrefix(pc, "pc_hpddm_coarse_")); /* same prefix as when using PCHPDDM with a single level */
2727           PetscCall(PCASMSetLocalSubdomains(pc, 1, is, &loc));
2728           PetscCall(ISDestroy(is));
2729           PetscCall(ISDestroy(&loc));
2730           s = pc;
2731         }
2732         PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(s, S, PETSC_TRUE)); /* the subdomain Mat is already known and the input IS of PCASMSetLocalSubdomains() is already sorted */
2733         PetscTryMethod(s, "PCASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (s, &n, nullptr, &ksp));
2734         PetscCheck(n == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of subdomain solver %" PetscInt_FMT " != 1", n);
2735         PetscCall(KSPGetPC(ksp[0], &inner));
2736         PetscCall(PCSetType(inner, PCSHELL)); /* compute the action of the inverse of the local Schur complement with a PCSHELL */
2737         b[0] = subA[0];
2738         PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, nullptr, 2, nullptr, b, &N)); /* instead of computing inv(A11 - A10 inv(A00) A01), compute inv([A00, A01; A10, A11]) followed by a partial solution associated to the A11 block */
2739         if (!data) PetscCall(PetscObjectDereference((PetscObject)b[3]));
2740         PetscCall(PetscObjectDereference((PetscObject)b[1]));
2741         PetscCall(PetscObjectDereference((PetscObject)b[2]));
2742         PetscCall(PCCreate(PETSC_COMM_SELF, &s));
2743         PetscCall(PCSetOptionsPrefix(s, ((PetscObject)inner)->prefix));
2744         PetscCall(PCSetOptionsPrefix(inner, nullptr));
2745         PetscCall(KSPSetSkipPCSetFromOptions(ksp[0], PETSC_TRUE));
2746         PetscCall(PCSetType(s, PCLU));
2747         if (PetscDefined(HAVE_MUMPS)) PetscCall(PCFactorSetMatSolverType(s, MATSOLVERMUMPS)); /* only MATSOLVERMUMPS handles MATNEST, so for the others, e.g., MATSOLVERPETSC or MATSOLVERMKL_PARDISO, convert to plain MATAIJ */
2748         PetscCall(PCSetFromOptions(s));
2749         PetscCall(PCFactorGetMatSolverType(s, &type));
2750         PetscCall(PetscStrcmp(type, MATSOLVERMUMPS, &flg));
2751         PetscCall(MatGetLocalSize(A11, &n, nullptr));
2752         if (flg || n == 0) {
2753           PetscCall(PCSetOperators(s, N, N));
2754           if (n) {
2755             PetscCall(PCFactorGetMatrix(s, b));
2756             PetscCall(MatSetOptionsPrefix(*b, ((PetscObject)s)->prefix));
2757             n = -1;
2758             PetscCall(PetscOptionsGetInt(((PetscObject)pc)->options, ((PetscObject)s)->prefix, "-mat_mumps_icntl_26", &n, nullptr));
2759             if (n == 1) {                                /* allocates a square MatDense of size is[1]->map->n, so one */
2760               PetscCall(MatNestGetISs(N, is, nullptr));  /*  needs to be able to deactivate this path when dealing    */
2761               PetscCall(MatFactorSetSchurIS(*b, is[1])); /*  with a large constraint space in order to avoid OOM      */
2762             }
2763           } else PetscCall(PCSetType(s, PCNONE)); /* empty local Schur complement (e.g., centralized on another process) */
2764         } else {
2765           PetscCall(MatConvert(N, MATAIJ, MAT_INITIAL_MATRIX, b));
2766           PetscCall(PCSetOperators(s, N, *b));
2767           PetscCall(PetscObjectDereference((PetscObject)*b));
2768           PetscCall(PetscObjectTypeCompareAny((PetscObject)s, &flg, PCLU, PCCHOLESKY, PCILU, PCICC, PCQR, ""));
2769           if (flg) PetscCall(PCFactorGetMatrix(s, b)); /* MATSOLVERMKL_PARDISO cannot compute in PETSc (yet) a partial solution associated to the A11 block, only partial solution associated to the A00 block or full solution */
2770         }
2771         PetscCall(PetscNew(&p));
2772         p->first = s;
2773         if (n != 0) PetscCall(MatCreateVecs(*b, p->second, p->second + 1));
2774         else p->second[0] = p->second[1] = nullptr;
2775         PetscCall(PCShellSetContext(inner, p));
2776         PetscCall(PCShellSetApply(inner, PCApply_Nest));
2777         PetscCall(PCShellSetView(inner, PCView_Nest));
2778         PetscCall(PCShellSetDestroy(inner, PCDestroy_Nest));
2779         PetscCall(PetscObjectDereference((PetscObject)N));
2780         if (!data) {
2781           PetscCall(MatDestroy(&S));
2782           PetscCall(ISDestroy(&unsorted));
2783           PetscCall(MatDestroy(&C));
2784           PetscCall(ISDestroy(&uis));
2785           PetscCall(PetscFree(ctx));
2786 #if PetscDefined(USE_DEBUG)
2787           PetscCall(ISDestroy(&dis));
2788           PetscCall(MatDestroy(&daux));
2789 #endif
2790           PetscFunctionReturn(PETSC_SUCCESS);
2791         }
2792       }
2793       if (!data->levels[0]->scatter) {
2794         PetscCall(MatCreateVecs(P, &xin, nullptr));
2795         if (ismatis) PetscCall(MatDestroy(&P));
2796         PetscCall(VecScatterCreate(xin, data->is, data->levels[0]->D, nullptr, &data->levels[0]->scatter));
2797         PetscCall(VecDestroy(&xin));
2798       }
2799       if (data->levels[0]->P) {
2800         /* if the pattern is the same and PCSetUp() has previously succeeded, reuse HPDDM buffers and connectivity */
2801         PetscCall(HPDDM::Schwarz<PetscScalar>::destroy(data->levels[0], !pc->setupcalled || pc->flag == DIFFERENT_NONZERO_PATTERN ? PETSC_TRUE : PETSC_FALSE));
2802       }
2803       if (!data->levels[0]->P) data->levels[0]->P = new HPDDM::Schwarz<PetscScalar>();
2804       if (data->log_separate) PetscCall(PetscLogEventBegin(PC_HPDDM_SetUp[0], data->levels[0]->ksp, nullptr, nullptr, nullptr));
2805       else PetscCall(PetscLogEventBegin(PC_HPDDM_Strc, data->levels[0]->ksp, nullptr, nullptr, nullptr));
2806       /* HPDDM internal data structure */
2807       PetscCall(data->levels[0]->P->structure(loc, data->is, !ctx ? sub[0] : nullptr, ismatis ? C : data->aux, data->levels));
2808       if (!data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_Strc, data->levels[0]->ksp, nullptr, nullptr, nullptr));
2809       /* matrix pencil of the generalized eigenvalue problem on the overlap (GenEO) */
2810       if (!ctx) {
2811         if (data->deflation || overlap != -1) weighted = data->aux;
2812         else if (!data->B) {
2813           PetscBool cmp;
2814 
2815           PetscCall(MatDuplicate(sub[0], MAT_COPY_VALUES, &weighted));
2816           PetscCall(PetscObjectTypeCompareAny((PetscObject)weighted, &cmp, MATNORMAL, MATNORMALHERMITIAN, ""));
2817           if (cmp) flg = PETSC_FALSE;
2818           PetscCall(MatDiagonalScale(weighted, data->levels[0]->D, data->levels[0]->D));
2819           /* neither MatDuplicate() nor MatDiagonalScale() handles the symmetry options, so propagate the options explicitly */
2820           /* only useful for -mat_type baij -pc_hpddm_levels_1_st_pc_type cholesky (no problem with MATAIJ or MATSBAIJ)      */
2821           PetscCall(MatPropagateSymmetryOptions(sub[0], weighted));
2822           if (PetscDefined(USE_DEBUG) && PetscBool3ToBool(data->Neumann)) {
2823             Mat      *sub, A[3];
2824             PetscReal norm[2];
2825             PetscBool flg;
2826 
2827             PetscCall(PetscObjectTypeCompare((PetscObject)P, MATMPISBAIJ, &flg)); /* MatCreateSubMatrices() does not work with MATSBAIJ and unsorted ISes, so convert to MPIAIJ */
2828             if (flg) PetscCall(MatConvert(P, MATMPIAIJ, MAT_INITIAL_MATRIX, A));
2829             else {
2830               A[0] = P;
2831               PetscCall(PetscObjectReference((PetscObject)P));
2832             }
2833             PetscCall(MatCreateSubMatrices(A[0], 1, &data->is, &data->is, MAT_INITIAL_MATRIX, &sub));
2834             PetscCall(MatDiagonalScale(sub[0], data->levels[0]->D, data->levels[0]->D));
2835             PetscCall(MatConvert(sub[0], MATSEQAIJ, MAT_INITIAL_MATRIX, A + 1)); /* too many corner cases to handle (MATNORMAL, MATNORMALHERMITIAN, MATBAIJ with different block sizes...), so just MatConvert() to MATSEQAIJ since this is just for debugging */
2836             PetscCall(MatConvert(weighted, MATSEQAIJ, MAT_INITIAL_MATRIX, A + 2));
2837             PetscCall(MatAXPY(A[1], -1.0, A[2], UNKNOWN_NONZERO_PATTERN));
2838             PetscCall(MatNorm(A[1], NORM_FROBENIUS, norm));
2839             if (norm[0]) {
2840               PetscCall(MatNorm(A[2], NORM_FROBENIUS, norm + 1));
2841               PetscCheck(PetscAbsReal(norm[0] / norm[1]) < PetscSqrtReal(PETSC_SMALL), PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Auxiliary Mat is different from the (assembled) subdomain Mat for the interior unknowns, so it cannot be the Neumann matrix, remove -%spc_hpddm_has_neumann", pcpre ? pcpre : "");
2842             }
2843             PetscCall(MatDestroySubMatrices(1, &sub));
2844             for (PetscInt i = 0; i < 3; ++i) PetscCall(MatDestroy(A + i));
2845           }
2846         } else weighted = data->B;
2847       } else weighted = nullptr;
2848       /* SLEPc is used inside the loaded symbol */
2849       PetscCall((*loadedSym)(data->levels[0]->P, data->is, ismatis ? C : (algebraic && !block && overlap == -1 ? sub[0] : (!ctx ? data->aux : S)), weighted, data->B, initial, data->levels));
2850       if (!ctx && data->share && overlap == -1) {
2851         Mat st[2];
2852 
2853         PetscCall(KSPGetOperators(ksp[0], st, st + 1));
2854         PetscCall(MatCopy(subA[0], st[0], structure));
2855         if (subA[1] != subA[0] || st[1] != st[0]) PetscCall(MatCopy(subA[1], st[1], SAME_NONZERO_PATTERN));
2856         PetscCall(PetscObjectDereference((PetscObject)subA[0]));
2857       }
2858       if (data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_SetUp[0], data->levels[0]->ksp, nullptr, nullptr, nullptr));
2859       if (ismatis) PetscCall(MatISGetLocalMat(C, &N));
2860       else N = data->aux;
2861       if (!ctx) P = sub[0];
2862       else P = S;
2863       /* going through the grid hierarchy */
2864       for (n = 1; n < data->N; ++n) {
2865         if (data->log_separate) PetscCall(PetscLogEventBegin(PC_HPDDM_SetUp[n], data->levels[n]->ksp, nullptr, nullptr, nullptr));
2866         /* method composed in the loaded symbol since there, SLEPc is used as well */
2867         PetscTryMethod(data->levels[0]->ksp, "PCHPDDMSetUp_Private_C", (Mat *, Mat *, PetscInt, PetscInt *const, PC_HPDDM_Level **const), (&P, &N, n, &data->N, data->levels));
2868         if (data->log_separate) PetscCall(PetscLogEventEnd(PC_HPDDM_SetUp[n], data->levels[n]->ksp, nullptr, nullptr, nullptr));
2869       }
2870       /* reset to NULL to avoid any faulty use */
2871       PetscCall(PetscObjectComposeFunction((PetscObject)data->levels[0]->ksp, "PCHPDDMSetUp_Private_C", nullptr));
2872       if (!ismatis) PetscCall(PetscObjectComposeFunction((PetscObject)pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_C", nullptr));
2873       else PetscCall(PetscObjectDereference((PetscObject)C)); /* matching PetscObjectReference() above */
2874       for (n = 0; n < data->N - 1; ++n)
2875         if (data->levels[n]->P) {
2876           /* HPDDM internal work buffers */
2877           data->levels[n]->P->setBuffer();
2878           data->levels[n]->P->super::start();
2879         }
2880       if (ismatis || !subdomains) PetscCall(PCHPDDMDestroySubMatrices_Private(PetscBool3ToBool(data->Neumann), PetscBool(algebraic && !block && overlap == -1), sub));
2881       if (ismatis) data->is = nullptr;
2882       for (n = 0; n < data->N - 1 + (reused > 0); ++n) {
2883         if (data->levels[n]->P) {
2884           PC spc;
2885 
2886           /* force the PC to be PCSHELL to do the coarse grid corrections */
2887           PetscCall(KSPSetSkipPCSetFromOptions(data->levels[n]->ksp, PETSC_TRUE));
2888           PetscCall(KSPGetPC(data->levels[n]->ksp, &spc));
2889           PetscCall(PCSetType(spc, PCSHELL));
2890           PetscCall(PCShellSetContext(spc, data->levels[n]));
2891           PetscCall(PCShellSetSetUp(spc, PCSetUp_HPDDMShell));
2892           PetscCall(PCShellSetApply(spc, PCApply_HPDDMShell));
2893           PetscCall(PCShellSetMatApply(spc, PCMatApply_HPDDMShell));
2894           PetscCall(PCShellSetApplyTranspose(spc, PCApplyTranspose_HPDDMShell));
2895           PetscCall(PCShellSetMatApplyTranspose(spc, PCMatApplyTranspose_HPDDMShell));
2896           if (ctx && n == 0) {
2897             Mat                               Amat, Pmat;
2898             PetscInt                          m, M;
2899             std::tuple<Mat, PetscSF, Vec[2]> *ctx;
2900 
2901             PetscCall(KSPGetOperators(data->levels[n]->ksp, nullptr, &Pmat));
2902             PetscCall(MatGetLocalSize(Pmat, &m, nullptr));
2903             PetscCall(MatGetSize(Pmat, &M, nullptr));
2904             PetscCall(PetscNew(&ctx));
2905             std::get<0>(*ctx) = S;
2906             std::get<1>(*ctx) = data->levels[n]->scatter;
2907             PetscCall(MatCreateShell(PetscObjectComm((PetscObject)data->levels[n]->ksp), m, m, M, M, ctx, &Amat));
2908             PetscCall(MatShellSetOperation(Amat, MATOP_MULT, (PetscErrorCodeFn *)MatMult_Schur<false>));
2909             PetscCall(MatShellSetOperation(Amat, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)MatMult_Schur<true>));
2910             PetscCall(MatShellSetOperation(Amat, MATOP_DESTROY, (PetscErrorCodeFn *)MatDestroy_Schur));
2911             PetscCall(MatCreateVecs(S, std::get<2>(*ctx), std::get<2>(*ctx) + 1));
2912             PetscCall(KSPSetOperators(data->levels[n]->ksp, Amat, Pmat));
2913             PetscCall(PetscObjectDereference((PetscObject)Amat));
2914           }
2915           PetscCall(PCShellSetDestroy(spc, PCDestroy_HPDDMShell));
2916           if (!data->levels[n]->pc) PetscCall(PCCreate(PetscObjectComm((PetscObject)data->levels[n]->ksp), &data->levels[n]->pc));
2917           if (n < reused) {
2918             PetscCall(PCSetReusePreconditioner(spc, PETSC_TRUE));
2919             PetscCall(PCSetReusePreconditioner(data->levels[n]->pc, PETSC_TRUE));
2920           }
2921           PetscCall(PCSetUp(spc));
2922         }
2923       }
2924       if (ctx) PetscCall(MatDestroy(&S));
2925       if (overlap == -1) PetscCall(PetscObjectComposeFunction((PetscObject)pc->pmat, "PCHPDDMAlgebraicAuxiliaryMat_Private_C", nullptr));
2926     } else flg = reused ? PETSC_FALSE : PETSC_TRUE;
2927     if (!ismatis && subdomains) {
2928       if (flg) PetscCall(KSPGetPC(data->levels[0]->ksp, &inner));
2929       else inner = data->levels[0]->pc;
2930       if (inner) {
2931         if (!inner->setupcalled) PetscCall(PCSetType(inner, PCASM));
2932         PetscCall(PCSetFromOptions(inner));
2933         PetscCall(PetscStrcmp(((PetscObject)inner)->type_name, PCASM, &flg));
2934         if (flg) {
2935           if (!inner->setupcalled) { /* evaluates to PETSC_FALSE when -pc_hpddm_block_splitting */
2936             IS sorted;               /* PCASM will sort the input IS, duplicate it to return an unmodified (PCHPDDM) input IS */
2937 
2938             PetscCall(ISDuplicate(is[0], &sorted));
2939             PetscCall(PCASMSetLocalSubdomains(inner, 1, &sorted, &loc));
2940             PetscCall(PetscObjectDereference((PetscObject)sorted));
2941           }
2942           if (!PetscBool3ToBool(data->Neumann) && data->N > 1) { /* subdomain matrices are already created for the eigenproblem, reuse them for the fine-level PC */
2943             PetscCall(PCHPDDMPermute_Private(*is, nullptr, nullptr, sub[0], &P, nullptr));
2944             PetscCall(PCHPDDMCommunicationAvoidingPCASM_Private(inner, P, algebraic));
2945             PetscCall(PetscObjectDereference((PetscObject)P));
2946           }
2947         }
2948       }
2949       if (data->N > 1) {
2950         if (overlap != 1) PetscCall(PCHPDDMDestroySubMatrices_Private(PetscBool3ToBool(data->Neumann), PetscBool(algebraic && !block && overlap == -1), sub));
2951         if (overlap == 1) PetscCall(MatDestroy(subA));
2952       }
2953     }
2954     PetscCall(ISDestroy(&loc));
2955   } else data->N = 1 + reused; /* enforce this value to 1 + reused if there is no way to build another level */
2956   if (requested != data->N + reused) {
2957     PetscCall(PetscInfo(pc, "%" PetscInt_FMT " levels requested, only %" PetscInt_FMT " built + %" PetscInt_FMT " reused. Options for level(s) > %" PetscInt_FMT ", including -%spc_hpddm_coarse_ will not be taken into account\n", requested, data->N, reused,
2958                         data->N, pcpre ? pcpre : ""));
2959     PetscCall(PetscInfo(pc, "It is best to tune parameters, e.g., a higher value for -%spc_hpddm_levels_%" PetscInt_FMT "_eps_threshold_absolute or a lower value for -%spc_hpddm_levels_%" PetscInt_FMT "_svd_threshold_relative, so that at least one local deflation vector will be selected\n", pcpre ? pcpre : "",
2960                         data->N, pcpre ? pcpre : "", data->N));
2961     /* cannot use PCDestroy_HPDDMShell() because PCSHELL not set for unassembled levels */
2962     for (n = data->N - 1; n < requested - 1; ++n) {
2963       if (data->levels[n]->P) {
2964         PetscCall(HPDDM::Schwarz<PetscScalar>::destroy(data->levels[n], PETSC_TRUE));
2965         PetscCall(VecDestroyVecs(1, &data->levels[n]->v[0]));
2966         PetscCall(VecDestroyVecs(2, &data->levels[n]->v[1]));
2967         PetscCall(MatDestroy(data->levels[n]->V));
2968         PetscCall(MatDestroy(data->levels[n]->V + 1));
2969         PetscCall(MatDestroy(data->levels[n]->V + 2));
2970         PetscCall(VecDestroy(&data->levels[n]->D));
2971         PetscCall(PetscSFDestroy(&data->levels[n]->scatter));
2972       }
2973     }
2974     if (reused) {
2975       for (n = reused; n < PETSC_PCHPDDM_MAXLEVELS && data->levels[n]; ++n) {
2976         PetscCall(KSPDestroy(&data->levels[n]->ksp));
2977         PetscCall(PCDestroy(&data->levels[n]->pc));
2978       }
2979     }
2980     PetscCheck(!PetscDefined(USE_DEBUG), PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "%" PetscInt_FMT " levels requested, only %" PetscInt_FMT " built + %" PetscInt_FMT " reused. Options for level(s) > %" PetscInt_FMT ", including -%spc_hpddm_coarse_ will not be taken into account. It is best to tune parameters, e.g., a higher value for -%spc_hpddm_levels_%" PetscInt_FMT "_eps_threshold or a lower value for -%spc_hpddm_levels_%" PetscInt_FMT "_svd_threshold_relative, so that at least one local deflation vector will be selected. If you don't want this to error out, compile --with-debugging=0", requested,
2981                data->N, reused, data->N, pcpre ? pcpre : "", pcpre ? pcpre : "", data->N, pcpre ? pcpre : "", data->N);
2982   }
2983   /* these solvers are created after PCSetFromOptions() is called */
2984   if (pc->setfromoptionscalled) {
2985     for (n = 0; n < data->N; ++n) {
2986       if (data->levels[n]->ksp) PetscCall(KSPSetFromOptions(data->levels[n]->ksp));
2987       if (data->levels[n]->pc) PetscCall(PCSetFromOptions(data->levels[n]->pc));
2988     }
2989     pc->setfromoptionscalled = 0;
2990   }
2991   data->N += reused;
2992   if (data->share && swap) {
2993     /* swap back pointers so that variables follow the user-provided numbering */
2994     std::swap(C, data->aux);
2995     std::swap(uis, data->is);
2996     PetscCall(MatDestroy(&C));
2997     PetscCall(ISDestroy(&uis));
2998   }
2999   if (algebraic) PetscCall(MatDestroy(&data->aux));
3000   if (unsorted && unsorted != is[0]) {
3001     PetscCall(ISCopy(unsorted, data->is));
3002     PetscCall(ISDestroy(&unsorted));
3003   }
3004 #if PetscDefined(USE_DEBUG)
3005   PetscCheck((data->is && dis) || (!data->is && !dis), PETSC_COMM_SELF, PETSC_ERR_PLIB, "An IS pointer is NULL but not the other: input IS pointer (%p) v. output IS pointer (%p)", (void *)dis, (void *)data->is);
3006   if (data->is) {
3007     PetscCall(ISEqualUnsorted(data->is, dis, &flg));
3008     PetscCall(ISDestroy(&dis));
3009     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Input IS and output IS are not equal");
3010   }
3011   PetscCheck((data->aux && daux) || (!data->aux && !daux), PETSC_COMM_SELF, PETSC_ERR_PLIB, "A Mat pointer is NULL but not the other: input Mat pointer (%p) v. output Mat pointer (%p)", (void *)daux, (void *)data->aux);
3012   if (data->aux) {
3013     PetscCall(MatMultEqual(data->aux, daux, 20, &flg));
3014     PetscCall(MatDestroy(&daux));
3015     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Input Mat and output Mat are not equal");
3016   }
3017 #endif
3018   PetscFunctionReturn(PETSC_SUCCESS);
3019 }
3020 
3021 /*@
3022   PCHPDDMSetCoarseCorrectionType - Sets the coarse correction type.
3023 
3024   Collective
3025 
3026   Input Parameters:
3027 + pc   - preconditioner context
3028 - type - `PC_HPDDM_COARSE_CORRECTION_DEFLATED`, `PC_HPDDM_COARSE_CORRECTION_ADDITIVE`, `PC_HPDDM_COARSE_CORRECTION_BALANCED`, or `PC_HPDDM_COARSE_CORRECTION_NONE`
3029 
3030   Options Database Key:
3031 . -pc_hpddm_coarse_correction <deflated, additive, balanced, none> - type of coarse correction to apply
3032 
3033   Level: intermediate
3034 
3035 .seealso: [](ch_ksp), `PCHPDDMGetCoarseCorrectionType()`, `PCHPDDM`, `PCHPDDMCoarseCorrectionType`
3036 @*/
3037 PetscErrorCode PCHPDDMSetCoarseCorrectionType(PC pc, PCHPDDMCoarseCorrectionType type)
3038 {
3039   PetscFunctionBegin;
3040   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3041   PetscValidLogicalCollectiveEnum(pc, type, 2);
3042   PetscTryMethod(pc, "PCHPDDMSetCoarseCorrectionType_C", (PC, PCHPDDMCoarseCorrectionType), (pc, type));
3043   PetscFunctionReturn(PETSC_SUCCESS);
3044 }
3045 
3046 /*@
3047   PCHPDDMGetCoarseCorrectionType - Gets the coarse correction type.
3048 
3049   Input Parameter:
3050 . pc - preconditioner context
3051 
3052   Output Parameter:
3053 . type - `PC_HPDDM_COARSE_CORRECTION_DEFLATED`, `PC_HPDDM_COARSE_CORRECTION_ADDITIVE`, `PC_HPDDM_COARSE_CORRECTION_BALANCED`, or `PC_HPDDM_COARSE_CORRECTION_NONE`
3054 
3055   Level: intermediate
3056 
3057 .seealso: [](ch_ksp), `PCHPDDMSetCoarseCorrectionType()`, `PCHPDDM`, `PCHPDDMCoarseCorrectionType`
3058 @*/
3059 PetscErrorCode PCHPDDMGetCoarseCorrectionType(PC pc, PCHPDDMCoarseCorrectionType *type)
3060 {
3061   PetscFunctionBegin;
3062   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3063   if (type) {
3064     PetscAssertPointer(type, 2);
3065     PetscUseMethod(pc, "PCHPDDMGetCoarseCorrectionType_C", (PC, PCHPDDMCoarseCorrectionType *), (pc, type));
3066   }
3067   PetscFunctionReturn(PETSC_SUCCESS);
3068 }
3069 
3070 static PetscErrorCode PCHPDDMSetCoarseCorrectionType_HPDDM(PC pc, PCHPDDMCoarseCorrectionType type)
3071 {
3072   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3073 
3074   PetscFunctionBegin;
3075   data->correction = type;
3076   PetscFunctionReturn(PETSC_SUCCESS);
3077 }
3078 
3079 static PetscErrorCode PCHPDDMGetCoarseCorrectionType_HPDDM(PC pc, PCHPDDMCoarseCorrectionType *type)
3080 {
3081   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3082 
3083   PetscFunctionBegin;
3084   *type = data->correction;
3085   PetscFunctionReturn(PETSC_SUCCESS);
3086 }
3087 
3088 /*@
3089   PCHPDDMSetSTShareSubKSP - Sets whether the `KSP` in SLEPc `ST` and the fine-level subdomain solver should be shared.
3090 
3091   Input Parameters:
3092 + pc    - preconditioner context
3093 - share - whether the `KSP` should be shared or not
3094 
3095   Note:
3096   This is not the same as `PCSetReusePreconditioner()`. Given certain conditions (visible using -info), a symbolic factorization can be skipped
3097   when using a subdomain `PCType` such as `PCLU` or `PCCHOLESKY`.
3098 
3099   Level: advanced
3100 
3101 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMGetSTShareSubKSP()`
3102 @*/
3103 PetscErrorCode PCHPDDMSetSTShareSubKSP(PC pc, PetscBool share)
3104 {
3105   PetscFunctionBegin;
3106   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3107   PetscTryMethod(pc, "PCHPDDMSetSTShareSubKSP_C", (PC, PetscBool), (pc, share));
3108   PetscFunctionReturn(PETSC_SUCCESS);
3109 }
3110 
3111 /*@
3112   PCHPDDMGetSTShareSubKSP - Gets whether the `KSP` in SLEPc `ST` and the fine-level subdomain solver is shared.
3113 
3114   Input Parameter:
3115 . pc - preconditioner context
3116 
3117   Output Parameter:
3118 . share - whether the `KSP` is shared or not
3119 
3120   Note:
3121   This is not the same as `PCGetReusePreconditioner()`. The return value is unlikely to be true, but when it is, a symbolic factorization can be skipped
3122   when using a subdomain `PCType` such as `PCLU` or `PCCHOLESKY`.
3123 
3124   Level: advanced
3125 
3126 .seealso: [](ch_ksp), `PCHPDDM`, `PCHPDDMSetSTShareSubKSP()`
3127 @*/
3128 PetscErrorCode PCHPDDMGetSTShareSubKSP(PC pc, PetscBool *share)
3129 {
3130   PetscFunctionBegin;
3131   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3132   if (share) {
3133     PetscAssertPointer(share, 2);
3134     PetscUseMethod(pc, "PCHPDDMGetSTShareSubKSP_C", (PC, PetscBool *), (pc, share));
3135   }
3136   PetscFunctionReturn(PETSC_SUCCESS);
3137 }
3138 
3139 static PetscErrorCode PCHPDDMSetSTShareSubKSP_HPDDM(PC pc, PetscBool share)
3140 {
3141   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3142 
3143   PetscFunctionBegin;
3144   data->share = share;
3145   PetscFunctionReturn(PETSC_SUCCESS);
3146 }
3147 
3148 static PetscErrorCode PCHPDDMGetSTShareSubKSP_HPDDM(PC pc, PetscBool *share)
3149 {
3150   PC_HPDDM *data = (PC_HPDDM *)pc->data;
3151 
3152   PetscFunctionBegin;
3153   *share = data->share;
3154   PetscFunctionReturn(PETSC_SUCCESS);
3155 }
3156 
3157 /*@
3158   PCHPDDMSetDeflationMat - Sets the deflation space used to assemble a coarser operator.
3159 
3160   Input Parameters:
3161 + pc - preconditioner context
3162 . is - index set of the local deflation matrix
3163 - U  - deflation sequential matrix stored as a `MATSEQDENSE`
3164 
3165   Level: advanced
3166 
3167 .seealso: [](ch_ksp), `PCHPDDM`, `PCDeflationSetSpace()`, `PCMGSetRestriction()`
3168 @*/
3169 PetscErrorCode PCHPDDMSetDeflationMat(PC pc, IS is, Mat U)
3170 {
3171   PetscFunctionBegin;
3172   PetscValidHeaderSpecific(pc, PC_CLASSID, 1);
3173   PetscValidHeaderSpecific(is, IS_CLASSID, 2);
3174   PetscValidHeaderSpecific(U, MAT_CLASSID, 3);
3175   PetscTryMethod(pc, "PCHPDDMSetDeflationMat_C", (PC, IS, Mat), (pc, is, U));
3176   PetscFunctionReturn(PETSC_SUCCESS);
3177 }
3178 
3179 static PetscErrorCode PCHPDDMSetDeflationMat_HPDDM(PC pc, IS is, Mat U)
3180 {
3181   PetscFunctionBegin;
3182   PetscCall(PCHPDDMSetAuxiliaryMat_Private(pc, is, U, PETSC_TRUE));
3183   PetscFunctionReturn(PETSC_SUCCESS);
3184 }
3185 
3186 PetscErrorCode HPDDMLoadDL_Private(PetscBool *found)
3187 {
3188   PetscBool flg;
3189   char      lib[PETSC_MAX_PATH_LEN], dlib[PETSC_MAX_PATH_LEN], dir[PETSC_MAX_PATH_LEN];
3190 
3191   PetscFunctionBegin;
3192   PetscAssertPointer(found, 1);
3193   PetscCall(PetscStrncpy(dir, "${PETSC_LIB_DIR}", sizeof(dir)));
3194   PetscCall(PetscOptionsGetString(nullptr, nullptr, "-hpddm_dir", dir, sizeof(dir), nullptr));
3195   PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/libhpddm_petsc", dir));
3196   PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3197 #if defined(SLEPC_LIB_DIR) /* this variable is passed during SLEPc ./configure when PETSc has not been configured   */
3198   if (!*found) {           /* with --download-hpddm since slepcconf.h is not yet built (and thus can't be included) */
3199     PetscCall(PetscStrncpy(dir, HPDDM_STR(SLEPC_LIB_DIR), sizeof(dir)));
3200     PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/libhpddm_petsc", dir));
3201     PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3202   }
3203 #endif
3204   if (!*found) { /* probable options for this to evaluate to PETSC_TRUE: system inconsistency (libhpddm_petsc moved by user?) or PETSc configured without --download-slepc */
3205     PetscCall(PetscOptionsGetenv(PETSC_COMM_SELF, "SLEPC_DIR", dir, sizeof(dir), &flg));
3206     if (flg) { /* if both PETSc and SLEPc are configured with --download-hpddm but PETSc has been configured without --download-slepc, one must ensure that libslepc is loaded before libhpddm_petsc */
3207       PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/lib/libslepc", dir));
3208       PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3209       PetscCheck(*found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s not found but SLEPC_DIR=%s", lib, dir);
3210       PetscCall(PetscDLLibraryAppend(PETSC_COMM_SELF, &PetscDLLibrariesLoaded, dlib));
3211       PetscCall(PetscSNPrintf(lib, sizeof(lib), "%s/lib/libhpddm_petsc", dir)); /* libhpddm_petsc is always in the same directory as libslepc */
3212       PetscCall(PetscDLLibraryRetrieve(PETSC_COMM_SELF, lib, dlib, 1024, found));
3213     }
3214   }
3215   PetscCheck(*found, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s not found", lib);
3216   PetscCall(PetscDLLibraryAppend(PETSC_COMM_SELF, &PetscDLLibrariesLoaded, dlib));
3217   PetscFunctionReturn(PETSC_SUCCESS);
3218 }
3219 
3220 /*MC
3221    PCHPDDM - Interface with the HPDDM library.
3222 
3223    This `PC` may be used to build multilevel spectral domain decomposition methods based on the GenEO framework {cite}`spillane2011robust` {cite}`al2021multilevel`.
3224    It may be viewed as an alternative to spectral
   AMGe or `PCBDDC` with adaptive selection of constraints. The interface is explained in detail in {cite}`jolivetromanzampini2020`
3226 
3227    The matrix used for building the preconditioner (Pmat) may be unassembled (`MATIS`), assembled (`MATAIJ`, `MATBAIJ`, or `MATSBAIJ`), hierarchical (`MATHTOOL`), `MATNORMAL`, `MATNORMALHERMITIAN`, or `MATSCHURCOMPLEMENT` (when `PCHPDDM` is used as part of an outer `PCFIELDSPLIT`).
3228 
3229    For multilevel preconditioning, when using an assembled or hierarchical Pmat, one must provide an auxiliary local `Mat` (unassembled local operator for GenEO) using
3230    `PCHPDDMSetAuxiliaryMat()`. Calling this routine is not needed when using a `MATIS` Pmat, assembly is done internally using `MatConvert()`.
3231 
3232    Options Database Keys:
3233 +   -pc_hpddm_define_subdomains <true, default=false>    - on the finest level, calls `PCASMSetLocalSubdomains()` with the `IS` supplied in `PCHPDDMSetAuxiliaryMat()`
3234                                                          (not relevant with an unassembled Pmat)
3235 .   -pc_hpddm_has_neumann <true, default=false>          - on the finest level, informs the `PC` that the local Neumann matrix is supplied in `PCHPDDMSetAuxiliaryMat()`
3236 -   -pc_hpddm_coarse_correction <type, default=deflated> - determines the `PCHPDDMCoarseCorrectionType` when calling `PCApply()`
3237 
3238    Options for subdomain solvers, subdomain eigensolvers (for computing deflation vectors), and the coarse solver can be set using the following options database prefixes.
3239 .vb
3240       -pc_hpddm_levels_%d_pc_
3241       -pc_hpddm_levels_%d_ksp_
3242       -pc_hpddm_levels_%d_eps_
3243       -pc_hpddm_levels_%d_p
3244       -pc_hpddm_levels_%d_mat_type
3245       -pc_hpddm_coarse_
3246       -pc_hpddm_coarse_p
3247       -pc_hpddm_coarse_mat_type
3248       -pc_hpddm_coarse_mat_filter
3249 .ve
3250 
3251    E.g., -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 10 -pc_hpddm_levels_2_p 4 -pc_hpddm_levels_2_sub_pc_type lu -pc_hpddm_levels_2_eps_nev 10
3252     -pc_hpddm_coarse_p 2 -pc_hpddm_coarse_mat_type baij will use 10 deflation vectors per subdomain on the fine "level 1",
3253     aggregate the fine subdomains into 4 "level 2" subdomains, then use 10 deflation vectors per subdomain on "level 2",
3254     and assemble the coarse matrix (of dimension 4 x 10 = 40) on two processes as a `MATBAIJ` (default is `MATSBAIJ`).
3255 
   In order to activate a "level N+1" coarse correction, it is mandatory to set -pc_hpddm_levels_N_eps_nev <nu> or -pc_hpddm_levels_N_eps_threshold_absolute <val>. The default -pc_hpddm_coarse_p value is 1, meaning that the coarse operator is aggregated on a single process.
3257 
3258    Level: intermediate
3259 
3260    Notes:
3261    This preconditioner requires that PETSc is built with SLEPc (``--download-slepc``).
3262 
3263    By default, the underlying concurrent eigenproblems
3264    are solved using SLEPc shift-and-invert spectral transformation. This is usually what gives the best performance for GenEO, cf.
3265    {cite}`spillane2011robust` {cite}`jolivet2013scalabledd`. As
3266    stated above, SLEPc options are available through -pc_hpddm_levels_%d_, e.g., -pc_hpddm_levels_1_eps_type arpack -pc_hpddm_levels_1_eps_nev 10
3267    -pc_hpddm_levels_1_st_type sinvert. There are furthermore three options related to the (subdomain-wise local) eigensolver that are not described in
3268    SLEPc documentation since they are specific to `PCHPDDM`.
3269 .vb
3270       -pc_hpddm_levels_1_st_share_sub_ksp
3271       -pc_hpddm_levels_%d_eps_threshold_absolute
3272       -pc_hpddm_levels_1_eps_use_inertia
3273 .ve
3274 
3275    The first option from the list only applies to the fine-level eigensolver, see `PCHPDDMSetSTShareSubKSP()`. The second option from the list is
3276    used to filter eigenmodes retrieved after convergence of `EPSSolve()` at "level N" such that eigenvectors used to define a "level N+1" coarse
   correction are associated with eigenvalues whose magnitude is lower than or equal to -pc_hpddm_levels_N_eps_threshold_absolute. When using an `EPS` which cannot
3278    determine a priori the proper -pc_hpddm_levels_N_eps_nev such that all wanted eigenmodes are retrieved, it is possible to get an estimation of the
3279    correct value using the third option from the list, -pc_hpddm_levels_1_eps_use_inertia, see `MatGetInertia()`. In that case, there is no need
3280    to supply -pc_hpddm_levels_1_eps_nev. This last option also only applies to the fine-level (N = 1) eigensolver.
3281 
3282    See also {cite}`dolean2015introduction`, {cite}`al2022robust`, {cite}`al2022robustpd`, and {cite}`nataf2022recent`
3283 
3284 .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCHPDDMSetAuxiliaryMat()`, `MATIS`, `PCBDDC`, `PCDEFLATION`, `PCTELESCOPE`, `PCASM`,
3285           `PCHPDDMSetCoarseCorrectionType()`, `PCHPDDMHasNeumannMat()`, `PCHPDDMSetRHSMat()`, `PCHPDDMSetDeflationMat()`, `PCHPDDMSetSTShareSubKSP()`,
3286           `PCHPDDMGetSTShareSubKSP()`, `PCHPDDMGetCoarseCorrectionType()`, `PCHPDDMGetComplexities()`
3287 M*/
3288 PETSC_EXTERN PetscErrorCode PCCreate_HPDDM(PC pc)
3289 {
3290   PC_HPDDM *data;
3291   PetscBool found;
3292 
3293   PetscFunctionBegin;
3294   if (!loadedSym) {
3295     PetscCall(HPDDMLoadDL_Private(&found));
3296     if (found) PetscCall(PetscDLLibrarySym(PETSC_COMM_SELF, &PetscDLLibrariesLoaded, nullptr, "PCHPDDM_Internal", (void **)&loadedSym));
3297   }
3298   PetscCheck(loadedSym, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCHPDDM_Internal symbol not found in loaded libhpddm_petsc");
3299   PetscCall(PetscNew(&data));
3300   pc->data                   = data;
3301   data->Neumann              = PETSC_BOOL3_UNKNOWN;
3302   pc->ops->reset             = PCReset_HPDDM;
3303   pc->ops->destroy           = PCDestroy_HPDDM;
3304   pc->ops->setfromoptions    = PCSetFromOptions_HPDDM;
3305   pc->ops->setup             = PCSetUp_HPDDM;
3306   pc->ops->apply             = PCApply_HPDDM<false>;
3307   pc->ops->matapply          = PCMatApply_HPDDM<false>;
3308   pc->ops->applytranspose    = PCApply_HPDDM<true>;
3309   pc->ops->matapplytranspose = PCMatApply_HPDDM<true>;
3310   pc->ops->view              = PCView_HPDDM;
3311   pc->ops->presolve          = PCPreSolve_HPDDM;
3312 
3313   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetAuxiliaryMat_C", PCHPDDMSetAuxiliaryMat_HPDDM));
3314   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMHasNeumannMat_C", PCHPDDMHasNeumannMat_HPDDM));
3315   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetRHSMat_C", PCHPDDMSetRHSMat_HPDDM));
3316   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetCoarseCorrectionType_C", PCHPDDMSetCoarseCorrectionType_HPDDM));
3317   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetCoarseCorrectionType_C", PCHPDDMGetCoarseCorrectionType_HPDDM));
3318   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetSTShareSubKSP_C", PCHPDDMSetSTShareSubKSP_HPDDM));
3319   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMGetSTShareSubKSP_C", PCHPDDMGetSTShareSubKSP_HPDDM));
3320   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCHPDDMSetDeflationMat_C", PCHPDDMSetDeflationMat_HPDDM));
3321   PetscFunctionReturn(PETSC_SUCCESS);
3322 }
3323 
3324 /*@C
3325   PCHPDDMInitializePackage - This function initializes everything in the `PCHPDDM` package. It is called from `PCInitializePackage()`.
3326 
3327   Level: developer
3328 
3329 .seealso: [](ch_ksp), `PetscInitialize()`
3330 @*/
3331 PetscErrorCode PCHPDDMInitializePackage(void)
3332 {
3333   char ename[32];
3334 
3335   PetscFunctionBegin;
3336   if (PCHPDDMPackageInitialized) PetscFunctionReturn(PETSC_SUCCESS);
3337   PCHPDDMPackageInitialized = PETSC_TRUE;
3338   PetscCall(PetscRegisterFinalize(PCHPDDMFinalizePackage));
3339   /* general events registered once during package initialization */
3340   /* some of these events are not triggered in libpetsc,          */
3341   /* but rather directly in libhpddm_petsc,                       */
3342   /* which is in charge of performing the following operations    */
3343 
3344   /* domain decomposition structure from Pmat sparsity pattern    */
3345   PetscCall(PetscLogEventRegister("PCHPDDMStrc", PC_CLASSID, &PC_HPDDM_Strc));
3346   /* Galerkin product, redistribution, and setup (not triggered in libpetsc)                */
3347   PetscCall(PetscLogEventRegister("PCHPDDMPtAP", PC_CLASSID, &PC_HPDDM_PtAP));
3348   /* Galerkin product with summation, redistribution, and setup (not triggered in libpetsc) */
3349   PetscCall(PetscLogEventRegister("PCHPDDMPtBP", PC_CLASSID, &PC_HPDDM_PtBP));
3350   /* next level construction using PtAP and PtBP (not triggered in libpetsc)                */
3351   PetscCall(PetscLogEventRegister("PCHPDDMNext", PC_CLASSID, &PC_HPDDM_Next));
3352   static_assert(PETSC_PCHPDDM_MAXLEVELS <= 9, "PETSC_PCHPDDM_MAXLEVELS value is too high");
3353   for (PetscInt i = 1; i < PETSC_PCHPDDM_MAXLEVELS; ++i) {
3354     PetscCall(PetscSNPrintf(ename, sizeof(ename), "PCHPDDMSetUp L%1" PetscInt_FMT, i));
3355     /* events during a PCSetUp() at level #i _except_ the assembly */
3356     /* of the Galerkin operator of the coarser level #(i + 1)      */
3357     PetscCall(PetscLogEventRegister(ename, PC_CLASSID, &PC_HPDDM_SetUp[i - 1]));
3358     PetscCall(PetscSNPrintf(ename, sizeof(ename), "PCHPDDMSolve L%1" PetscInt_FMT, i));
3359     /* events during a PCApply() at level #i _except_              */
3360     /* the KSPSolve() of the coarser level #(i + 1)                */
3361     PetscCall(PetscLogEventRegister(ename, PC_CLASSID, &PC_HPDDM_Solve[i - 1]));
3362   }
3363   PetscFunctionReturn(PETSC_SUCCESS);
3364 }
3365 
3366 /*@C
3367   PCHPDDMFinalizePackage - This function frees everything from the `PCHPDDM` package. It is called from `PetscFinalize()`.
3368 
3369   Level: developer
3370 
3371 .seealso: [](ch_ksp), `PetscFinalize()`
3372 @*/
PetscErrorCode PCHPDDMFinalizePackage(void)
{
  PetscFunctionBegin;
  /* reset the flag so that a subsequent PCHPDDMInitializePackage() is no longer a no-op */
  PCHPDDMPackageInitialized = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3379 
/* MatMult() callback of the MATSHELL holding a Harmonic context: y = A x following the block  */
/* structure sketched below, i.e., multiply x by h->A[0], zero-extend the result into the rows */
/* indexed by h->is[0], apply the inner solver in place, and restrict the solution through     */
/* h->is[1] into y                                                                             */
static PetscErrorCode MatMult_Harmonic(Mat A, Vec x, Vec y)
{
  Harmonic h; /* [ A_00  A_01       ], furthermore, A_00 = [ A_loc,loc  A_loc,ovl ], thus, A_01 = [         ] */
              /* [ A_10  A_11  A_12 ]                      [ A_ovl,loc  A_ovl,ovl ]               [ A_ovl,1 ] */
  Vec sub;    /*  y = A x = R_loc R_0 [ A_00  A_01 ]^-1                                   R_loc = [  I_loc  ] */
              /*                      [ A_10  A_11 ]    R_1^T A_12 x                              [         ] */
  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &h));
  PetscCall(VecSet(h->v, 0.0)); /* entries outside h->is[0] stay zero, so the next step acts as the zero-padded extension R_1^T */
  PetscCall(VecGetSubVector(h->v, h->is[0], &sub));
  PetscCall(MatMult(h->A[0], x, sub)); /* rows h->is[0] of h->v now hold h->A[0] x, the A_12 x term from the formula above */
  PetscCall(VecRestoreSubVector(h->v, h->is[0], &sub));
  PetscCall(KSPSolve(h->ksp, h->v, h->v)); /* in-place application of [ A_00  A_01 ; A_10  A_11 ]^-1 */
  PetscCall(VecISCopy(h->v, h->is[1], SCATTER_REVERSE, y)); /* y = rows h->is[1] of the solution (the R_loc R_0 restriction) */
  PetscFunctionReturn(PETSC_SUCCESS);
}
3396 
/* MatMultTranspose() callback of the MATSHELL holding a Harmonic context: x = A^T y, the exact */
/* transpose of MatMult_Harmonic(), i.e., zero-extend y through h->is[1], apply the transposed  */
/* inner solver in place, and multiply the rows h->is[0] of the solution by h->A[0]^T           */
static PetscErrorCode MatMultTranspose_Harmonic(Mat A, Vec y, Vec x)
{
  Harmonic h;   /* x = A^T y =            [ A_00  A_01 ]^-T R_0^T R_loc^T y */
  Vec      sub; /*             A_12^T R_1 [ A_10  A_11 ]                    */

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(A, &h));
  PetscCall(VecSet(h->v, 0.0)); /* zero-padded extension, as in MatMult_Harmonic() */
  PetscCall(VecISCopy(h->v, h->is[1], SCATTER_FORWARD, y)); /* rows h->is[1] of h->v = y (the R_0^T R_loc^T y term) */
  PetscCall(KSPSolveTranspose(h->ksp, h->v, h->v)); /* in-place application of [ A_00  A_01 ; A_10  A_11 ]^-T */
  PetscCall(VecGetSubVector(h->v, h->is[0], &sub));
  PetscCall(MatMultTranspose(h->A[0], sub, x)); /* x = h->A[0]^T applied to the rows h->is[0] of the solution */
  PetscCall(VecRestoreSubVector(h->v, h->is[0], &sub));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3412 
/* MatProductNumeric() callback (AB case): Y = S X, the multiple right-hand side analogue of */
/* MatMult_Harmonic(), using a single KSPMatSolve() instead of column-by-column KSPSolve()   */
static PetscErrorCode MatProduct_AB_Harmonic(Mat S, Mat X, Mat Y, void *)
{
  Harmonic h;
  Mat      A, B; /* dense work matrices, destroyed before return */
  Vec      a, b; /* borrowed column views used to scatter between index sets */

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(S, &h));
  PetscCall(MatMatMult(h->A[0], X, MAT_INITIAL_MATRIX, PETSC_CURRENT, &A)); /* A = h->A[0] X */
  /* B = columns of A scattered into the rows h->is[0] of the inner-solver space, */
  /* the block counterpart of the VecSet() + MatMult() steps in MatMult_Harmonic() */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, A->cmap->n, nullptr, &B));
  for (PetscInt i = 0; i < A->cmap->n; ++i) {
    PetscCall(MatDenseGetColumnVecRead(A, i, &a));
    PetscCall(MatDenseGetColumnVecWrite(B, i, &b));
    PetscCall(VecISCopy(b, h->is[0], SCATTER_FORWARD, a)); /* rows h->is[0] of B(:, i) = A(:, i) */
    PetscCall(MatDenseRestoreColumnVecWrite(B, i, &b));
    PetscCall(MatDenseRestoreColumnVecRead(A, i, &a));
  }
  PetscCall(MatDestroy(&A));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, B->cmap->n, nullptr, &A));
  PetscCall(KSPMatSolve(h->ksp, B, A)); /* solve with all right-hand sides at once */
  PetscCall(MatDestroy(&B));
  /* Y = rows h->is[1] of the solution, column by column */
  for (PetscInt i = 0; i < A->cmap->n; ++i) {
    PetscCall(MatDenseGetColumnVecRead(A, i, &a));
    PetscCall(MatDenseGetColumnVecWrite(Y, i, &b));
    PetscCall(VecISCopy(a, h->is[1], SCATTER_REVERSE, b));
    PetscCall(MatDenseRestoreColumnVecWrite(Y, i, &b));
    PetscCall(MatDenseRestoreColumnVecRead(A, i, &a));
  }
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3444 
/* MatProductNumeric() callback (AtB case): X = S^T Y, the multiple right-hand side analogue of     */
/* MatMultTranspose_Harmonic(), using a single KSPMatSolveTranspose() instead of column-by-column   */
/* KSPSolveTranspose(); note that X must have been created beforehand since MAT_REUSE_MATRIX is used */
static PetscErrorCode MatProduct_AtB_Harmonic(Mat S, Mat Y, Mat X, void *)
{
  Harmonic h;
  Mat      A, B; /* dense work matrices, destroyed before return */
  Vec      a, b; /* borrowed column views used to scatter between index sets */

  PetscFunctionBegin;
  PetscCall(MatShellGetContext(S, &h));
  /* A = columns of Y scattered into the rows h->is[1] of the inner-solver space */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, Y->cmap->n, nullptr, &A));
  for (PetscInt i = 0; i < A->cmap->n; ++i) {
    PetscCall(MatDenseGetColumnVecRead(Y, i, &b));
    PetscCall(MatDenseGetColumnVecWrite(A, i, &a));
    PetscCall(VecISCopy(a, h->is[1], SCATTER_FORWARD, b)); /* rows h->is[1] of A(:, i) = Y(:, i) */
    PetscCall(MatDenseRestoreColumnVecWrite(A, i, &a));
    PetscCall(MatDenseRestoreColumnVecRead(Y, i, &b));
  }
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->ksp->pc->mat->rmap->n, A->cmap->n, nullptr, &B));
  PetscCall(KSPMatSolveTranspose(h->ksp, A, B)); /* transposed solve with all right-hand sides at once */
  PetscCall(MatDestroy(&A));
  /* A = rows h->is[0] of the solution, column by column */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, h->A[0]->rmap->n, B->cmap->n, nullptr, &A));
  for (PetscInt i = 0; i < A->cmap->n; ++i) {
    PetscCall(MatDenseGetColumnVecRead(B, i, &b));
    PetscCall(MatDenseGetColumnVecWrite(A, i, &a));
    PetscCall(VecISCopy(b, h->is[0], SCATTER_REVERSE, a));
    PetscCall(MatDenseRestoreColumnVecWrite(A, i, &a));
    PetscCall(MatDenseRestoreColumnVecRead(B, i, &b));
  }
  PetscCall(MatDestroy(&B));
  PetscCall(MatTransposeMatMult(h->A[0], A, MAT_REUSE_MATRIX, PETSC_CURRENT, &X)); /* X = h->A[0]^T A */
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3477 
3478 static PetscErrorCode MatDestroy_Harmonic(Mat A)
3479 {
3480   Harmonic h;
3481 
3482   PetscFunctionBegin;
3483   PetscCall(MatShellGetContext(A, &h));
3484   for (PetscInt i = 0; i < 5; ++i) PetscCall(ISDestroy(h->is + i));
3485   PetscCall(PetscFree(h->is));
3486   PetscCall(VecDestroy(&h->v));
3487   for (PetscInt i = 0; i < 2; ++i) PetscCall(MatDestroy(h->A + i));
3488   PetscCall(PetscFree(h->A));
3489   PetscCall(KSPDestroy(&h->ksp));
3490   PetscCall(PetscFree(h));
3491   PetscFunctionReturn(PETSC_SUCCESS);
3492 }
3493