xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 #include <petsc/private/pcbddcimpl.h>
2 #include <petsc/private/pcbddcprivateimpl.h>
3 #include <../src/mat/impls/dense/seq/dense.h>
4 #include <petscblaslapack.h>
5 
6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *);
7 static PetscErrorCode        PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *);
8 static PetscErrorCode        PCBDDCReuseSolvers_Interior(PC, Vec, Vec);
9 static PetscErrorCode        PCBDDCReuseSolvers_Correction(PC, Vec, Vec);
10 
11 /* if v2 is not present, correction is done in-place */
12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full) {
13   PetscScalar *array;
14   PetscScalar *array2;
15 
16   PetscFunctionBegin;
17   if (!ctx->benign_n) PetscFunctionReturn(0);
18   if (sol && full) {
19     PetscInt n_I, size_schur;
20 
21     /* get sizes */
22     PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
23     PetscCall(VecGetSize(v, &n_I));
24     n_I = n_I - size_schur;
25     /* get schur sol from array */
26     PetscCall(VecGetArray(v, &array));
27     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
28     PetscCall(VecRestoreArray(v, &array));
29     /* apply interior sol correction */
30     PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work));
31     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
32     PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v));
33   }
34   if (v2) {
35     PetscInt nl;
36 
37     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
38     PetscCall(VecGetLocalSize(v2, &nl));
39     PetscCall(VecGetArray(v2, &array2));
40     PetscCall(PetscArraycpy(array2, array, nl));
41   } else {
42     PetscCall(VecGetArray(v, &array));
43     array2 = array;
44   }
45   if (!sol) { /* change rhs */
46     PetscInt n;
47     for (n = 0; n < ctx->benign_n; n++) {
48       PetscScalar     sum = 0.;
49       const PetscInt *cols;
50       PetscInt        nz, i;
51 
52       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
53       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
54       for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
55 #if defined(PETSC_USE_COMPLEX)
56       sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
57 #else
58       sum = -sum / nz;
59 #endif
60       for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
61       ctx->benign_save_vals[n] = array2[cols[nz - 1]];
62       array2[cols[nz - 1]]     = sum;
63       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
64     }
65   } else {
66     PetscInt n;
67     for (n = 0; n < ctx->benign_n; n++) {
68       PetscScalar     sum = 0.;
69       const PetscInt *cols;
70       PetscInt        nz, i;
71       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
72       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
73       for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
74 #if defined(PETSC_USE_COMPLEX)
75       sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
76 #else
77       sum = -sum / nz;
78 #endif
79       for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
80       array2[cols[nz - 1]] = ctx->benign_save_vals[n];
81       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
82     }
83   }
84   if (v2) {
85     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
86     PetscCall(VecRestoreArray(v2, &array2));
87   } else {
88     PetscCall(VecRestoreArray(v, &array));
89   }
90   if (!sol && full) {
91     Vec      usedv;
92     PetscInt n_I, size_schur;
93 
94     /* get sizes */
95     PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
96     PetscCall(VecGetSize(v, &n_I));
97     n_I = n_I - size_schur;
98     /* compute schur rhs correction */
99     if (v2) {
100       usedv = v2;
101     } else {
102       usedv = v;
103     }
104     /* apply schur rhs correction */
105     PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work));
106     PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array));
107     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
108     PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array));
109     PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec));
110     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
111   }
112   PetscFunctionReturn(0);
113 }
114 
115 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full) {
116   PCBDDCReuseSolvers ctx;
117   PetscBool          copy = PETSC_FALSE;
118 
119   PetscFunctionBegin;
120   PetscCall(PCShellGetContext(pc, &ctx));
121   if (full) {
122 #if defined(PETSC_HAVE_MUMPS)
123     PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
124 #endif
125 #if defined(PETSC_HAVE_MKL_PARDISO)
126     PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
127 #endif
128     copy = ctx->has_vertices;
129   } else { /* interior solver */
130 #if defined(PETSC_HAVE_MUMPS)
131     PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0));
132 #endif
133 #if defined(PETSC_HAVE_MKL_PARDISO)
134     PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1));
135 #endif
136     copy = PETSC_TRUE;
137   }
138   /* copy rhs into factored matrix workspace */
139   if (copy) {
140     PetscInt     n;
141     PetscScalar *array, *array_solver;
142 
143     PetscCall(VecGetLocalSize(rhs, &n));
144     PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array));
145     PetscCall(VecGetArray(ctx->rhs, &array_solver));
146     PetscCall(PetscArraycpy(array_solver, array, n));
147     PetscCall(VecRestoreArray(ctx->rhs, &array_solver));
148     PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array));
149 
150     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full));
151     if (transpose) {
152       PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol));
153     } else {
154       PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol));
155     }
156     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full));
157 
158     /* get back data to caller worskpace */
159     PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
160     PetscCall(VecGetArray(sol, &array));
161     PetscCall(PetscArraycpy(array, array_solver, n));
162     PetscCall(VecRestoreArray(sol, &array));
163     PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
164   } else {
165     if (ctx->benign_n) {
166       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full));
167       if (transpose) {
168         PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol));
169       } else {
170         PetscCall(MatSolve(ctx->F, ctx->rhs, sol));
171       }
172       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full));
173     } else {
174       if (transpose) {
175         PetscCall(MatSolveTranspose(ctx->F, rhs, sol));
176       } else {
177         PetscCall(MatSolve(ctx->F, rhs, sol));
178       }
179     }
180   }
181   /* restore defaults */
182 #if defined(PETSC_HAVE_MUMPS)
183   PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
184 #endif
185 #if defined(PETSC_HAVE_MKL_PARDISO)
186   PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
187 #endif
188   PetscFunctionReturn(0);
189 }
190 
191 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol) {
192   PetscFunctionBegin;
193   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE));
194   PetscFunctionReturn(0);
195 }
196 
197 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol) {
198   PetscFunctionBegin;
199   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE));
200   PetscFunctionReturn(0);
201 }
202 
203 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol) {
204   PetscFunctionBegin;
205   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE));
206   PetscFunctionReturn(0);
207 }
208 
209 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol) {
210   PetscFunctionBegin;
211   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE));
212   PetscFunctionReturn(0);
213 }
214 
215 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer) {
216   PCBDDCReuseSolvers ctx;
217   PetscBool          iascii;
218 
219   PetscFunctionBegin;
220   PetscCall(PCShellGetContext(pc, &ctx));
221   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
222   if (iascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
223   PetscCall(MatView(ctx->F, viewer));
224   if (iascii) PetscCall(PetscViewerPopFormat(viewer));
225   PetscFunctionReturn(0);
226 }
227 
228 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse) {
229   PetscInt i;
230 
231   PetscFunctionBegin;
232   PetscCall(MatDestroy(&reuse->F));
233   PetscCall(VecDestroy(&reuse->sol));
234   PetscCall(VecDestroy(&reuse->rhs));
235   PetscCall(PCDestroy(&reuse->interior_solver));
236   PetscCall(PCDestroy(&reuse->correction_solver));
237   PetscCall(ISDestroy(&reuse->is_R));
238   PetscCall(ISDestroy(&reuse->is_B));
239   PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
240   PetscCall(VecDestroy(&reuse->sol_B));
241   PetscCall(VecDestroy(&reuse->rhs_B));
242   for (i = 0; i < reuse->benign_n; i++) { PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i])); }
243   PetscCall(PetscFree(reuse->benign_zerodiag_subs));
244   PetscCall(PetscFree(reuse->benign_save_vals));
245   PetscCall(MatDestroy(&reuse->benign_csAIB));
246   PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
247   PetscCall(VecDestroy(&reuse->benign_corr_work));
248   PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
249   PetscFunctionReturn(0);
250 }
251 
252 static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc) {
253   PCBDDCReuseSolvers ctx;
254 
255   PetscFunctionBegin;
256   PetscCall(PCShellGetContext(pc, &ctx));
257   PetscCall(PCBDDCReuseSolversReset(ctx));
258   PetscCall(PetscFree(ctx));
259   PetscCall(PCShellSetContext(pc, NULL));
260   PetscFunctionReturn(0);
261 }
262 
263 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S) {
264   Mat         B, C, D, Bd, Cd, AinvBd;
265   KSP         ksp;
266   PC          pc;
267   PetscBool   isLU, isILU, isCHOL, Bdense, Cdense;
268   PetscReal   fill = 2.0;
269   PetscInt    n_I;
270   PetscMPIInt size;
271 
272   PetscFunctionBegin;
273   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size));
274   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices");
275   if (reuse == MAT_REUSE_MATRIX) {
276     PetscBool Sdense;
277 
278     PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
279     PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense");
280   }
281   PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
282   PetscCall(MatSchurComplementGetKSP(M, &ksp));
283   PetscCall(KSPGetPC(ksp, &pc));
284   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU));
285   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU));
286   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL));
287   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense));
288   PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense));
289   PetscCall(MatGetSize(B, &n_I, NULL));
290   if (n_I) {
291     if (!Bdense) {
292       PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
293     } else {
294       Bd = B;
295     }
296 
297     if (isLU || isILU || isCHOL) {
298       Mat fact;
299       PetscCall(KSPSetUp(ksp));
300       PetscCall(PCFactorGetMatrix(pc, &fact));
301       PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
302       PetscCall(MatMatSolve(fact, Bd, AinvBd));
303     } else {
304       PetscBool ex = PETSC_TRUE;
305 
306       if (ex) {
307         Mat Ainvd;
308 
309         PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
310         PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
311         PetscCall(MatDestroy(&Ainvd));
312       } else {
313         Vec          sol, rhs;
314         PetscScalar *arrayrhs, *arraysol;
315         PetscInt     i, nrhs, n;
316 
317         PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
318         PetscCall(MatGetSize(Bd, &n, &nrhs));
319         PetscCall(MatDenseGetArray(Bd, &arrayrhs));
320         PetscCall(MatDenseGetArray(AinvBd, &arraysol));
321         PetscCall(KSPGetSolution(ksp, &sol));
322         PetscCall(KSPGetRhs(ksp, &rhs));
323         for (i = 0; i < nrhs; i++) {
324           PetscCall(VecPlaceArray(rhs, arrayrhs + i * n));
325           PetscCall(VecPlaceArray(sol, arraysol + i * n));
326           PetscCall(KSPSolve(ksp, rhs, sol));
327           PetscCall(VecResetArray(rhs));
328           PetscCall(VecResetArray(sol));
329         }
330         PetscCall(MatDenseRestoreArray(Bd, &arrayrhs));
331         PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs));
332       }
333     }
334     if (!Bdense & !issym) { PetscCall(MatDestroy(&Bd)); }
335 
336     if (!issym) {
337       if (!Cdense) {
338         PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
339       } else {
340         Cd = C;
341       }
342       PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
343       if (!Cdense) { PetscCall(MatDestroy(&Cd)); }
344     } else {
345       PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
346       if (!Bdense) { PetscCall(MatDestroy(&Bd)); }
347     }
348     PetscCall(MatDestroy(&AinvBd));
349   }
350 
351   if (D) {
352     Mat       Dd;
353     PetscBool Ddense;
354 
355     PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense));
356     if (!Ddense) {
357       PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
358     } else {
359       Dd = D;
360     }
361     if (n_I) {
362       PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN));
363     } else {
364       if (reuse == MAT_INITIAL_MATRIX) {
365         PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S));
366       } else {
367         PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN));
368       }
369     }
370     if (!Ddense) { PetscCall(MatDestroy(&Dd)); }
371   } else {
372     PetscCall(MatScale(*S, -1.0));
373   }
374   PetscFunctionReturn(0);
375 }
376 
377 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal) {
378   Mat          F, A_II, A_IB, A_BI, A_BB, AE_II;
379   Mat          S_all;
380   Vec          gstash, lstash;
381   VecScatter   sstash;
382   IS           is_I, is_I_layer;
383   IS           all_subsets, all_subsets_mult, all_subsets_n;
384   PetscScalar *stasharray, *Bwork;
385   PetscInt    *nnz, *all_local_idx_N;
386   PetscInt    *auxnum1, *auxnum2;
387   PetscInt     i, subset_size, max_subset_size;
388   PetscInt     n_B, extra, local_size, global_size;
389   PetscInt     local_stash_size;
390   PetscBLASInt B_N, B_ierr, B_lwork, *pivots;
391   MPI_Comm     comm_n;
392   PetscBool    deluxe   = PETSC_TRUE;
393   PetscBool    use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
394   PetscViewer  matl_dbg_viewer = NULL;
395   PetscBool    flg;
396 
397   PetscFunctionBegin;
398   PetscCall(MatDestroy(&sub_schurs->A));
399   PetscCall(MatDestroy(&sub_schurs->S));
400   if (Ain) {
401     PetscCall(PetscObjectReference((PetscObject)Ain));
402     sub_schurs->A = Ain;
403   }
404 
405   PetscCall(PetscObjectReference((PetscObject)Sin));
406   sub_schurs->S = Sin;
407   if (sub_schurs->schur_explicit) { sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A); }
408 
409   /* preliminary checks */
410   PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
411 
412   if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
413 
414   /* debug (MATLAB) */
415   if (sub_schurs->debug) {
416     PetscMPIInt size, rank;
417     PetscInt    nr, *print_schurs_ranks, print_schurs = PETSC_FALSE;
418 
419     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size));
420     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
421     nr = size;
422     PetscCall(PetscMalloc1(nr, &print_schurs_ranks));
423     PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
424     PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg));
425     if (!flg) print_schurs = PETSC_TRUE;
426     else {
427       print_schurs = PETSC_FALSE;
428       for (i = 0; i < nr; i++)
429         if (print_schurs_ranks[i] == (PetscInt)rank) {
430           print_schurs = PETSC_TRUE;
431           break;
432         }
433     }
434     PetscOptionsEnd();
435     PetscCall(PetscFree(print_schurs_ranks));
436     if (print_schurs) {
437       char filename[256];
438 
439       PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank));
440       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer));
441       PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB));
442     }
443   }
444 
445   /* restrict work on active processes */
446   if (sub_schurs->restrict_comm) {
447     PetscSubcomm subcomm;
448     PetscMPIInt  color, rank;
449 
450     color = 0;
451     if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
452     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
453     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm));
454     PetscCall(PetscSubcommSetNumber(subcomm, 2));
455     PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
456     PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL));
457     PetscCall(PetscSubcommDestroy(&subcomm));
458     if (!sub_schurs->n_subs) {
459       PetscCall(PetscCommDestroy(&comm_n));
460       PetscFunctionReturn(0);
461     }
462   } else {
463     PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL));
464   }
465 
466   /* get Schur complement matrices */
467   if (!sub_schurs->schur_explicit) {
468     Mat       tA_IB, tA_BI, tA_BB;
469     PetscBool isseqsbaij;
470     PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB));
471     PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij));
472     if (isseqsbaij) {
473       PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB));
474       PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB));
475       PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI));
476     } else {
477       PetscCall(PetscObjectReference((PetscObject)tA_BB));
478       A_BB = tA_BB;
479       PetscCall(PetscObjectReference((PetscObject)tA_IB));
480       A_IB = tA_IB;
481       PetscCall(PetscObjectReference((PetscObject)tA_BI));
482       A_BI = tA_BI;
483     }
484   } else {
485     A_II = NULL;
486     A_IB = NULL;
487     A_BI = NULL;
488     A_BB = NULL;
489   }
490   S_all = NULL;
491 
492   /* determine interior problems */
493   PetscCall(ISGetLocalSize(sub_schurs->is_I, &i));
494   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
495     PetscBT         touched;
496     const PetscInt *idx_B;
497     PetscInt        n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering;
498 
499     PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency");
500     /* get sizes */
501     PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I));
502     PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
503 
504     PetscCall(PetscMalloc1(n_I + n_B, &local_numbering));
505     PetscCall(PetscBTCreate(n_I + n_B, &touched));
506     PetscCall(PetscBTMemzero(n_I + n_B, touched));
507 
508     /* all boundary dofs must be skipped when adding layers */
509     PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B));
510     for (j = 0; j < n_B; j++) { PetscCall(PetscBTSet(touched, idx_B[j])); }
511     PetscCall(PetscArraycpy(local_numbering, idx_B, n_B));
512     PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B));
513 
514     /* add prescribed number of layers of dofs */
515     n_local_dofs = n_B;
516     n_prev_added = n_B;
517     for (layer = 0; layer < nlayers; layer++) {
518       PetscInt n_added = 0;
519       if (n_local_dofs == n_I + n_B) break;
520       PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B);
521       PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added));
522       n_prev_added = n_added;
523       n_local_dofs += n_added;
524       if (!n_added) break;
525     }
526     PetscCall(PetscBTDestroy(&touched));
527 
528     /* IS for I layer dofs in original numbering */
529     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer));
530     PetscCall(PetscFree(local_numbering));
531     PetscCall(ISSort(is_I_layer));
532     /* IS for I layer dofs in I numbering */
533     if (!sub_schurs->schur_explicit) {
534       ISLocalToGlobalMapping ItoNmap;
535       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap));
536       PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I));
537       PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
538 
539       /* II block */
540       PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II));
541     }
542   } else {
543     PetscInt n_I;
544 
545     /* IS for I dofs in original numbering */
546     PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
547     is_I_layer = sub_schurs->is_I;
548 
549     /* IS for I dofs in I numbering (strided 1) */
550     if (!sub_schurs->schur_explicit) {
551       PetscCall(ISGetSize(sub_schurs->is_I, &n_I));
552       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I));
553 
554       /* II block is the same */
555       PetscCall(PetscObjectReference((PetscObject)A_II));
556       AE_II = A_II;
557     }
558   }
559 
560   /* Get info on subset sizes and sum of all subsets sizes */
561   max_subset_size = 0;
562   local_size      = 0;
563   for (i = 0; i < sub_schurs->n_subs; i++) {
564     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
565     max_subset_size = PetscMax(subset_size, max_subset_size);
566     local_size += subset_size;
567   }
568 
569   /* Work arrays for local indices */
570   extra = 0;
571   PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
572   if (sub_schurs->schur_explicit && is_I_layer) { PetscCall(ISGetLocalSize(is_I_layer, &extra)); }
573   PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N));
574   if (extra) {
575     const PetscInt *idxs;
576     PetscCall(ISGetIndices(is_I_layer, &idxs));
577     PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra));
578     PetscCall(ISRestoreIndices(is_I_layer, &idxs));
579   }
580   PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1));
581   PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2));
582 
583   /* Get local indices in local numbering */
584   local_size       = 0;
585   local_stash_size = 0;
586   for (i = 0; i < sub_schurs->n_subs; i++) {
587     const PetscInt *idxs;
588 
589     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
590     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
591     /* start (smallest in global ordering) and multiplicity */
592     auxnum1[i] = idxs[0];
593     auxnum2[i] = subset_size * subset_size;
594     /* subset indices in local numbering */
595     PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size));
596     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
597     local_size += subset_size;
598     local_stash_size += subset_size * subset_size;
599   }
600 
601   /* allocate extra workspace needed only for GETRI or SYTRF */
602   use_potr = use_sytr = PETSC_FALSE;
603   if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
604     use_potr = PETSC_TRUE;
605   } else if (sub_schurs->is_symmetric) {
606     use_sytr = PETSC_TRUE;
607   }
608   if (local_size && !use_potr) {
609     PetscScalar  lwork, dummyscalar = 0.;
610     PetscBLASInt dummyint = 0;
611 
612     B_lwork = -1;
613     PetscCall(PetscBLASIntCast(local_size, &B_N));
614     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
615     if (use_sytr) {
616       PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
617       PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %d", (int)B_ierr);
618     } else {
619       PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
620       PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %d", (int)B_ierr);
621     }
622     PetscCall(PetscFPTrapPop());
623     PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
624     PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots));
625   } else {
626     Bwork  = NULL;
627     pivots = NULL;
628   }
629 
630   /* prepare data for summing up properly schurs on subsets */
631   PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n));
632   PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets));
633   PetscCall(ISDestroy(&all_subsets_n));
634   PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult));
635   PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n));
636   PetscCall(ISDestroy(&all_subsets));
637   PetscCall(ISDestroy(&all_subsets_mult));
638   PetscCall(ISGetLocalSize(all_subsets_n, &i));
639   PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size);
640   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash));
641   PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash));
642   PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash));
643   PetscCall(ISDestroy(&all_subsets_n));
644 
645   /* subset indices in local boundary numbering */
646   if (!sub_schurs->is_Ej_all) {
647     PetscInt *all_local_idx_B;
648 
649     PetscCall(PetscMalloc1(local_size, &all_local_idx_B));
650     PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B));
651     PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size);
652     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all));
653   }
654 
655   if (change) {
656     ISLocalToGlobalMapping BtoS;
657     IS                     change_primal_B;
658     IS                     change_primal_all;
659 
660     PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
661     PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
662     PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub));
663     for (i = 0; i < sub_schurs->n_subs; i++) {
664       ISLocalToGlobalMapping NtoS;
665       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS));
666       PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i]));
667       PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
668     }
669     PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B));
670     PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS));
671     PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all));
672     PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
673     PetscCall(ISDestroy(&change_primal_B));
674     PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change));
675     for (i = 0; i < sub_schurs->n_subs; i++) {
676       Mat change_sub;
677 
678       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
679       PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i]));
680       PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY));
681       if (!sub_schurs->change_with_qr) {
682         PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub));
683       } else {
684         Mat change_subt;
685         PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt));
686         PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub));
687         PetscCall(MatDestroy(&change_subt));
688       }
689       PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub));
690       PetscCall(MatDestroy(&change_sub));
691       PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix));
692       PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_"));
693     }
694     PetscCall(ISDestroy(&change_primal_all));
695   }
696 
697   /* Local matrix of all local Schur on subsets (transposed) */
698   if (!sub_schurs->S_Ej_all) {
699     Mat          T;
700     PetscScalar *v;
701     PetscInt    *ii, *jj;
702     PetscInt     cum, i, j, k;
703 
704     /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
705        Allocate properly a representative matrix and duplicate */
706     PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v));
707     ii[0] = 0;
708     cum   = 0;
709     for (i = 0; i < sub_schurs->n_subs; i++) {
710       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
711       for (j = 0; j < subset_size; j++) {
712         const PetscInt row = cum + j;
713         PetscInt       col = cum;
714 
715         ii[row + 1] = ii[row] + subset_size;
716         for (k = ii[row]; k < ii[row + 1]; k++) {
717           jj[k] = col;
718           col++;
719         }
720       }
721       cum += subset_size;
722     }
723     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T));
724     PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all));
725     PetscCall(MatDestroy(&T));
726     PetscCall(PetscFree3(ii, jj, v));
727   }
728   /* matrices for deluxe scaling and adaptive selection */
729   if (compute_Stilda) {
730     if (!sub_schurs->sum_S_Ej_tilda_all) { PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all)); }
731     if (!sub_schurs->sum_S_Ej_inv_all && deluxe) { PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all)); }
732   }
733 
734   /* Compute Schur complements explicitly */
735   F = NULL;
736   if (!sub_schurs->schur_explicit) {
737     /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
738        it is not efficient, unless the economic version of the scaling is used */
739     Mat          S_Ej_expl;
740     PetscScalar *work;
741     PetscInt     j, *dummy_idx;
742     PetscBool    Sdense;
743 
744     PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work));
745     local_size = 0;
746     for (i = 0; i < sub_schurs->n_subs; i++) {
747       IS  is_subset_B;
748       Mat AE_EE, AE_IE, AE_EI, S_Ej;
749 
750       /* subsets in original and boundary numbering */
751       PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B));
752       /* EE block */
753       PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE));
754       /* IE block */
755       PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE));
756       /* EI block */
757       if (sub_schurs->is_symmetric) {
758         PetscCall(MatCreateTranspose(AE_IE, &AE_EI));
759       } else if (sub_schurs->is_hermitian) {
760         PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI));
761       } else {
762         PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI));
763       }
764       PetscCall(ISDestroy(&is_subset_B));
765       PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej));
766       PetscCall(MatDestroy(&AE_EE));
767       PetscCall(MatDestroy(&AE_IE));
768       PetscCall(MatDestroy(&AE_EI));
769       if (AE_II == A_II) { /* we can reuse the same ksp */
770         KSP ksp;
771         PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp));
772         PetscCall(MatSchurComplementSetKSP(S_Ej, ksp));
773       } else { /* build new ksp object which inherits ksp and pc types from the original one */
774         KSP       origksp, schurksp;
775         PC        origpc, schurpc;
776         KSPType   ksp_type;
777         PetscInt  n_internal;
778         PetscBool ispcnone;
779 
780         PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp));
781         PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp));
782         PetscCall(KSPGetType(origksp, &ksp_type));
783         PetscCall(KSPSetType(schurksp, ksp_type));
784         PetscCall(KSPGetPC(schurksp, &schurpc));
785         PetscCall(KSPGetPC(origksp, &origpc));
786         PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone));
787         if (!ispcnone) {
788           PCType pc_type;
789           PetscCall(PCGetType(origpc, &pc_type));
790           PetscCall(PCSetType(schurpc, pc_type));
791         } else {
792           PetscCall(PCSetType(schurpc, PCLU));
793         }
794         PetscCall(ISGetSize(is_I, &n_internal));
795         if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
796           MatSolverType solver = NULL;
797           PetscCall(PCFactorGetMatSolverType(origpc, (MatSolverType *)&solver));
798           if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver));
799         }
800         PetscCall(KSPSetUp(schurksp));
801       }
802       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
803       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl));
804       PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl));
805       PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense));
806       if (Sdense) {
807         for (j = 0; j < subset_size; j++) { dummy_idx[j] = local_size + j; }
808         PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES));
809       } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices");
810       PetscCall(MatDestroy(&S_Ej));
811       PetscCall(MatDestroy(&S_Ej_expl));
812       local_size += subset_size;
813     }
814     PetscCall(PetscFree2(dummy_idx, work));
815     /* free */
816     PetscCall(ISDestroy(&is_I));
817     PetscCall(MatDestroy(&AE_II));
818     PetscCall(PetscFree(all_local_idx_N));
819   } else {
820     Mat                A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL;
821     Mat               *gdswA;
822     Vec                Dall = NULL;
823     IS                 is_A_all, *is_p_r = NULL;
824     MatType            Stype;
825     PetscScalar       *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL;
826     PetscScalar       *SEj_arr = NULL, *SEjinv_arr = NULL;
827     const PetscScalar *rS_data;
828     PetscInt           n, n_I, size_schur, size_active_schur, cum, cum2;
829     PetscBool          economic, solver_S, S_lower_triangular = PETSC_FALSE;
830     PetscBool          schur_has_vertices, factor_workaround;
831     PetscBool          use_cholesky;
832 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
833     PetscBool oldpin;
834 #endif
835 
836     /* get sizes */
837     n_I = 0;
838     if (is_I_layer) { PetscCall(ISGetLocalSize(is_I_layer, &n_I)); }
839     economic = PETSC_FALSE;
840     PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum));
841     if (cum != n_I) economic = PETSC_TRUE;
842     PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL));
843     size_active_schur = local_size;
844 
845     /* import scaling vector (wrong formulation if we have 3D edges) */
846     if (scaling && compute_Stilda) {
847       const PetscScalar *array;
848       PetscScalar       *array2;
849       const PetscInt    *idxs;
850       PetscInt           i;
851 
852       PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
853       PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall));
854       PetscCall(VecGetArrayRead(scaling, &array));
855       PetscCall(VecGetArray(Dall, &array2));
856       for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]];
857       PetscCall(VecRestoreArray(Dall, &array2));
858       PetscCall(VecRestoreArrayRead(scaling, &array));
859       PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
860       deluxe = PETSC_FALSE;
861     }
862 
863     /* size active schurs does not count any dirichlet or vertex dof on the interface */
864     factor_workaround  = PETSC_FALSE;
865     schur_has_vertices = PETSC_FALSE;
866     cum                = n_I + size_active_schur;
867     if (sub_schurs->is_dir) {
868       const PetscInt *idxs;
869       PetscInt        n_dir;
870 
871       PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
872       PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
873       PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir));
874       PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
875       cum += n_dir;
876       if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
877     }
878     /* include the primal vertices in the Schur complement */
879     if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
880       PetscInt n_v;
881 
882       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
883       if (n_v) {
884         const PetscInt *idxs;
885 
886         PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
887         PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v));
888         PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
889         cum += n_v;
890         if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
891         schur_has_vertices = PETSC_TRUE;
892       }
893     }
894     size_schur = cum - n_I;
895     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all));
896 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
897     oldpin = sub_schurs->A->boundtocpu;
898     PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE));
899 #endif
900     if (cum == n) {
901       PetscCall(ISSetPermutation(is_A_all));
902       PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A));
903     } else {
904       PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A));
905     }
906 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
907     PetscCall(MatBindToCPU(sub_schurs->A, oldpin));
908 #endif
909     PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix));
910     PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_"));
911 
912     /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
913        this is a workaround */
914     if (benign_n) {
915       Vec                    v, benign_AIIm1_ones;
916       ISLocalToGlobalMapping N_to_reor;
917       IS                     is_p0, is_p0_p;
918       PetscScalar           *cs_AIB, *AIIm1_data;
919       PetscInt               sizeA;
920 
921       PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor));
922       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
923       PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p));
924       PetscCall(ISDestroy(&is_p0));
925       PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
926       PetscCall(VecGetSize(v, &sizeA));
927       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat));
928       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat));
929       PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB));
930       PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
931       PetscCall(PetscMalloc1(benign_n, &is_p_r));
932       /* compute colsum of A_IB restricted to pressures */
933       for (i = 0; i < benign_n; i++) {
934         const PetscScalar *array;
935         const PetscInt    *idxs;
936         PetscInt           j, nz;
937 
938         PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i]));
939         PetscCall(ISGetLocalSize(is_p_r[i], &nz));
940         PetscCall(ISGetIndices(is_p_r[i], &idxs));
941         for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.;
942         PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
943         PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
944         PetscCall(MatMult(A, benign_AIIm1_ones, v));
945         PetscCall(VecResetArray(benign_AIIm1_ones));
946         PetscCall(VecGetArrayRead(v, &array));
947         for (j = 0; j < size_schur; j++) {
948 #if defined(PETSC_USE_COMPLEX)
949           cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz));
950 #else
951           cs_AIB[i * size_schur + j] = array[j + n_I] / nz;
952 #endif
953         }
954         PetscCall(VecRestoreArrayRead(v, &array));
955       }
956       PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB));
957       PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
958       PetscCall(VecDestroy(&v));
959       PetscCall(VecDestroy(&benign_AIIm1_ones));
960       PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
961       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
962       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
963       PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL));
964       PetscCall(ISDestroy(&is_p0_p));
965       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
966     }
967     PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric));
968     PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian));
969     PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef));
970 
971     /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
972     use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
973 
974     /* when using the benign subspace trick, the local Schur complements are SPD */
975     /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
976        Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
977     if (benign_trick) {
978       sub_schurs->is_posdef = PETSC_TRUE;
979       PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg));
980       if (flg) use_cholesky = PETSC_FALSE;
981     }
982 
983     if (n_I) {
984       IS        is_schur;
985       char      stype[64];
986       PetscBool gpu = PETSC_FALSE;
987 
988       if (use_cholesky) {
989         PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_CHOLESKY, &F));
990       } else {
991         PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_LU, &F));
992       }
993       PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE));
994 #if defined(PETSC_HAVE_MKL_PARDISO)
995       if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
996 #endif
997       /* subsets ordered last */
998       PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur));
999       PetscCall(MatFactorSetSchurIS(F, is_schur));
1000       PetscCall(ISDestroy(&is_schur));
1001 
1002       /* factorization step */
1003       if (use_cholesky) {
1004         PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
1005 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1006         PetscCall(MatMumpsSetIcntl(F, 19, 2));
1007 #endif
1008         PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
1009         S_lower_triangular = PETSC_TRUE;
1010       } else {
1011         PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
1012 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1013         PetscCall(MatMumpsSetIcntl(F, 19, 3));
1014 #endif
1015         PetscCall(MatLUFactorNumeric(F, A, NULL));
1016       }
1017       PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view"));
1018 
1019       if (matl_dbg_viewer) {
1020         Mat S;
1021         IS  is;
1022 
1023         PetscCall(PetscObjectSetName((PetscObject)A, "A"));
1024         PetscCall(MatView(A, matl_dbg_viewer));
1025         PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1026         PetscCall(PetscObjectSetName((PetscObject)S, "S"));
1027         PetscCall(MatView(S, matl_dbg_viewer));
1028         PetscCall(MatDestroy(&S));
1029         PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is));
1030         PetscCall(PetscObjectSetName((PetscObject)is, "I"));
1031         PetscCall(ISView(is, matl_dbg_viewer));
1032         PetscCall(ISDestroy(&is));
1033         PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is));
1034         PetscCall(PetscObjectSetName((PetscObject)is, "B"));
1035         PetscCall(ISView(is, matl_dbg_viewer));
1036         PetscCall(ISDestroy(&is));
1037         PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA"));
1038         PetscCall(ISView(is_A_all, matl_dbg_viewer));
1039         for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) {
1040           IS   is;
1041           char name[16];
1042 
1043           PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i));
1044           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1045           PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is));
1046           PetscCall(PetscObjectSetName((PetscObject)is, name));
1047           PetscCall(ISView(is, matl_dbg_viewer));
1048           PetscCall(ISDestroy(&is));
1049           if (sub_schurs->change) {
1050             Mat T;
1051 
1052             PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i));
1053             PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL));
1054             PetscCall(PetscObjectSetName((PetscObject)T, name));
1055             PetscCall(MatView(T, matl_dbg_viewer));
1056             PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i));
1057             PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name));
1058             PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer));
1059           }
1060           cum += subset_size;
1061         }
1062         PetscCall(PetscViewerFlush(matl_dbg_viewer));
1063       }
1064 
1065       /* get explicit Schur Complement computed during numeric factorization */
1066       PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1067       PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype)));
1068 #if defined(PETSC_HAVE_CUDA)
1069       PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, ""));
1070 #endif
1071       if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype)));
1072       PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL));
1073       PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all));
1074       PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
1075       PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
1076       PetscCall(MatGetType(S_all, &Stype));
1077 
1078       /* we can reuse the solvers if we are not using the economic version */
1079       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1080       if (!sub_schurs->gdsw) {
1081         factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1082         if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE;
1083       }
1084       solver_S = PETSC_TRUE;
1085 
1086       /* update the Schur complement with the change of basis on the pressures */
1087       if (benign_n) {
1088         const PetscScalar *cs_AIB;
1089         PetscScalar       *S_data, *AIIm1_data;
1090         Mat                S2 = NULL, S3 = NULL; /* dbg */
1091         PetscScalar       *S2_data, *S3_data;    /* dbg */
1092         Vec                v, benign_AIIm1_ones;
1093         PetscInt           sizeA;
1094 
1095         PetscCall(MatDenseGetArray(S_all, &S_data));
1096         PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
1097         PetscCall(VecGetSize(v, &sizeA));
1098 #if defined(PETSC_HAVE_MUMPS)
1099         PetscCall(MatMumpsSetIcntl(F, 26, 0));
1100 #endif
1101 #if defined(PETSC_HAVE_MKL_PARDISO)
1102         PetscCall(MatMkl_PardisoSetCntl(F, 70, 1));
1103 #endif
1104         PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB));
1105         PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
1106         if (matl_dbg_viewer) {
1107           PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2));
1108           PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3));
1109           PetscCall(MatDenseGetArray(S2, &S2_data));
1110           PetscCall(MatDenseGetArray(S3, &S3_data));
1111         }
1112         for (i = 0; i < benign_n; i++) {
1113           PetscScalar    *array, sum = 0., one = 1., *sums;
1114           const PetscInt *idxs;
1115           PetscInt        k, j, nz;
1116           PetscBLASInt    B_k, B_n;
1117 
1118           PetscCall(PetscCalloc1(benign_n, &sums));
1119           PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
1120           PetscCall(VecCopy(benign_AIIm1_ones, v));
1121           PetscCall(MatSolve(F, v, benign_AIIm1_ones));
1122           PetscCall(MatMult(A, benign_AIIm1_ones, v));
1123           PetscCall(VecResetArray(benign_AIIm1_ones));
1124           /* p0 dofs (eliminated) are excluded from the sums */
1125           for (k = 0; k < benign_n; k++) {
1126             PetscCall(ISGetLocalSize(is_p_r[k], &nz));
1127             PetscCall(ISGetIndices(is_p_r[k], &idxs));
1128             for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i];
1129             PetscCall(ISRestoreIndices(is_p_r[k], &idxs));
1130           }
1131           PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
1132           if (matl_dbg_viewer) {
1133             Vec  vv;
1134             char name[16];
1135 
1136             PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv));
1137             PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i));
1138             PetscCall(PetscObjectSetName((PetscObject)vv, name));
1139             PetscCall(VecView(vv, matl_dbg_viewer));
1140           }
1141           /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1142           /* cs_AIB already scaled by 1./nz */
1143           B_k = 1;
1144           for (k = 0; k < benign_n; k++) {
1145             sum = sums[k];
1146             PetscCall(PetscBLASIntCast(size_schur, &B_n));
1147 
1148             if (PetscAbsScalar(sum) == 0.0) continue;
1149             if (k == i) {
1150               PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1151               if (matl_dbg_viewer) { PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); }
1152             } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1153               sum /= 2.0;
1154               PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1155               if (matl_dbg_viewer) { PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n)); }
1156             }
1157           }
1158           sum = 1.;
1159           PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1160           if (matl_dbg_viewer) { PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n)); }
1161           PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
1162           /* set p0 entry of AIIm1_ones to zero */
1163           PetscCall(ISGetLocalSize(is_p_r[i], &nz));
1164           PetscCall(ISGetIndices(is_p_r[i], &idxs));
1165           for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.;
1166           PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
1167           PetscCall(PetscFree(sums));
1168         }
1169         PetscCall(VecDestroy(&benign_AIIm1_ones));
1170         if (matl_dbg_viewer) {
1171           PetscCall(MatDenseRestoreArray(S2, &S2_data));
1172           PetscCall(MatDenseRestoreArray(S3, &S3_data));
1173         }
1174         if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1175           PetscInt k, j;
1176           for (k = 0; k < size_schur; k++) {
1177             for (j = k; j < size_schur; j++) { S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]); }
1178           }
1179         }
1180 
1181         /* restore defaults */
1182 #if defined(PETSC_HAVE_MUMPS)
1183         PetscCall(MatMumpsSetIcntl(F, 26, -1));
1184 #endif
1185 #if defined(PETSC_HAVE_MKL_PARDISO)
1186         PetscCall(MatMkl_PardisoSetCntl(F, 70, 0));
1187 #endif
1188         PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB));
1189         PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
1190         PetscCall(VecDestroy(&v));
1191         PetscCall(MatDenseRestoreArray(S_all, &S_data));
1192         if (matl_dbg_viewer) {
1193           Mat S;
1194 
1195           PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1196           PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1197           PetscCall(PetscObjectSetName((PetscObject)S, "Sb"));
1198           PetscCall(MatView(S, matl_dbg_viewer));
1199           PetscCall(MatDestroy(&S));
1200           PetscCall(PetscObjectSetName((PetscObject)S2, "S2P"));
1201           PetscCall(MatView(S2, matl_dbg_viewer));
1202           PetscCall(PetscObjectSetName((PetscObject)S3, "S3P"));
1203           PetscCall(MatView(S3, matl_dbg_viewer));
1204           PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs"));
1205           PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer));
1206           PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1207         }
1208         PetscCall(MatDestroy(&S2));
1209         PetscCall(MatDestroy(&S3));
1210       }
1211       if (!reuse_solvers) {
1212         for (i = 0; i < benign_n; i++) { PetscCall(ISDestroy(&is_p_r[i])); }
1213         PetscCall(PetscFree(is_p_r));
1214         PetscCall(MatDestroy(&cs_AIB_mat));
1215         PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1216       }
1217     } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1218       PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all));
1219       PetscCall(MatGetType(S_all, &Stype));
1220       reuse_solvers     = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1221       factor_workaround = PETSC_FALSE;
1222       solver_S          = PETSC_FALSE;
1223     }
1224 
1225     if (reuse_solvers) {
1226       Mat                A_II, pA_II, Afake;
1227       Vec                vec1_B;
1228       PCBDDCReuseSolvers msolv_ctx;
1229       PetscInt           n_R;
1230 
1231       if (sub_schurs->reuse_solver) {
1232         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1233       } else {
1234         PetscCall(PetscNew(&sub_schurs->reuse_solver));
1235       }
1236       msolv_ctx = sub_schurs->reuse_solver;
1237       PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL));
1238       PetscCall(PetscObjectReference((PetscObject)F));
1239       msolv_ctx->F = F;
1240       PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL));
1241       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1242       {
1243         PetscScalar *array;
1244         PetscInt     n;
1245 
1246         PetscCall(VecGetLocalSize(msolv_ctx->sol, &n));
1247         PetscCall(VecGetArray(msolv_ctx->sol, &array));
1248         PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs));
1249         PetscCall(VecRestoreArray(msolv_ctx->sol, &array));
1250       }
1251       msolv_ctx->has_vertices = schur_has_vertices;
1252 
1253       /* interior solver */
1254       PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver));
1255       PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II));
1256       PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL));
1257       PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)"));
1258       PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx));
1259       PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1260       PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior));
1261       PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose));
1262       if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy));
1263 
1264       /* correction solver */
1265       if (!sub_schurs->gdsw) {
1266         PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver));
1267         PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL));
1268         PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)"));
1269         PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx));
1270         PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1271         PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction));
1272         PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose));
1273 
1274         /* scatter and vecs for Schur complement solver */
1275         PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B));
1276         PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL));
1277         if (!schur_has_vertices) {
1278           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B));
1279           PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B));
1280           PetscCall(PetscObjectReference((PetscObject)is_A_all));
1281           msolv_ctx->is_R = is_A_all;
1282         } else {
1283           IS              is_B_all;
1284           const PetscInt *idxs;
1285           PetscInt        dual, n_v, n;
1286 
1287           PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
1288           dual = size_schur - n_v;
1289           PetscCall(ISGetLocalSize(is_A_all, &n));
1290           PetscCall(ISGetIndices(is_A_all, &idxs));
1291           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all));
1292           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B));
1293           PetscCall(ISDestroy(&is_B_all));
1294           PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all));
1295           PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B));
1296           PetscCall(ISDestroy(&is_B_all));
1297           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R));
1298           PetscCall(ISRestoreIndices(is_A_all, &idxs));
1299         }
1300         PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R));
1301         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake));
1302         PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY));
1303         PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY));
1304         PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake));
1305         PetscCall(MatDestroy(&Afake));
1306         PetscCall(VecDestroy(&vec1_B));
1307       }
1308       /* communicate benign info to solver context */
1309       if (benign_n) {
1310         PetscScalar *array;
1311 
1312         msolv_ctx->benign_n             = benign_n;
1313         msolv_ctx->benign_zerodiag_subs = is_p_r;
1314         PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals));
1315         msolv_ctx->benign_csAIB = cs_AIB_mat;
1316         PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL));
1317         PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array));
1318         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec));
1319         PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array));
1320         msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1321       }
1322     } else {
1323       if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1324       PetscCall(PetscFree(sub_schurs->reuse_solver));
1325     }
1326     PetscCall(MatDestroy(&A));
1327     PetscCall(ISDestroy(&is_A_all));
1328 
1329     /* Work arrays */
1330     PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work));
1331 
1332     /* S_Ej_all */
1333     cum = cum2 = 0;
1334     PetscCall(MatDenseGetArrayRead(S_all, &rS_data));
1335     PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr));
1336     if (sub_schurs->sum_S_Ej_inv_all) { PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); }
1337     if (sub_schurs->gdsw) { PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA)); }
1338     for (i = 0; i < sub_schurs->n_subs; i++) {
1339       PetscInt j;
1340 
1341       /* get S_E (or K^i_EE for GDSW) */
1342       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1343       if (sub_schurs->gdsw) {
1344         Mat T;
1345 
1346         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T));
1347         PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T));
1348         PetscCall(MatDestroy(&T));
1349       } else {
1350         if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1351           PetscInt k;
1352           for (k = 0; k < subset_size; k++) {
1353             for (j = k; j < subset_size; j++) {
1354               work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1355               work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]);
1356             }
1357           }
1358         } else { /* just copy to workspace */
1359           PetscInt k;
1360           for (k = 0; k < subset_size; k++) {
1361             for (j = 0; j < subset_size; j++) { work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; }
1362           }
1363         }
1364       }
1365       /* insert S_E values */
1366       if (sub_schurs->change) {
1367         Mat change_sub, SEj, T;
1368 
1369         /* change basis */
1370         PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1371         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1372         if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1373           Mat T2;
1374           PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1375           PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1376           PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1377           PetscCall(MatDestroy(&T2));
1378         } else {
1379           PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1380         }
1381         PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1382         PetscCall(MatDestroy(&T));
1383         PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL));
1384         PetscCall(MatDestroy(&SEj));
1385       }
1386       PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1387       if (compute_Stilda) {
1388         if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
1389           Mat                M;
1390           const PetscScalar *vals;
1391           PetscBool          isdense, isdensecuda;
1392 
1393           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M));
1394           PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
1395           PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
1396           if (!PetscBTLookup(sub_schurs->is_edge, i)) { PetscCall(MatSetType(M, Stype)); }
1397           PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense));
1398           PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda));
1399           if (use_cholesky) {
1400             PetscCall(MatCholeskyFactor(M, NULL, NULL));
1401           } else {
1402             PetscCall(MatLUFactor(M, NULL, NULL, NULL));
1403           }
1404           if (isdense) {
1405             PetscCall(MatSeqDenseInvertFactors_Private(M));
1406 #if defined(PETSC_HAVE_CUDA)
1407           } else if (isdensecuda) {
1408             PetscCall(MatSeqDenseCUDAInvertFactors_Private(M));
1409 #endif
1410           } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype);
1411           PetscCall(MatDenseGetArrayRead(M, &vals));
1412           PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size));
1413           PetscCall(MatDenseRestoreArrayRead(M, &vals));
1414           PetscCall(MatDestroy(&M));
1415         } else if (scaling) { /* not using deluxe */
1416           Mat          SEj;
1417           Vec          D;
1418           PetscScalar *array;
1419 
1420           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1421           PetscCall(VecGetArray(Dall, &array));
1422           PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D));
1423           PetscCall(VecRestoreArray(Dall, &array));
1424           PetscCall(VecShift(D, -1.));
1425           PetscCall(MatDiagonalScale(SEj, D, D));
1426           PetscCall(MatDestroy(&SEj));
1427           PetscCall(VecDestroy(&D));
1428           PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1429         }
1430       }
1431       cum += subset_size;
1432       cum2 += subset_size * (size_schur + 1);
1433       SEj_arr += subset_size * subset_size;
1434       if (SEjinv_arr) SEjinv_arr += subset_size * subset_size;
1435     }
1436     if (sub_schurs->gdsw) { PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA)); }
1437     PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data));
1438     PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr));
1439     if (sub_schurs->sum_S_Ej_inv_all) { PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr)); }
1440     if (solver_S) { PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED)); }
1441 
1442     /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1443        however, preliminary tests indicate using GPUs is still faster in the solve phase */
1444 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1445     if (reuse_solvers) {
1446       Mat                  St;
1447       MatFactorSchurStatus st;
1448 
1449       flg = PETSC_FALSE;
1450       PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL));
1451       PetscCall(MatFactorGetSchurComplement(F, &St, &st));
1452       PetscCall(MatBindToCPU(St, flg));
1453       PetscCall(MatFactorRestoreSchurComplement(F, &St, st));
1454     }
1455 #endif
1456 
1457     schur_factor = NULL;
1458     if (compute_Stilda && size_active_schur) {
1459       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1460         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1461         PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur));
1462         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1463       } else {
1464         Mat S_all_inv = NULL;
1465 
1466         if (solver_S && !sub_schurs->gdsw) {
1467           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1468              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1469           if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1470             PetscScalar *data;
1471             PetscInt     nd = 0;
1472 
1473             PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1474             PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1475             PetscCall(MatDenseGetArray(S_all_inv, &data));
1476             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1477               PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1478             }
1479 
1480             /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1481             if (schur_has_vertices) {
1482               Mat          M;
1483               PetscScalar *tdata;
1484               PetscInt     nv = 0, news;
1485 
1486               PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
1487               news = size_active_schur + nv;
1488               PetscCall(PetscCalloc1(news * news, &tdata));
1489               for (i = 0; i < size_active_schur; i++) {
1490                 PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i));
1491                 PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv));
1492               }
1493               for (i = 0; i < nv; i++) {
1494                 PetscInt k = i + size_active_schur;
1495                 PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i));
1496               }
1497 
1498               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M));
1499               PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1500               PetscCall(MatCholeskyFactor(M, NULL, NULL));
1501               /* save the factors */
1502               cum = 0;
1503               PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor));
1504               for (i = 0; i < size_active_schur; i++) {
1505                 PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i));
1506                 cum += size_active_schur - i;
1507               }
1508               for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)]));
1509               PetscCall(MatSeqDenseInvertFactors_Private(M));
1510               /* move back just the active dofs to the Schur complement */
1511               for (i = 0; i < size_active_schur; i++) { PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur)); }
1512               PetscCall(PetscFree(tdata));
1513               PetscCall(MatDestroy(&M));
1514             } else { /* we can factorize and invert just the activedofs */
1515               Mat          M;
1516               PetscScalar *aux;
1517 
1518               PetscCall(PetscMalloc1(nd, &aux));
1519               for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)];
1520               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M));
1521               PetscCall(MatDenseSetLDA(M, size_schur));
1522               PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1523               PetscCall(MatCholeskyFactor(M, NULL, NULL));
1524               PetscCall(MatSeqDenseInvertFactors_Private(M));
1525               PetscCall(MatDestroy(&M));
1526               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M));
1527               PetscCall(MatZeroEntries(M));
1528               PetscCall(MatDestroy(&M));
1529               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M));
1530               PetscCall(MatDenseSetLDA(M, size_schur));
1531               PetscCall(MatZeroEntries(M));
1532               PetscCall(MatDestroy(&M));
1533               for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i];
1534               PetscCall(PetscFree(aux));
1535             }
1536             PetscCall(MatDenseRestoreArray(S_all_inv, &data));
1537           } else { /* use MatFactor calls to invert S */
1538             PetscCall(MatFactorInvertSchurComplement(F));
1539             PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1540           }
1541         } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1542           PetscCall(PetscObjectReference((PetscObject)S_all));
1543           S_all_inv = S_all;
1544           PetscCall(MatDenseGetArray(S_all_inv, &S_data));
1545           PetscCall(PetscBLASIntCast(size_schur, &B_N));
1546           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1547           if (use_potr) {
1548             PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr));
1549             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr);
1550             PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr));
1551             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr);
1552           } else if (use_sytr) {
1553             PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1554             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr);
1555             PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr));
1556             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr);
1557           } else {
1558             PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr));
1559             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr);
1560             PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1561             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr);
1562           }
1563           PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur));
1564           PetscCall(PetscFPTrapPop());
1565           PetscCall(MatDenseRestoreArray(S_all_inv, &S_data));
1566         } else if (sub_schurs->gdsw) {
1567           Mat      tS, tX, SEj, S_II, S_IE, S_EE;
1568           KSP      pS_II;
1569           PC       pS_II_pc;
1570           IS       EE, II;
1571           PetscInt nS;
1572 
1573           PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL));
1574           PetscCall(MatGetSize(tS, &nS, NULL));
1575           PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1576           for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */
1577             PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1578             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj));
1579 
1580             PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE));
1581             PetscCall(ISComplement(EE, 0, nS, &II));
1582             PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II));
1583             PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE));
1584             PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE));
1585             PetscCall(ISDestroy(&II));
1586             PetscCall(ISDestroy(&EE));
1587 
1588             PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II));
1589             PetscCall(KSPSetType(pS_II, KSPPREONLY));
1590             PetscCall(KSPGetPC(pS_II, &pS_II_pc));
1591             PetscCall(PCSetType(pS_II_pc, PCSVD));
1592             PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix));
1593             PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_"));
1594             PetscCall(KSPSetOperators(pS_II, S_II, S_II));
1595             PetscCall(MatDestroy(&S_II));
1596             PetscCall(KSPSetFromOptions(pS_II));
1597             PetscCall(KSPSetUp(pS_II));
1598             PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX));
1599             PetscCall(KSPMatSolve(pS_II, S_IE, tX));
1600             PetscCall(KSPDestroy(&pS_II));
1601 
1602             PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DEFAULT, &SEj));
1603             PetscCall(MatDestroy(&S_IE));
1604             PetscCall(MatDestroy(&tX));
1605             PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN));
1606             PetscCall(MatDestroy(&S_EE));
1607 
1608             PetscCall(MatDestroy(&SEj));
1609             cum += subset_size;
1610             SEjinv_arr += subset_size * subset_size;
1611           }
1612           PetscCall(MatDestroy(&tS));
1613           PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1614         }
1615         /* S_Ej_tilda_all */
1616         cum = cum2 = 0;
1617         rS_data    = NULL;
1618         if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data));
1619         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1620         for (i = 0; i < sub_schurs->n_subs; i++) {
1621           PetscInt j;
1622 
1623           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1624           /* get (St^-1)_E */
1625           /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1626              will be properly accessed later during adaptive selection */
1627           if (rS_data) {
1628             if (S_lower_triangular) {
1629               PetscInt k;
1630               if (sub_schurs->change) {
1631                 for (k = 0; k < subset_size; k++) {
1632                   for (j = k; j < subset_size; j++) {
1633                     work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1634                     work[j * subset_size + k] = work[k * subset_size + j];
1635                   }
1636                 }
1637               } else {
1638                 for (k = 0; k < subset_size; k++) {
1639                   for (j = k; j < subset_size; j++) { work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; }
1640                 }
1641               }
1642             } else {
1643               PetscInt k;
1644               for (k = 0; k < subset_size; k++) {
1645                 for (j = 0; j < subset_size; j++) { work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j]; }
1646               }
1647             }
1648           }
1649           if (sub_schurs->change) {
1650             Mat         change_sub, SEj, T;
1651             PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL;
1652 
1653             /* change basis */
1654             PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1655             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, rS_data ? work : SEjinv_arr, &SEj));
1656             if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1657               Mat T2;
1658               PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1659               PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1660               PetscCall(MatDestroy(&T2));
1661               PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1662             } else {
1663               PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1664             }
1665             PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1666             PetscCall(MatDestroy(&T));
1667             PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL));
1668             PetscCall(MatDestroy(&SEj));
1669           }
1670           if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size));
1671           cum += subset_size;
1672           cum2 += subset_size * (size_schur + 1);
1673           SEjinv_arr += subset_size * subset_size;
1674         }
1675         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1676         if (S_all_inv) {
1677           PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data));
1678           if (solver_S) {
1679             if (schur_has_vertices) {
1680               PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED));
1681             } else {
1682               PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED));
1683             }
1684           }
1685         }
1686         PetscCall(MatDestroy(&S_all_inv));
1687       }
1688 
1689       /* move back factors if needed */
1690       if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1691         Mat      S_tmp;
1692         PetscInt nd = 0;
1693 
1694         PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
1695         PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL));
1696         if (use_potr) {
1697           PetscScalar *data;
1698 
1699           PetscCall(MatDenseGetArray(S_tmp, &data));
1700           PetscCall(PetscArrayzero(data, size_schur * size_schur));
1701 
1702           if (S_lower_triangular) {
1703             cum = 0;
1704             for (i = 0; i < size_active_schur; i++) {
1705               PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i));
1706               cum += size_active_schur - i;
1707             }
1708           } else {
1709             PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur));
1710           }
1711           if (sub_schurs->is_dir) {
1712             PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1713             for (i = 0; i < nd; i++) { data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i]; }
1714           }
1715           /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1716              set the diagonal entry of the Schur factor to a very large value */
1717           for (i = size_active_schur + nd; i < size_schur; i++) { data[i * (size_schur + 1)] = infty; }
1718           PetscCall(MatDenseRestoreArray(S_tmp, &data));
1719         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1720         PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED));
1721       }
1722     } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1723       PetscScalar *data;
1724       PetscInt     nd = 0;
1725 
1726       if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1727         PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1728       }
1729       PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1730       PetscCall(MatDenseGetArray(S_all, &data));
1731       for (i = 0; i < size_active_schur; i++) { PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur)); }
1732       for (i = size_active_schur + nd; i < size_schur; i++) {
1733         PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1734         data[i * (size_schur + 1)] = infty;
1735       }
1736       PetscCall(MatDenseRestoreArray(S_all, &data));
1737       PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1738     }
1739     PetscCall(PetscFree(work));
1740     PetscCall(PetscFree(schur_factor));
1741     PetscCall(VecDestroy(&Dall));
1742   }
1743   PetscCall(ISDestroy(&is_I_layer));
1744   PetscCall(MatDestroy(&S_all));
1745   PetscCall(MatDestroy(&A_BB));
1746   PetscCall(MatDestroy(&A_IB));
1747   PetscCall(MatDestroy(&A_BI));
1748   PetscCall(MatDestroy(&F));
1749 
1750   PetscCall(PetscMalloc1(sub_schurs->n_subs, &nnz));
1751   for (i = 0; i < sub_schurs->n_subs; i++) { PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &nnz[i])); }
1752   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, sub_schurs->n_subs, nnz, PETSC_OWN_POINTER, &is_I_layer));
1753   PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all, sub_schurs->n_subs, nnz));
1754   PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1755   PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1756   if (compute_Stilda) {
1757     PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all, sub_schurs->n_subs, nnz));
1758     PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1759     PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1760     if (deluxe) {
1761       PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all, sub_schurs->n_subs, nnz));
1762       PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1763       PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1764     }
1765   }
1766   PetscCall(ISDestroy(&is_I_layer));
1767 
1768   /* Get local part of (\sum_j S_Ej) */
1769   if (!sub_schurs->sum_S_Ej_all) { PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all)); }
1770   PetscCall(VecSet(gstash, 0.0));
1771   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray));
1772   PetscCall(VecPlaceArray(lstash, stasharray));
1773   PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1774   PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1775   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray));
1776   PetscCall(VecResetArray(lstash));
1777   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray));
1778   PetscCall(VecPlaceArray(lstash, stasharray));
1779   PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1780   PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1781   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray));
1782   PetscCall(VecResetArray(lstash));
1783 
1784   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1785   if (compute_Stilda) {
1786     PetscCall(VecSet(gstash, 0.0));
1787     PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1788     PetscCall(VecPlaceArray(lstash, stasharray));
1789     PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1790     PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1791     PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1792     PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1793     PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1794     PetscCall(VecResetArray(lstash));
1795     if (deluxe) {
1796       PetscCall(VecSet(gstash, 0.0));
1797       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1798       PetscCall(VecPlaceArray(lstash, stasharray));
1799       PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1800       PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1801       PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1802       PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1803       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1804       PetscCall(VecResetArray(lstash));
1805     } else if (!sub_schurs->gdsw) {
1806       PetscScalar *array;
1807       PetscInt     cum;
1808 
1809       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1810       cum = 0;
1811       for (i = 0; i < sub_schurs->n_subs; i++) {
1812         PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1813         PetscCall(PetscBLASIntCast(subset_size, &B_N));
1814         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1815         if (use_potr) {
1816           PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr));
1817           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr);
1818           PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr));
1819           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr);
1820         } else if (use_sytr) {
1821           PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1822           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr);
1823           PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr));
1824           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr);
1825         } else {
1826           PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr));
1827           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr);
1828           PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1829           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr);
1830         }
1831         PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size));
1832         PetscCall(PetscFPTrapPop());
1833         cum += subset_size * subset_size;
1834       }
1835       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1836       PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
1837       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1838       sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1839     }
1840   }
1841   PetscCall(VecDestroy(&lstash));
1842   PetscCall(VecDestroy(&gstash));
1843   PetscCall(VecScatterDestroy(&sstash));
1844 
1845   if (matl_dbg_viewer) {
1846     if (sub_schurs->S_Ej_all) {
1847       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE"));
1848       PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer));
1849     }
1850     if (sub_schurs->sum_S_Ej_all) {
1851       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE"));
1852       PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer));
1853     }
1854     if (sub_schurs->sum_S_Ej_inv_all) {
1855       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm"));
1856       PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer));
1857     }
1858     if (sub_schurs->sum_S_Ej_tilda_all) {
1859       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt"));
1860       PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer));
1861     }
1862   }
1863 
1864   /* free workspace */
1865   if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
1866   if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
1867   PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
1868   PetscCall(PetscFree2(Bwork, pivots));
1869   PetscCall(PetscCommDestroy(&comm_n));
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw) {
1874   IS       *faces, *edges, *all_cc, vertices;
1875   PetscInt  s, i, n_faces, n_edges, n_all_cc;
1876   PetscBool is_sorted, ispardiso, ismumps;
1877 
1878   PetscFunctionBegin;
1879   PetscCall(ISSorted(is_I, &is_sorted));
1880   PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted");
1881   PetscCall(ISSorted(is_B, &is_sorted));
1882   PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted");
1883 
1884   /* reset any previous data */
1885   PetscCall(PCBDDCSubSchursReset(sub_schurs));
1886 
1887   sub_schurs->gdsw = gdsw;
1888 
1889   /* get index sets for faces and edges (already sorted by global ordering) */
1890   PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
1891   n_all_cc = n_faces + n_edges;
1892   PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge));
1893   PetscCall(PetscMalloc1(n_all_cc, &all_cc));
1894   n_all_cc = 0;
1895   for (i = 0; i < n_faces; i++) {
1896     PetscCall(ISGetSize(faces[i], &s));
1897     if (!s) continue;
1898     if (copycc) {
1899       PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc]));
1900     } else {
1901       PetscCall(PetscObjectReference((PetscObject)faces[i]));
1902       all_cc[n_all_cc] = faces[i];
1903     }
1904     n_all_cc++;
1905   }
1906   for (i = 0; i < n_edges; i++) {
1907     PetscCall(ISGetSize(edges[i], &s));
1908     if (!s) continue;
1909     if (copycc) {
1910       PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc]));
1911     } else {
1912       PetscCall(PetscObjectReference((PetscObject)edges[i]));
1913       all_cc[n_all_cc] = edges[i];
1914     }
1915     PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc));
1916     n_all_cc++;
1917   }
1918   PetscCall(PetscObjectReference((PetscObject)vertices));
1919   sub_schurs->is_vertices = vertices;
1920   PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
1921   sub_schurs->is_dir = NULL;
1922   PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir));
1923 
1924   /* Determine if MatFactor can be used */
1925   PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix));
1926 #if defined(PETSC_HAVE_MUMPS)
1927   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type)));
1928 #elif defined(PETSC_HAVE_MKL_PARDISO)
1929   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type)));
1930 #else
1931   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type)));
1932 #endif
1933 #if defined(PETSC_USE_COMPLEX)
1934   sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1935 #else
1936   sub_schurs->is_hermitian = PETSC_TRUE;
1937 #endif
1938   sub_schurs->is_posdef     = PETSC_TRUE;
1939   sub_schurs->is_symmetric  = PETSC_TRUE;
1940   sub_schurs->debug         = PETSC_FALSE;
1941   sub_schurs->restrict_comm = PETSC_FALSE;
1942   PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
1943   PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL));
1944   PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL));
1945   PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL));
1946   PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL));
1947   PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL));
1948   PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL));
1949   PetscOptionsEnd();
1950   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps));
1951   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso));
1952   sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
1953 
1954   /* for reals, symmetric and hermitian are synonims */
1955 #if !defined(PETSC_USE_COMPLEX)
1956   sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1957   sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1958 #endif
1959 
1960   PetscCall(PetscObjectReference((PetscObject)is_I));
1961   sub_schurs->is_I = is_I;
1962   PetscCall(PetscObjectReference((PetscObject)is_B));
1963   sub_schurs->is_B = is_B;
1964   PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
1965   sub_schurs->l2gmap = graph->l2gmap;
1966   PetscCall(PetscObjectReference((PetscObject)BtoNmap));
1967   sub_schurs->BtoNmap            = BtoNmap;
1968   sub_schurs->n_subs             = n_all_cc;
1969   sub_schurs->is_subs            = all_cc;
1970   sub_schurs->S_Ej_all           = NULL;
1971   sub_schurs->sum_S_Ej_all       = NULL;
1972   sub_schurs->sum_S_Ej_inv_all   = NULL;
1973   sub_schurs->sum_S_Ej_tilda_all = NULL;
1974   sub_schurs->is_Ej_all          = NULL;
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs) {
1979   PCBDDCSubSchurs schurs_ctx;
1980 
1981   PetscFunctionBegin;
1982   PetscCall(PetscNew(&schurs_ctx));
1983   schurs_ctx->n_subs = 0;
1984   *sub_schurs        = schurs_ctx;
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs) {
1989   PetscInt i;
1990 
1991   PetscFunctionBegin;
1992   if (!sub_schurs) PetscFunctionReturn(0);
1993   PetscCall(PetscFree(sub_schurs->prefix));
1994   PetscCall(MatDestroy(&sub_schurs->A));
1995   PetscCall(MatDestroy(&sub_schurs->S));
1996   PetscCall(ISDestroy(&sub_schurs->is_I));
1997   PetscCall(ISDestroy(&sub_schurs->is_B));
1998   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
1999   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
2000   PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
2001   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
2002   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
2003   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
2004   PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
2005   PetscCall(ISDestroy(&sub_schurs->is_vertices));
2006   PetscCall(ISDestroy(&sub_schurs->is_dir));
2007   PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
2008   for (i = 0; i < sub_schurs->n_subs; i++) { PetscCall(ISDestroy(&sub_schurs->is_subs[i])); }
2009   if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs));
2010   if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
2011   PetscCall(PetscFree(sub_schurs->reuse_solver));
2012   if (sub_schurs->change) {
2013     for (i = 0; i < sub_schurs->n_subs; i++) {
2014       PetscCall(KSPDestroy(&sub_schurs->change[i]));
2015       PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
2016     }
2017   }
2018   PetscCall(PetscFree(sub_schurs->change));
2019   PetscCall(PetscFree(sub_schurs->change_primal_sub));
2020   sub_schurs->n_subs = 0;
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs) {
2025   PetscFunctionBegin;
2026   PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2027   PetscCall(PetscFree(*sub_schurs));
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added) {
2032   PetscInt i, j, n;
2033 
2034   PetscFunctionBegin;
2035   n = 0;
2036   for (i = -n_prev; i < 0; i++) {
2037     PetscInt start_dof = queue_tip[i];
2038     for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) {
2039       PetscInt dof = adjncy[j];
2040       if (!PetscBTLookup(touched, dof)) {
2041         PetscCall(PetscBTSet(touched, dof));
2042         queue_tip[n] = dof;
2043         n++;
2044       }
2045     }
2046   }
2047   *n_added = n;
2048   PetscFunctionReturn(0);
2049 }
2050