xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1 #include <petsc/private/pcbddcimpl.h>
2 #include <petsc/private/pcbddcprivateimpl.h>
3 #include <../src/mat/impls/dense/seq/dense.h>
4 #include <petscblaslapack.h>
5 
6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *);
7 static PetscErrorCode        PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *);
8 static PetscErrorCode        PCBDDCReuseSolvers_Interior(PC, Vec, Vec);
9 static PetscErrorCode        PCBDDCReuseSolvers_Correction(PC, Vec, Vec);
10 
11 /* if v2 is not present, correction is done in-place */
12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13 {
14   PetscScalar *array;
15   PetscScalar *array2;
16 
17   PetscFunctionBegin;
18   if (!ctx->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
19   if (sol && full) {
20     PetscInt n_I, size_schur;
21 
22     /* get sizes */
23     PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
24     PetscCall(VecGetSize(v, &n_I));
25     n_I = n_I - size_schur;
26     /* get schur sol from array */
27     PetscCall(VecGetArray(v, &array));
28     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
29     PetscCall(VecRestoreArray(v, &array));
30     /* apply interior sol correction */
31     PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work));
32     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
33     PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v));
34   }
35   if (v2) {
36     PetscInt nl;
37 
38     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
39     PetscCall(VecGetLocalSize(v2, &nl));
40     PetscCall(VecGetArray(v2, &array2));
41     PetscCall(PetscArraycpy(array2, array, nl));
42   } else {
43     PetscCall(VecGetArray(v, &array));
44     array2 = array;
45   }
46   if (!sol) { /* change rhs */
47     PetscInt n;
48     for (n = 0; n < ctx->benign_n; n++) {
49       PetscScalar     sum = 0.;
50       const PetscInt *cols;
51       PetscInt        nz, i;
52 
53       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
54       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
55       for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
56 #if defined(PETSC_USE_COMPLEX)
57       sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
58 #else
59       sum = -sum / nz;
60 #endif
61       for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
62       ctx->benign_save_vals[n] = array2[cols[nz - 1]];
63       array2[cols[nz - 1]]     = sum;
64       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
65     }
66   } else {
67     PetscInt n;
68     for (n = 0; n < ctx->benign_n; n++) {
69       PetscScalar     sum = 0.;
70       const PetscInt *cols;
71       PetscInt        nz, i;
72       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
73       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
74       for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
75 #if defined(PETSC_USE_COMPLEX)
76       sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
77 #else
78       sum = -sum / nz;
79 #endif
80       for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
81       array2[cols[nz - 1]] = ctx->benign_save_vals[n];
82       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
83     }
84   }
85   if (v2) {
86     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
87     PetscCall(VecRestoreArray(v2, &array2));
88   } else {
89     PetscCall(VecRestoreArray(v, &array));
90   }
91   if (!sol && full) {
92     Vec      usedv;
93     PetscInt n_I, size_schur;
94 
95     /* get sizes */
96     PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
97     PetscCall(VecGetSize(v, &n_I));
98     n_I = n_I - size_schur;
99     /* compute schur rhs correction */
100     if (v2) {
101       usedv = v2;
102     } else {
103       usedv = v;
104     }
105     /* apply schur rhs correction */
106     PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work));
107     PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array));
108     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
109     PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array));
110     PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec));
111     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
112   }
113   PetscFunctionReturn(PETSC_SUCCESS);
114 }
115 
116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
117 {
118   PCBDDCReuseSolvers ctx;
119   PetscBool          copy = PETSC_FALSE;
120 
121   PetscFunctionBegin;
122   PetscCall(PCShellGetContext(pc, &ctx));
123   if (full) {
124 #if defined(PETSC_HAVE_MUMPS)
125     PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
126 #endif
127 #if defined(PETSC_HAVE_MKL_PARDISO)
128     PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
129 #endif
130     copy = ctx->has_vertices;
131   } else { /* interior solver */
132 #if defined(PETSC_HAVE_MUMPS)
133     PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0));
134 #endif
135 #if defined(PETSC_HAVE_MKL_PARDISO)
136     PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1));
137 #endif
138     copy = PETSC_TRUE;
139   }
140   /* copy rhs into factored matrix workspace */
141   if (copy) {
142     PetscInt     n;
143     PetscScalar *array, *array_solver;
144 
145     PetscCall(VecGetLocalSize(rhs, &n));
146     PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array));
147     PetscCall(VecGetArray(ctx->rhs, &array_solver));
148     PetscCall(PetscArraycpy(array_solver, array, n));
149     PetscCall(VecRestoreArray(ctx->rhs, &array_solver));
150     PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array));
151 
152     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full));
153     if (transpose) {
154       PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol));
155     } else {
156       PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol));
157     }
158     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full));
159 
160     /* get back data to caller worskpace */
161     PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
162     PetscCall(VecGetArray(sol, &array));
163     PetscCall(PetscArraycpy(array, array_solver, n));
164     PetscCall(VecRestoreArray(sol, &array));
165     PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
166   } else {
167     if (ctx->benign_n) {
168       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full));
169       if (transpose) {
170         PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol));
171       } else {
172         PetscCall(MatSolve(ctx->F, ctx->rhs, sol));
173       }
174       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full));
175     } else {
176       if (transpose) {
177         PetscCall(MatSolveTranspose(ctx->F, rhs, sol));
178       } else {
179         PetscCall(MatSolve(ctx->F, rhs, sol));
180       }
181     }
182   }
183   /* restore defaults */
184 #if defined(PETSC_HAVE_MUMPS)
185   PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
186 #endif
187 #if defined(PETSC_HAVE_MKL_PARDISO)
188   PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
189 #endif
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
194 {
195   PetscFunctionBegin;
196   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE));
197   PetscFunctionReturn(PETSC_SUCCESS);
198 }
199 
200 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
201 {
202   PetscFunctionBegin;
203   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE));
204   PetscFunctionReturn(PETSC_SUCCESS);
205 }
206 
207 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
208 {
209   PetscFunctionBegin;
210   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE));
211   PetscFunctionReturn(PETSC_SUCCESS);
212 }
213 
214 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
215 {
216   PetscFunctionBegin;
217   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE));
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
222 {
223   PCBDDCReuseSolvers ctx;
224   PetscBool          iascii;
225 
226   PetscFunctionBegin;
227   PetscCall(PCShellGetContext(pc, &ctx));
228   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
229   if (iascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
230   PetscCall(MatView(ctx->F, viewer));
231   if (iascii) PetscCall(PetscViewerPopFormat(viewer));
232   PetscFunctionReturn(PETSC_SUCCESS);
233 }
234 
235 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
236 {
237   PetscInt i;
238 
239   PetscFunctionBegin;
240   PetscCall(MatDestroy(&reuse->F));
241   PetscCall(VecDestroy(&reuse->sol));
242   PetscCall(VecDestroy(&reuse->rhs));
243   PetscCall(PCDestroy(&reuse->interior_solver));
244   PetscCall(PCDestroy(&reuse->correction_solver));
245   PetscCall(ISDestroy(&reuse->is_R));
246   PetscCall(ISDestroy(&reuse->is_B));
247   PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
248   PetscCall(VecDestroy(&reuse->sol_B));
249   PetscCall(VecDestroy(&reuse->rhs_B));
250   for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
251   PetscCall(PetscFree(reuse->benign_zerodiag_subs));
252   PetscCall(PetscFree(reuse->benign_save_vals));
253   PetscCall(MatDestroy(&reuse->benign_csAIB));
254   PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
255   PetscCall(VecDestroy(&reuse->benign_corr_work));
256   PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
257   PetscFunctionReturn(PETSC_SUCCESS);
258 }
259 
260 static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc)
261 {
262   PCBDDCReuseSolvers ctx;
263 
264   PetscFunctionBegin;
265   PetscCall(PCShellGetContext(pc, &ctx));
266   PetscCall(PCBDDCReuseSolversReset(ctx));
267   PetscCall(PetscFree(ctx));
268   PetscCall(PCShellSetContext(pc, NULL));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
273 {
274   Mat         B, C, D, Bd, Cd, AinvBd;
275   KSP         ksp;
276   PC          pc;
277   PetscBool   isLU, isILU, isCHOL, Bdense, Cdense;
278   PetscReal   fill = 2.0;
279   PetscInt    n_I;
280   PetscMPIInt size;
281 
282   PetscFunctionBegin;
283   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size));
284   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices");
285   if (reuse == MAT_REUSE_MATRIX) {
286     PetscBool Sdense;
287 
288     PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
289     PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense");
290   }
291   PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
292   PetscCall(MatSchurComplementGetKSP(M, &ksp));
293   PetscCall(KSPGetPC(ksp, &pc));
294   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU));
295   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU));
296   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL));
297   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense));
298   PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense));
299   PetscCall(MatGetSize(B, &n_I, NULL));
300   if (n_I) {
301     if (!Bdense) {
302       PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
303     } else {
304       Bd = B;
305     }
306 
307     if (isLU || isILU || isCHOL) {
308       Mat fact;
309       PetscCall(KSPSetUp(ksp));
310       PetscCall(PCFactorGetMatrix(pc, &fact));
311       PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
312       PetscCall(MatMatSolve(fact, Bd, AinvBd));
313     } else {
314       PetscBool ex = PETSC_TRUE;
315 
316       if (ex) {
317         Mat Ainvd;
318 
319         PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
320         PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
321         PetscCall(MatDestroy(&Ainvd));
322       } else {
323         Vec          sol, rhs;
324         PetscScalar *arrayrhs, *arraysol;
325         PetscInt     i, nrhs, n;
326 
327         PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
328         PetscCall(MatGetSize(Bd, &n, &nrhs));
329         PetscCall(MatDenseGetArray(Bd, &arrayrhs));
330         PetscCall(MatDenseGetArray(AinvBd, &arraysol));
331         PetscCall(KSPGetSolution(ksp, &sol));
332         PetscCall(KSPGetRhs(ksp, &rhs));
333         for (i = 0; i < nrhs; i++) {
334           PetscCall(VecPlaceArray(rhs, arrayrhs + i * n));
335           PetscCall(VecPlaceArray(sol, arraysol + i * n));
336           PetscCall(KSPSolve(ksp, rhs, sol));
337           PetscCall(VecResetArray(rhs));
338           PetscCall(VecResetArray(sol));
339         }
340         PetscCall(MatDenseRestoreArray(Bd, &arrayrhs));
341         PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs));
342       }
343     }
344     if (!Bdense & !issym) PetscCall(MatDestroy(&Bd));
345 
346     if (!issym) {
347       if (!Cdense) {
348         PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
349       } else {
350         Cd = C;
351       }
352       PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
353       if (!Cdense) PetscCall(MatDestroy(&Cd));
354     } else {
355       PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
356       if (!Bdense) PetscCall(MatDestroy(&Bd));
357     }
358     PetscCall(MatDestroy(&AinvBd));
359   }
360 
361   if (D) {
362     Mat       Dd;
363     PetscBool Ddense;
364 
365     PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense));
366     if (!Ddense) {
367       PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
368     } else {
369       Dd = D;
370     }
371     if (n_I) {
372       PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN));
373     } else {
374       if (reuse == MAT_INITIAL_MATRIX) {
375         PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S));
376       } else {
377         PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN));
378       }
379     }
380     if (!Ddense) PetscCall(MatDestroy(&Dd));
381   } else {
382     PetscCall(MatScale(*S, -1.0));
383   }
384   PetscFunctionReturn(PETSC_SUCCESS);
385 }
386 
387 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
388 {
389   Mat          F, A_II, A_IB, A_BI, A_BB, AE_II;
390   Mat          S_all;
391   Vec          gstash, lstash;
392   VecScatter   sstash;
393   IS           is_I, is_I_layer;
394   IS           all_subsets, all_subsets_mult, all_subsets_n;
395   PetscScalar *stasharray, *Bwork;
396   PetscInt    *nnz, *all_local_idx_N;
397   PetscInt    *auxnum1, *auxnum2;
398   PetscInt     i, subset_size, max_subset_size;
399   PetscInt     n_B, extra, local_size, global_size;
400   PetscInt     local_stash_size;
401   PetscBLASInt B_N, B_ierr, B_lwork, *pivots;
402   MPI_Comm     comm_n;
403   PetscBool    deluxe   = PETSC_TRUE;
404   PetscBool    use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
405   PetscViewer  matl_dbg_viewer = NULL;
406   PetscBool    flg;
407 
408   PetscFunctionBegin;
409   PetscCall(MatDestroy(&sub_schurs->A));
410   PetscCall(MatDestroy(&sub_schurs->S));
411   if (Ain) {
412     PetscCall(PetscObjectReference((PetscObject)Ain));
413     sub_schurs->A = Ain;
414   }
415 
416   PetscCall(PetscObjectReference((PetscObject)Sin));
417   sub_schurs->S = Sin;
418   if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
419 
420   /* preliminary checks */
421   PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
422 
423   if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
424 
425   /* debug (MATLAB) */
426   if (sub_schurs->debug) {
427     PetscMPIInt size, rank;
428     PetscInt    nr, *print_schurs_ranks, print_schurs = PETSC_FALSE;
429 
430     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size));
431     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
432     nr = size;
433     PetscCall(PetscMalloc1(nr, &print_schurs_ranks));
434     PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
435     PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg));
436     if (!flg) print_schurs = PETSC_TRUE;
437     else {
438       print_schurs = PETSC_FALSE;
439       for (i = 0; i < nr; i++)
440         if (print_schurs_ranks[i] == (PetscInt)rank) {
441           print_schurs = PETSC_TRUE;
442           break;
443         }
444     }
445     PetscOptionsEnd();
446     PetscCall(PetscFree(print_schurs_ranks));
447     if (print_schurs) {
448       char filename[256];
449 
450       PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank));
451       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer));
452       PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB));
453     }
454   }
455 
456   /* restrict work on active processes */
457   if (sub_schurs->restrict_comm) {
458     PetscSubcomm subcomm;
459     PetscMPIInt  color, rank;
460 
461     color = 0;
462     if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
463     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
464     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm));
465     PetscCall(PetscSubcommSetNumber(subcomm, 2));
466     PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
467     PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL));
468     PetscCall(PetscSubcommDestroy(&subcomm));
469     if (!sub_schurs->n_subs) {
470       PetscCall(PetscCommDestroy(&comm_n));
471       PetscFunctionReturn(PETSC_SUCCESS);
472     }
473   } else {
474     PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL));
475   }
476 
477   /* get Schur complement matrices */
478   if (!sub_schurs->schur_explicit) {
479     Mat       tA_IB, tA_BI, tA_BB;
480     PetscBool isseqsbaij;
481     PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB));
482     PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij));
483     if (isseqsbaij) {
484       PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB));
485       PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB));
486       PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI));
487     } else {
488       PetscCall(PetscObjectReference((PetscObject)tA_BB));
489       A_BB = tA_BB;
490       PetscCall(PetscObjectReference((PetscObject)tA_IB));
491       A_IB = tA_IB;
492       PetscCall(PetscObjectReference((PetscObject)tA_BI));
493       A_BI = tA_BI;
494     }
495   } else {
496     A_II = NULL;
497     A_IB = NULL;
498     A_BI = NULL;
499     A_BB = NULL;
500   }
501   S_all = NULL;
502 
503   /* determine interior problems */
504   PetscCall(ISGetLocalSize(sub_schurs->is_I, &i));
505   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
506     PetscBT         touched;
507     const PetscInt *idx_B;
508     PetscInt        n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering;
509 
510     PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency");
511     /* get sizes */
512     PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I));
513     PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
514 
515     PetscCall(PetscMalloc1(n_I + n_B, &local_numbering));
516     PetscCall(PetscBTCreate(n_I + n_B, &touched));
517     PetscCall(PetscBTMemzero(n_I + n_B, touched));
518 
519     /* all boundary dofs must be skipped when adding layers */
520     PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B));
521     for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j]));
522     PetscCall(PetscArraycpy(local_numbering, idx_B, n_B));
523     PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B));
524 
525     /* add prescribed number of layers of dofs */
526     n_local_dofs = n_B;
527     n_prev_added = n_B;
528     for (layer = 0; layer < nlayers; layer++) {
529       PetscInt n_added = 0;
530       if (n_local_dofs == n_I + n_B) break;
531       PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B);
532       PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added));
533       n_prev_added = n_added;
534       n_local_dofs += n_added;
535       if (!n_added) break;
536     }
537     PetscCall(PetscBTDestroy(&touched));
538 
539     /* IS for I layer dofs in original numbering */
540     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer));
541     PetscCall(PetscFree(local_numbering));
542     PetscCall(ISSort(is_I_layer));
543     /* IS for I layer dofs in I numbering */
544     if (!sub_schurs->schur_explicit) {
545       ISLocalToGlobalMapping ItoNmap;
546       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap));
547       PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I));
548       PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
549 
550       /* II block */
551       PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II));
552     }
553   } else {
554     PetscInt n_I;
555 
556     /* IS for I dofs in original numbering */
557     PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
558     is_I_layer = sub_schurs->is_I;
559 
560     /* IS for I dofs in I numbering (strided 1) */
561     if (!sub_schurs->schur_explicit) {
562       PetscCall(ISGetSize(sub_schurs->is_I, &n_I));
563       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I));
564 
565       /* II block is the same */
566       PetscCall(PetscObjectReference((PetscObject)A_II));
567       AE_II = A_II;
568     }
569   }
570 
571   /* Get info on subset sizes and sum of all subsets sizes */
572   max_subset_size = 0;
573   local_size      = 0;
574   for (i = 0; i < sub_schurs->n_subs; i++) {
575     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
576     max_subset_size = PetscMax(subset_size, max_subset_size);
577     local_size += subset_size;
578   }
579 
580   /* Work arrays for local indices */
581   extra = 0;
582   PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
583   if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra));
584   PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N));
585   if (extra) {
586     const PetscInt *idxs;
587     PetscCall(ISGetIndices(is_I_layer, &idxs));
588     PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra));
589     PetscCall(ISRestoreIndices(is_I_layer, &idxs));
590   }
591   PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1));
592   PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2));
593 
594   /* Get local indices in local numbering */
595   local_size       = 0;
596   local_stash_size = 0;
597   for (i = 0; i < sub_schurs->n_subs; i++) {
598     const PetscInt *idxs;
599 
600     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
601     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
602     /* start (smallest in global ordering) and multiplicity */
603     auxnum1[i] = idxs[0];
604     auxnum2[i] = subset_size * subset_size;
605     /* subset indices in local numbering */
606     PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size));
607     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
608     local_size += subset_size;
609     local_stash_size += subset_size * subset_size;
610   }
611 
612   /* allocate extra workspace needed only for GETRI or SYTRF */
613   use_potr = use_sytr = PETSC_FALSE;
614   if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
615     use_potr = PETSC_TRUE;
616   } else if (sub_schurs->is_symmetric) {
617     use_sytr = PETSC_TRUE;
618   }
619   if (local_size && !use_potr) {
620     PetscScalar  lwork, dummyscalar = 0.;
621     PetscBLASInt dummyint = 0;
622 
623     B_lwork = -1;
624     PetscCall(PetscBLASIntCast(local_size, &B_N));
625     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
626     if (use_sytr) {
627       PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
628       PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %d", (int)B_ierr);
629     } else {
630       PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
631       PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %d", (int)B_ierr);
632     }
633     PetscCall(PetscFPTrapPop());
634     PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
635     PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots));
636   } else {
637     Bwork  = NULL;
638     pivots = NULL;
639   }
640 
641   /* prepare data for summing up properly schurs on subsets */
642   PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n));
643   PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets));
644   PetscCall(ISDestroy(&all_subsets_n));
645   PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult));
646   PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n));
647   PetscCall(ISDestroy(&all_subsets));
648   PetscCall(ISDestroy(&all_subsets_mult));
649   PetscCall(ISGetLocalSize(all_subsets_n, &i));
650   PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size);
651   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash));
652   PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash));
653   PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash));
654   PetscCall(ISDestroy(&all_subsets_n));
655 
656   /* subset indices in local boundary numbering */
657   if (!sub_schurs->is_Ej_all) {
658     PetscInt *all_local_idx_B;
659 
660     PetscCall(PetscMalloc1(local_size, &all_local_idx_B));
661     PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B));
662     PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size);
663     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all));
664   }
665 
666   if (change) {
667     ISLocalToGlobalMapping BtoS;
668     IS                     change_primal_B;
669     IS                     change_primal_all;
670 
671     PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
672     PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
673     PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub));
674     for (i = 0; i < sub_schurs->n_subs; i++) {
675       ISLocalToGlobalMapping NtoS;
676       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS));
677       PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i]));
678       PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
679     }
680     PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B));
681     PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS));
682     PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all));
683     PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
684     PetscCall(ISDestroy(&change_primal_B));
685     PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change));
686     for (i = 0; i < sub_schurs->n_subs; i++) {
687       Mat change_sub;
688 
689       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
690       PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i]));
691       PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY));
692       if (!sub_schurs->change_with_qr) {
693         PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub));
694       } else {
695         Mat change_subt;
696         PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt));
697         PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub));
698         PetscCall(MatDestroy(&change_subt));
699       }
700       PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub));
701       PetscCall(MatDestroy(&change_sub));
702       PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix));
703       PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_"));
704     }
705     PetscCall(ISDestroy(&change_primal_all));
706   }
707 
708   /* Local matrix of all local Schur on subsets (transposed) */
709   if (!sub_schurs->S_Ej_all) {
710     Mat          T;
711     PetscScalar *v;
712     PetscInt    *ii, *jj;
713     PetscInt     cum, i, j, k;
714 
715     /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
716        Allocate properly a representative matrix and duplicate */
717     PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v));
718     ii[0] = 0;
719     cum   = 0;
720     for (i = 0; i < sub_schurs->n_subs; i++) {
721       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
722       for (j = 0; j < subset_size; j++) {
723         const PetscInt row = cum + j;
724         PetscInt       col = cum;
725 
726         ii[row + 1] = ii[row] + subset_size;
727         for (k = ii[row]; k < ii[row + 1]; k++) {
728           jj[k] = col;
729           col++;
730         }
731       }
732       cum += subset_size;
733     }
734     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T));
735     PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all));
736     PetscCall(MatDestroy(&T));
737     PetscCall(PetscFree3(ii, jj, v));
738   }
739   /* matrices for deluxe scaling and adaptive selection */
740   if (compute_Stilda) {
741     if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all));
742     if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all));
743   }
744 
745   /* Compute Schur complements explicitly */
746   F = NULL;
747   if (!sub_schurs->schur_explicit) {
748     /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
749        it is not efficient, unless the economic version of the scaling is used */
750     Mat          S_Ej_expl;
751     PetscScalar *work;
752     PetscInt     j, *dummy_idx;
753     PetscBool    Sdense;
754 
755     PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work));
756     local_size = 0;
757     for (i = 0; i < sub_schurs->n_subs; i++) {
758       IS  is_subset_B;
759       Mat AE_EE, AE_IE, AE_EI, S_Ej;
760 
761       /* subsets in original and boundary numbering */
762       PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B));
763       /* EE block */
764       PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE));
765       /* IE block */
766       PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE));
767       /* EI block */
768       if (sub_schurs->is_symmetric) {
769         PetscCall(MatCreateTranspose(AE_IE, &AE_EI));
770       } else if (sub_schurs->is_hermitian) {
771         PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI));
772       } else {
773         PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI));
774       }
775       PetscCall(ISDestroy(&is_subset_B));
776       PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej));
777       PetscCall(MatDestroy(&AE_EE));
778       PetscCall(MatDestroy(&AE_IE));
779       PetscCall(MatDestroy(&AE_EI));
780       if (AE_II == A_II) { /* we can reuse the same ksp */
781         KSP ksp;
782         PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp));
783         PetscCall(MatSchurComplementSetKSP(S_Ej, ksp));
784       } else { /* build new ksp object which inherits ksp and pc types from the original one */
785         KSP       origksp, schurksp;
786         PC        origpc, schurpc;
787         KSPType   ksp_type;
788         PetscInt  n_internal;
789         PetscBool ispcnone;
790 
791         PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp));
792         PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp));
793         PetscCall(KSPGetType(origksp, &ksp_type));
794         PetscCall(KSPSetType(schurksp, ksp_type));
795         PetscCall(KSPGetPC(schurksp, &schurpc));
796         PetscCall(KSPGetPC(origksp, &origpc));
797         PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone));
798         if (!ispcnone) {
799           PCType pc_type;
800           PetscCall(PCGetType(origpc, &pc_type));
801           PetscCall(PCSetType(schurpc, pc_type));
802         } else {
803           PetscCall(PCSetType(schurpc, PCLU));
804         }
805         PetscCall(ISGetSize(is_I, &n_internal));
806         if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
807           MatSolverType solver = NULL;
808           PetscCall(PCFactorGetMatSolverType(origpc, (MatSolverType *)&solver));
809           if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver));
810         }
811         PetscCall(KSPSetUp(schurksp));
812       }
813       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
814       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl));
815       PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl));
816       PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense));
817       PetscCheck(Sdense, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices");
818       for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j;
819       PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES));
820       PetscCall(MatDestroy(&S_Ej));
821       PetscCall(MatDestroy(&S_Ej_expl));
822       local_size += subset_size;
823     }
824     PetscCall(PetscFree2(dummy_idx, work));
825     /* free */
826     PetscCall(ISDestroy(&is_I));
827     PetscCall(MatDestroy(&AE_II));
828     PetscCall(PetscFree(all_local_idx_N));
829   } else {
830     Mat                A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL;
831     Mat               *gdswA;
832     Vec                Dall = NULL;
833     IS                 is_A_all, *is_p_r = NULL;
834     MatType            Stype;
835     PetscScalar       *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL;
836     PetscScalar       *SEj_arr = NULL, *SEjinv_arr = NULL;
837     const PetscScalar *rS_data;
838     PetscInt           n, n_I, size_schur, size_active_schur, cum, cum2;
839     PetscBool          economic, solver_S, S_lower_triangular = PETSC_FALSE;
840     PetscBool          schur_has_vertices, factor_workaround;
841     PetscBool          use_cholesky;
842 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
843     PetscBool oldpin;
844 #endif
845 
846     /* get sizes */
847     n_I = 0;
848     if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I));
849     economic = PETSC_FALSE;
850     PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum));
851     if (cum != n_I) economic = PETSC_TRUE;
852     PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL));
853     size_active_schur = local_size;
854 
855     /* import scaling vector (wrong formulation if we have 3D edges) */
856     if (scaling && compute_Stilda) {
857       const PetscScalar *array;
858       PetscScalar       *array2;
859       const PetscInt    *idxs;
860       PetscInt           i;
861 
862       PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
863       PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall));
864       PetscCall(VecGetArrayRead(scaling, &array));
865       PetscCall(VecGetArray(Dall, &array2));
866       for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]];
867       PetscCall(VecRestoreArray(Dall, &array2));
868       PetscCall(VecRestoreArrayRead(scaling, &array));
869       PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
870       deluxe = PETSC_FALSE;
871     }
872 
873     /* size active schurs does not count any dirichlet or vertex dof on the interface */
874     factor_workaround  = PETSC_FALSE;
875     schur_has_vertices = PETSC_FALSE;
876     cum                = n_I + size_active_schur;
877     if (sub_schurs->is_dir) {
878       const PetscInt *idxs;
879       PetscInt        n_dir;
880 
881       PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
882       PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
883       PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir));
884       PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
885       cum += n_dir;
886       if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
887     }
888     /* include the primal vertices in the Schur complement */
889     if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
890       PetscInt n_v;
891 
892       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
893       if (n_v) {
894         const PetscInt *idxs;
895 
896         PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
897         PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v));
898         PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
899         cum += n_v;
900         if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
901         schur_has_vertices = PETSC_TRUE;
902       }
903     }
904     size_schur = cum - n_I;
905     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all));
906 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
907     oldpin = sub_schurs->A->boundtocpu;
908     PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE));
909 #endif
910     if (cum == n) {
911       PetscCall(ISSetPermutation(is_A_all));
912       PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A));
913     } else {
914       PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A));
915     }
916 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
917     PetscCall(MatBindToCPU(sub_schurs->A, oldpin));
918 #endif
919     PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix));
920     PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_"));
921 
922     /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
923        this is a workaround */
924     if (benign_n) {
925       Vec                    v, benign_AIIm1_ones;
926       ISLocalToGlobalMapping N_to_reor;
927       IS                     is_p0, is_p0_p;
928       PetscScalar           *cs_AIB, *AIIm1_data;
929       PetscInt               sizeA;
930 
931       PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor));
932       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
933       PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p));
934       PetscCall(ISDestroy(&is_p0));
935       PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
936       PetscCall(VecGetSize(v, &sizeA));
937       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat));
938       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat));
939       PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB));
940       PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
941       PetscCall(PetscMalloc1(benign_n, &is_p_r));
942       /* compute colsum of A_IB restricted to pressures */
943       for (i = 0; i < benign_n; i++) {
944         const PetscScalar *array;
945         const PetscInt    *idxs;
946         PetscInt           j, nz;
947 
948         PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i]));
949         PetscCall(ISGetLocalSize(is_p_r[i], &nz));
950         PetscCall(ISGetIndices(is_p_r[i], &idxs));
951         for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.;
952         PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
953         PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
954         PetscCall(MatMult(A, benign_AIIm1_ones, v));
955         PetscCall(VecResetArray(benign_AIIm1_ones));
956         PetscCall(VecGetArrayRead(v, &array));
957         for (j = 0; j < size_schur; j++) {
958 #if defined(PETSC_USE_COMPLEX)
959           cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz));
960 #else
961           cs_AIB[i * size_schur + j] = array[j + n_I] / nz;
962 #endif
963         }
964         PetscCall(VecRestoreArrayRead(v, &array));
965       }
966       PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB));
967       PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
968       PetscCall(VecDestroy(&v));
969       PetscCall(VecDestroy(&benign_AIIm1_ones));
970       PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
971       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
972       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
973       PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL));
974       PetscCall(ISDestroy(&is_p0_p));
975       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
976     }
977     PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric));
978     PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian));
979     PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef));
980 
981     /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
982     use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
983 
984     /* when using the benign subspace trick, the local Schur complements are SPD */
985     /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
986        Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
987     if (benign_trick) {
988       sub_schurs->is_posdef = PETSC_TRUE;
989       PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg));
990       if (flg) use_cholesky = PETSC_FALSE;
991     }
992 
993     if (n_I) {
994       IS        is_schur;
995       char      stype[64];
996       PetscBool gpu = PETSC_FALSE;
997 
998       if (use_cholesky) {
999         PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_CHOLESKY, &F));
1000       } else {
1001         PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, MAT_FACTOR_LU, &F));
1002       }
1003       PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE));
1004 #if defined(PETSC_HAVE_MKL_PARDISO)
1005       if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
1006 #endif
1007       /* subsets ordered last */
1008       PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur));
1009       PetscCall(MatFactorSetSchurIS(F, is_schur));
1010       PetscCall(ISDestroy(&is_schur));
1011 
1012       /* factorization step */
1013       if (use_cholesky) {
1014         PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
1015 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1016         PetscCall(MatMumpsSetIcntl(F, 19, 2));
1017 #endif
1018         PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
1019         S_lower_triangular = PETSC_TRUE;
1020       } else {
1021         PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
1022 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1023         PetscCall(MatMumpsSetIcntl(F, 19, 3));
1024 #endif
1025         PetscCall(MatLUFactorNumeric(F, A, NULL));
1026       }
1027       PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view"));
1028 
1029       if (matl_dbg_viewer) {
1030         Mat S;
1031         IS  is;
1032 
1033         PetscCall(PetscObjectSetName((PetscObject)A, "A"));
1034         PetscCall(MatView(A, matl_dbg_viewer));
1035         PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1036         PetscCall(PetscObjectSetName((PetscObject)S, "S"));
1037         PetscCall(MatView(S, matl_dbg_viewer));
1038         PetscCall(MatDestroy(&S));
1039         PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is));
1040         PetscCall(PetscObjectSetName((PetscObject)is, "I"));
1041         PetscCall(ISView(is, matl_dbg_viewer));
1042         PetscCall(ISDestroy(&is));
1043         PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is));
1044         PetscCall(PetscObjectSetName((PetscObject)is, "B"));
1045         PetscCall(ISView(is, matl_dbg_viewer));
1046         PetscCall(ISDestroy(&is));
1047         PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA"));
1048         PetscCall(ISView(is_A_all, matl_dbg_viewer));
1049         for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) {
1050           IS   is;
1051           char name[16];
1052 
1053           PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i));
1054           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1055           PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is));
1056           PetscCall(PetscObjectSetName((PetscObject)is, name));
1057           PetscCall(ISView(is, matl_dbg_viewer));
1058           PetscCall(ISDestroy(&is));
1059           if (sub_schurs->change) {
1060             Mat T;
1061 
1062             PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i));
1063             PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL));
1064             PetscCall(PetscObjectSetName((PetscObject)T, name));
1065             PetscCall(MatView(T, matl_dbg_viewer));
1066             PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i));
1067             PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name));
1068             PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer));
1069           }
1070           cum += subset_size;
1071         }
1072         PetscCall(PetscViewerFlush(matl_dbg_viewer));
1073       }
1074 
1075       /* get explicit Schur Complement computed during numeric factorization */
1076       PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1077       PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype)));
1078 #if defined(PETSC_HAVE_CUDA)
1079       PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, ""));
1080 #endif
1081       if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype)));
1082       PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL));
1083       PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all));
1084       PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
1085       PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
1086       PetscCall(MatGetType(S_all, &Stype));
1087 
1088       /* we can reuse the solvers if we are not using the economic version */
1089       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1090       if (!sub_schurs->gdsw) {
1091         factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1092         if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE;
1093       }
1094       solver_S = PETSC_TRUE;
1095 
1096       /* update the Schur complement with the change of basis on the pressures */
1097       if (benign_n) {
1098         const PetscScalar *cs_AIB;
1099         PetscScalar       *S_data, *AIIm1_data;
1100         Mat                S2 = NULL, S3 = NULL; /* dbg */
1101         PetscScalar       *S2_data, *S3_data;    /* dbg */
1102         Vec                v, benign_AIIm1_ones;
1103         PetscInt           sizeA;
1104 
1105         PetscCall(MatDenseGetArray(S_all, &S_data));
1106         PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
1107         PetscCall(VecGetSize(v, &sizeA));
1108 #if defined(PETSC_HAVE_MUMPS)
1109         PetscCall(MatMumpsSetIcntl(F, 26, 0));
1110 #endif
1111 #if defined(PETSC_HAVE_MKL_PARDISO)
1112         PetscCall(MatMkl_PardisoSetCntl(F, 70, 1));
1113 #endif
1114         PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB));
1115         PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
1116         if (matl_dbg_viewer) {
1117           PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2));
1118           PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3));
1119           PetscCall(MatDenseGetArray(S2, &S2_data));
1120           PetscCall(MatDenseGetArray(S3, &S3_data));
1121         }
1122         for (i = 0; i < benign_n; i++) {
1123           PetscScalar    *array, sum = 0., one = 1., *sums;
1124           const PetscInt *idxs;
1125           PetscInt        k, j, nz;
1126           PetscBLASInt    B_k, B_n;
1127 
1128           PetscCall(PetscCalloc1(benign_n, &sums));
1129           PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
1130           PetscCall(VecCopy(benign_AIIm1_ones, v));
1131           PetscCall(MatSolve(F, v, benign_AIIm1_ones));
1132           PetscCall(MatMult(A, benign_AIIm1_ones, v));
1133           PetscCall(VecResetArray(benign_AIIm1_ones));
1134           /* p0 dofs (eliminated) are excluded from the sums */
1135           for (k = 0; k < benign_n; k++) {
1136             PetscCall(ISGetLocalSize(is_p_r[k], &nz));
1137             PetscCall(ISGetIndices(is_p_r[k], &idxs));
1138             for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i];
1139             PetscCall(ISRestoreIndices(is_p_r[k], &idxs));
1140           }
1141           PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
1142           if (matl_dbg_viewer) {
1143             Vec  vv;
1144             char name[16];
1145 
1146             PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv));
1147             PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i));
1148             PetscCall(PetscObjectSetName((PetscObject)vv, name));
1149             PetscCall(VecView(vv, matl_dbg_viewer));
1150           }
1151           /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1152           /* cs_AIB already scaled by 1./nz */
1153           B_k = 1;
1154           for (k = 0; k < benign_n; k++) {
1155             sum = sums[k];
1156             PetscCall(PetscBLASIntCast(size_schur, &B_n));
1157 
1158             if (PetscAbsScalar(sum) == 0.0) continue;
1159             if (k == i) {
1160               PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1161               if (matl_dbg_viewer) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1162             } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1163               sum /= 2.0;
1164               PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1165               if (matl_dbg_viewer) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1166             }
1167           }
1168           sum = 1.;
1169           PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1170           if (matl_dbg_viewer) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n));
1171           PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
1172           /* set p0 entry of AIIm1_ones to zero */
1173           PetscCall(ISGetLocalSize(is_p_r[i], &nz));
1174           PetscCall(ISGetIndices(is_p_r[i], &idxs));
1175           for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.;
1176           PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
1177           PetscCall(PetscFree(sums));
1178         }
1179         PetscCall(VecDestroy(&benign_AIIm1_ones));
1180         if (matl_dbg_viewer) {
1181           PetscCall(MatDenseRestoreArray(S2, &S2_data));
1182           PetscCall(MatDenseRestoreArray(S3, &S3_data));
1183         }
1184         if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1185           PetscInt k, j;
1186           for (k = 0; k < size_schur; k++) {
1187             for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]);
1188           }
1189         }
1190 
1191         /* restore defaults */
1192 #if defined(PETSC_HAVE_MUMPS)
1193         PetscCall(MatMumpsSetIcntl(F, 26, -1));
1194 #endif
1195 #if defined(PETSC_HAVE_MKL_PARDISO)
1196         PetscCall(MatMkl_PardisoSetCntl(F, 70, 0));
1197 #endif
1198         PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB));
1199         PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
1200         PetscCall(VecDestroy(&v));
1201         PetscCall(MatDenseRestoreArray(S_all, &S_data));
1202         if (matl_dbg_viewer) {
1203           Mat S;
1204 
1205           PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1206           PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1207           PetscCall(PetscObjectSetName((PetscObject)S, "Sb"));
1208           PetscCall(MatView(S, matl_dbg_viewer));
1209           PetscCall(MatDestroy(&S));
1210           PetscCall(PetscObjectSetName((PetscObject)S2, "S2P"));
1211           PetscCall(MatView(S2, matl_dbg_viewer));
1212           PetscCall(PetscObjectSetName((PetscObject)S3, "S3P"));
1213           PetscCall(MatView(S3, matl_dbg_viewer));
1214           PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs"));
1215           PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer));
1216           PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1217         }
1218         PetscCall(MatDestroy(&S2));
1219         PetscCall(MatDestroy(&S3));
1220       }
1221       if (!reuse_solvers) {
1222         for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i]));
1223         PetscCall(PetscFree(is_p_r));
1224         PetscCall(MatDestroy(&cs_AIB_mat));
1225         PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1226       }
1227     } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1228       PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all));
1229       PetscCall(MatGetType(S_all, &Stype));
1230       reuse_solvers     = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1231       factor_workaround = PETSC_FALSE;
1232       solver_S          = PETSC_FALSE;
1233     }
1234 
1235     if (reuse_solvers) {
1236       Mat                A_II, pA_II, Afake;
1237       Vec                vec1_B;
1238       PCBDDCReuseSolvers msolv_ctx;
1239       PetscInt           n_R;
1240 
1241       if (sub_schurs->reuse_solver) {
1242         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1243       } else {
1244         PetscCall(PetscNew(&sub_schurs->reuse_solver));
1245       }
1246       msolv_ctx = sub_schurs->reuse_solver;
1247       PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL));
1248       PetscCall(PetscObjectReference((PetscObject)F));
1249       msolv_ctx->F = F;
1250       PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL));
1251       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1252       {
1253         PetscScalar *array;
1254         PetscInt     n;
1255 
1256         PetscCall(VecGetLocalSize(msolv_ctx->sol, &n));
1257         PetscCall(VecGetArray(msolv_ctx->sol, &array));
1258         PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs));
1259         PetscCall(VecRestoreArray(msolv_ctx->sol, &array));
1260       }
1261       msolv_ctx->has_vertices = schur_has_vertices;
1262 
1263       /* interior solver */
1264       PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver));
1265       PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II));
1266       PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL));
1267       PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)"));
1268       PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx));
1269       PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1270       PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior));
1271       PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose));
1272       if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy));
1273 
1274       /* correction solver */
1275       if (!sub_schurs->gdsw) {
1276         PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver));
1277         PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL));
1278         PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)"));
1279         PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx));
1280         PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1281         PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction));
1282         PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose));
1283 
1284         /* scatter and vecs for Schur complement solver */
1285         PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B));
1286         PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL));
1287         if (!schur_has_vertices) {
1288           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B));
1289           PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B));
1290           PetscCall(PetscObjectReference((PetscObject)is_A_all));
1291           msolv_ctx->is_R = is_A_all;
1292         } else {
1293           IS              is_B_all;
1294           const PetscInt *idxs;
1295           PetscInt        dual, n_v, n;
1296 
1297           PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
1298           dual = size_schur - n_v;
1299           PetscCall(ISGetLocalSize(is_A_all, &n));
1300           PetscCall(ISGetIndices(is_A_all, &idxs));
1301           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all));
1302           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B));
1303           PetscCall(ISDestroy(&is_B_all));
1304           PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all));
1305           PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B));
1306           PetscCall(ISDestroy(&is_B_all));
1307           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R));
1308           PetscCall(ISRestoreIndices(is_A_all, &idxs));
1309         }
1310         PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R));
1311         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake));
1312         PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY));
1313         PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY));
1314         PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake));
1315         PetscCall(MatDestroy(&Afake));
1316         PetscCall(VecDestroy(&vec1_B));
1317       }
1318       /* communicate benign info to solver context */
1319       if (benign_n) {
1320         PetscScalar *array;
1321 
1322         msolv_ctx->benign_n             = benign_n;
1323         msolv_ctx->benign_zerodiag_subs = is_p_r;
1324         PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals));
1325         msolv_ctx->benign_csAIB = cs_AIB_mat;
1326         PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL));
1327         PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array));
1328         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec));
1329         PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array));
1330         msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1331       }
1332     } else {
1333       if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1334       PetscCall(PetscFree(sub_schurs->reuse_solver));
1335     }
1336     PetscCall(MatDestroy(&A));
1337     PetscCall(ISDestroy(&is_A_all));
1338 
1339     /* Work arrays */
1340     PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work));
1341 
1342     /* S_Ej_all */
1343     cum = cum2 = 0;
1344     PetscCall(MatDenseGetArrayRead(S_all, &rS_data));
1345     PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr));
1346     if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1347     if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA));
1348     for (i = 0; i < sub_schurs->n_subs; i++) {
1349       PetscInt j;
1350 
1351       /* get S_E (or K^i_EE for GDSW) */
1352       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1353       if (sub_schurs->gdsw) {
1354         Mat T;
1355 
1356         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T));
1357         PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T));
1358         PetscCall(MatDestroy(&T));
1359       } else {
1360         if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1361           PetscInt k;
1362           for (k = 0; k < subset_size; k++) {
1363             for (j = k; j < subset_size; j++) {
1364               work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1365               work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]);
1366             }
1367           }
1368         } else { /* just copy to workspace */
1369           PetscInt k;
1370           for (k = 0; k < subset_size; k++) {
1371             for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1372           }
1373         }
1374       }
1375       /* insert S_E values */
1376       if (sub_schurs->change) {
1377         Mat change_sub, SEj, T;
1378 
1379         /* change basis */
1380         PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1381         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1382         if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1383           Mat T2;
1384           PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1385           PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1386           PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1387           PetscCall(MatDestroy(&T2));
1388         } else {
1389           PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1390         }
1391         PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1392         PetscCall(MatDestroy(&T));
1393         PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL));
1394         PetscCall(MatDestroy(&SEj));
1395       }
1396       PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1397       if (compute_Stilda) {
1398         if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
1399           Mat                M;
1400           const PetscScalar *vals;
1401           PetscBool          isdense, isdensecuda;
1402 
1403           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M));
1404           PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
1405           PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
1406           if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype));
1407           PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense));
1408           PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda));
1409           if (use_cholesky) {
1410             PetscCall(MatCholeskyFactor(M, NULL, NULL));
1411           } else {
1412             PetscCall(MatLUFactor(M, NULL, NULL, NULL));
1413           }
1414           if (isdense) {
1415             PetscCall(MatSeqDenseInvertFactors_Private(M));
1416 #if defined(PETSC_HAVE_CUDA)
1417           } else if (isdensecuda) {
1418             PetscCall(MatSeqDenseCUDAInvertFactors_Private(M));
1419 #endif
1420           } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype);
1421           PetscCall(MatDenseGetArrayRead(M, &vals));
1422           PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size));
1423           PetscCall(MatDenseRestoreArrayRead(M, &vals));
1424           PetscCall(MatDestroy(&M));
1425         } else if (scaling) { /* not using deluxe */
1426           Mat          SEj;
1427           Vec          D;
1428           PetscScalar *array;
1429 
1430           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1431           PetscCall(VecGetArray(Dall, &array));
1432           PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D));
1433           PetscCall(VecRestoreArray(Dall, &array));
1434           PetscCall(VecShift(D, -1.));
1435           PetscCall(MatDiagonalScale(SEj, D, D));
1436           PetscCall(MatDestroy(&SEj));
1437           PetscCall(VecDestroy(&D));
1438           PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1439         }
1440       }
1441       cum += subset_size;
1442       cum2 += subset_size * (size_schur + 1);
1443       SEj_arr += subset_size * subset_size;
1444       if (SEjinv_arr) SEjinv_arr += subset_size * subset_size;
1445     }
1446     if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA));
1447     PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data));
1448     PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr));
1449     if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1450     if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1451 
1452       /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1453        however, preliminary tests indicate using GPUs is still faster in the solve phase */
1454 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1455     if (reuse_solvers) {
1456       Mat                  St;
1457       MatFactorSchurStatus st;
1458 
1459       flg = PETSC_FALSE;
1460       PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL));
1461       PetscCall(MatFactorGetSchurComplement(F, &St, &st));
1462       PetscCall(MatBindToCPU(St, flg));
1463       PetscCall(MatFactorRestoreSchurComplement(F, &St, st));
1464     }
1465 #endif
1466 
1467     schur_factor = NULL;
1468     if (compute_Stilda && size_active_schur) {
1469       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1470         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1471         PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur));
1472         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1473       } else {
1474         Mat S_all_inv = NULL;
1475 
1476         if (solver_S && !sub_schurs->gdsw) {
1477           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1478              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1479           if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1480             PetscScalar *data;
1481             PetscInt     nd = 0;
1482 
1483             PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1484             PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1485             PetscCall(MatDenseGetArray(S_all_inv, &data));
1486             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1487               PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1488             }
1489 
1490             /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1491             if (schur_has_vertices) {
1492               Mat          M;
1493               PetscScalar *tdata;
1494               PetscInt     nv = 0, news;
1495 
1496               PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
1497               news = size_active_schur + nv;
1498               PetscCall(PetscCalloc1(news * news, &tdata));
1499               for (i = 0; i < size_active_schur; i++) {
1500                 PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i));
1501                 PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv));
1502               }
1503               for (i = 0; i < nv; i++) {
1504                 PetscInt k = i + size_active_schur;
1505                 PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i));
1506               }
1507 
1508               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M));
1509               PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1510               PetscCall(MatCholeskyFactor(M, NULL, NULL));
1511               /* save the factors */
1512               cum = 0;
1513               PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor));
1514               for (i = 0; i < size_active_schur; i++) {
1515                 PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i));
1516                 cum += size_active_schur - i;
1517               }
1518               for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)]));
1519               PetscCall(MatSeqDenseInvertFactors_Private(M));
1520               /* move back just the active dofs to the Schur complement */
1521               for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur));
1522               PetscCall(PetscFree(tdata));
1523               PetscCall(MatDestroy(&M));
1524             } else { /* we can factorize and invert just the activedofs */
1525               Mat          M;
1526               PetscScalar *aux;
1527 
1528               PetscCall(PetscMalloc1(nd, &aux));
1529               for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)];
1530               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M));
1531               PetscCall(MatDenseSetLDA(M, size_schur));
1532               PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1533               PetscCall(MatCholeskyFactor(M, NULL, NULL));
1534               PetscCall(MatSeqDenseInvertFactors_Private(M));
1535               PetscCall(MatDestroy(&M));
1536               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M));
1537               PetscCall(MatZeroEntries(M));
1538               PetscCall(MatDestroy(&M));
1539               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M));
1540               PetscCall(MatDenseSetLDA(M, size_schur));
1541               PetscCall(MatZeroEntries(M));
1542               PetscCall(MatDestroy(&M));
1543               for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i];
1544               PetscCall(PetscFree(aux));
1545             }
1546             PetscCall(MatDenseRestoreArray(S_all_inv, &data));
1547           } else { /* use MatFactor calls to invert S */
1548             PetscCall(MatFactorInvertSchurComplement(F));
1549             PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1550           }
1551         } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1552           PetscCall(PetscObjectReference((PetscObject)S_all));
1553           S_all_inv = S_all;
1554           PetscCall(MatDenseGetArray(S_all_inv, &S_data));
1555           PetscCall(PetscBLASIntCast(size_schur, &B_N));
1556           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1557           if (use_potr) {
1558             PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr));
1559             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr);
1560             PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr));
1561             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr);
1562           } else if (use_sytr) {
1563             PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1564             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr);
1565             PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr));
1566             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr);
1567           } else {
1568             PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr));
1569             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr);
1570             PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1571             PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr);
1572           }
1573           PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur));
1574           PetscCall(PetscFPTrapPop());
1575           PetscCall(MatDenseRestoreArray(S_all_inv, &S_data));
1576         } else if (sub_schurs->gdsw) {
1577           Mat      tS, tX, SEj, S_II, S_IE, S_EE;
1578           KSP      pS_II;
1579           PC       pS_II_pc;
1580           IS       EE, II;
1581           PetscInt nS;
1582 
1583           PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL));
1584           PetscCall(MatGetSize(tS, &nS, NULL));
1585           PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1586           for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */
1587             PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1588             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj));
1589 
1590             PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE));
1591             PetscCall(ISComplement(EE, 0, nS, &II));
1592             PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II));
1593             PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE));
1594             PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE));
1595             PetscCall(ISDestroy(&II));
1596             PetscCall(ISDestroy(&EE));
1597 
1598             PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II));
1599             PetscCall(KSPSetType(pS_II, KSPPREONLY));
1600             PetscCall(KSPGetPC(pS_II, &pS_II_pc));
1601             PetscCall(PCSetType(pS_II_pc, PCSVD));
1602             PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix));
1603             PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_"));
1604             PetscCall(KSPSetOperators(pS_II, S_II, S_II));
1605             PetscCall(MatDestroy(&S_II));
1606             PetscCall(KSPSetFromOptions(pS_II));
1607             PetscCall(KSPSetUp(pS_II));
1608             PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX));
1609             PetscCall(KSPMatSolve(pS_II, S_IE, tX));
1610             PetscCall(KSPDestroy(&pS_II));
1611 
1612             PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DEFAULT, &SEj));
1613             PetscCall(MatDestroy(&S_IE));
1614             PetscCall(MatDestroy(&tX));
1615             PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN));
1616             PetscCall(MatDestroy(&S_EE));
1617 
1618             PetscCall(MatDestroy(&SEj));
1619             cum += subset_size;
1620             SEjinv_arr += subset_size * subset_size;
1621           }
1622           PetscCall(MatDestroy(&tS));
1623           PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1624         }
1625         /* S_Ej_tilda_all */
1626         cum = cum2 = 0;
1627         rS_data    = NULL;
1628         if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data));
1629         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1630         for (i = 0; i < sub_schurs->n_subs; i++) {
1631           PetscInt j;
1632 
1633           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1634           /* get (St^-1)_E */
1635           /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1636              will be properly accessed later during adaptive selection */
1637           if (rS_data) {
1638             if (S_lower_triangular) {
1639               PetscInt k;
1640               if (sub_schurs->change) {
1641                 for (k = 0; k < subset_size; k++) {
1642                   for (j = k; j < subset_size; j++) {
1643                     work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1644                     work[j * subset_size + k] = work[k * subset_size + j];
1645                   }
1646                 }
1647               } else {
1648                 for (k = 0; k < subset_size; k++) {
1649                   for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1650                 }
1651               }
1652             } else {
1653               PetscInt k;
1654               for (k = 0; k < subset_size; k++) {
1655                 for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1656               }
1657             }
1658           }
1659           if (sub_schurs->change) {
1660             Mat         change_sub, SEj, T;
1661             PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL;
1662 
1663             /* change basis */
1664             PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1665             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, rS_data ? work : SEjinv_arr, &SEj));
1666             if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1667               Mat T2;
1668               PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1669               PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1670               PetscCall(MatDestroy(&T2));
1671               PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1672             } else {
1673               PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1674             }
1675             PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1676             PetscCall(MatDestroy(&T));
1677             PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL));
1678             PetscCall(MatDestroy(&SEj));
1679           }
1680           if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size));
1681           cum += subset_size;
1682           cum2 += subset_size * (size_schur + 1);
1683           SEjinv_arr += subset_size * subset_size;
1684         }
1685         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1686         if (S_all_inv) {
1687           PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data));
1688           if (solver_S) {
1689             if (schur_has_vertices) {
1690               PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED));
1691             } else {
1692               PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED));
1693             }
1694           }
1695         }
1696         PetscCall(MatDestroy(&S_all_inv));
1697       }
1698 
1699       /* move back factors if needed */
1700       if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1701         Mat          S_tmp;
1702         PetscInt     nd = 0;
1703         PetscScalar *data;
1704 
1705         PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1706         PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
1707         PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL));
1708         PetscCall(MatDenseGetArray(S_tmp, &data));
1709         PetscCall(PetscArrayzero(data, size_schur * size_schur));
1710 
1711         if (S_lower_triangular) {
1712           cum = 0;
1713           for (i = 0; i < size_active_schur; i++) {
1714             PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i));
1715             cum += size_active_schur - i;
1716           }
1717         } else {
1718           PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur));
1719         }
1720         if (sub_schurs->is_dir) {
1721           PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1722           for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i];
1723         }
1724         /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1725              set the diagonal entry of the Schur factor to a very large value */
1726         for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty;
1727         PetscCall(MatDenseRestoreArray(S_tmp, &data));
1728         PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED));
1729       }
1730     } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1731       PetscScalar *data;
1732       PetscInt     nd = 0;
1733 
1734       if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1735         PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1736       }
1737       PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1738       PetscCall(MatDenseGetArray(S_all, &data));
1739       for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1740       for (i = size_active_schur + nd; i < size_schur; i++) {
1741         PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1742         data[i * (size_schur + 1)] = infty;
1743       }
1744       PetscCall(MatDenseRestoreArray(S_all, &data));
1745       PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1746     }
1747     PetscCall(PetscFree(work));
1748     PetscCall(PetscFree(schur_factor));
1749     PetscCall(VecDestroy(&Dall));
1750   }
1751   PetscCall(ISDestroy(&is_I_layer));
1752   PetscCall(MatDestroy(&S_all));
1753   PetscCall(MatDestroy(&A_BB));
1754   PetscCall(MatDestroy(&A_IB));
1755   PetscCall(MatDestroy(&A_BI));
1756   PetscCall(MatDestroy(&F));
1757 
1758   PetscCall(PetscMalloc1(sub_schurs->n_subs, &nnz));
1759   for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &nnz[i]));
1760   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, sub_schurs->n_subs, nnz, PETSC_OWN_POINTER, &is_I_layer));
1761   PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all, sub_schurs->n_subs, nnz));
1762   PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1763   PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1764   if (compute_Stilda) {
1765     PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all, sub_schurs->n_subs, nnz));
1766     PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1767     PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1768     if (deluxe) {
1769       PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all, sub_schurs->n_subs, nnz));
1770       PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1771       PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1772     }
1773   }
1774   PetscCall(ISDestroy(&is_I_layer));
1775 
1776   /* Get local part of (\sum_j S_Ej) */
1777   if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all));
1778   PetscCall(VecSet(gstash, 0.0));
1779   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray));
1780   PetscCall(VecPlaceArray(lstash, stasharray));
1781   PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1782   PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1783   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray));
1784   PetscCall(VecResetArray(lstash));
1785   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray));
1786   PetscCall(VecPlaceArray(lstash, stasharray));
1787   PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1788   PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1789   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray));
1790   PetscCall(VecResetArray(lstash));
1791 
1792   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1793   if (compute_Stilda) {
1794     PetscCall(VecSet(gstash, 0.0));
1795     PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1796     PetscCall(VecPlaceArray(lstash, stasharray));
1797     PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1798     PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1799     PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1800     PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1801     PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1802     PetscCall(VecResetArray(lstash));
1803     if (deluxe) {
1804       PetscCall(VecSet(gstash, 0.0));
1805       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1806       PetscCall(VecPlaceArray(lstash, stasharray));
1807       PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1808       PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1809       PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1810       PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1811       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1812       PetscCall(VecResetArray(lstash));
1813     } else if (!sub_schurs->gdsw) {
1814       PetscScalar *array;
1815       PetscInt     cum;
1816 
1817       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1818       cum = 0;
1819       for (i = 0; i < sub_schurs->n_subs; i++) {
1820         PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1821         PetscCall(PetscBLASIntCast(subset_size, &B_N));
1822         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1823         if (use_potr) {
1824           PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr));
1825           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %d", (int)B_ierr);
1826           PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr));
1827           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %d", (int)B_ierr);
1828         } else if (use_sytr) {
1829           PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1830           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %d", (int)B_ierr);
1831           PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr));
1832           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %d", (int)B_ierr);
1833         } else {
1834           PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr));
1835           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %d", (int)B_ierr);
1836           PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1837           PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %d", (int)B_ierr);
1838         }
1839         PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size));
1840         PetscCall(PetscFPTrapPop());
1841         cum += subset_size * subset_size;
1842       }
1843       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1844       PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
1845       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1846       sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1847     }
1848   }
1849   PetscCall(VecDestroy(&lstash));
1850   PetscCall(VecDestroy(&gstash));
1851   PetscCall(VecScatterDestroy(&sstash));
1852 
1853   if (matl_dbg_viewer) {
1854     if (sub_schurs->S_Ej_all) {
1855       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE"));
1856       PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer));
1857     }
1858     if (sub_schurs->sum_S_Ej_all) {
1859       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE"));
1860       PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer));
1861     }
1862     if (sub_schurs->sum_S_Ej_inv_all) {
1863       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm"));
1864       PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer));
1865     }
1866     if (sub_schurs->sum_S_Ej_tilda_all) {
1867       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt"));
1868       PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer));
1869     }
1870   }
1871 
1872   /* free workspace */
1873   if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
1874   if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
1875   PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
1876   PetscCall(PetscFree2(Bwork, pivots));
1877   PetscCall(PetscCommDestroy(&comm_n));
1878   PetscFunctionReturn(PETSC_SUCCESS);
1879 }
1880 
1881 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw)
1882 {
1883   IS       *faces, *edges, *all_cc, vertices;
1884   PetscInt  s, i, n_faces, n_edges, n_all_cc;
1885   PetscBool is_sorted, ispardiso, ismumps;
1886 
1887   PetscFunctionBegin;
1888   PetscCall(ISSorted(is_I, &is_sorted));
1889   PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted");
1890   PetscCall(ISSorted(is_B, &is_sorted));
1891   PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted");
1892 
1893   /* reset any previous data */
1894   PetscCall(PCBDDCSubSchursReset(sub_schurs));
1895 
1896   sub_schurs->gdsw = gdsw;
1897 
1898   /* get index sets for faces and edges (already sorted by global ordering) */
1899   PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
1900   n_all_cc = n_faces + n_edges;
1901   PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge));
1902   PetscCall(PetscMalloc1(n_all_cc, &all_cc));
1903   n_all_cc = 0;
1904   for (i = 0; i < n_faces; i++) {
1905     PetscCall(ISGetSize(faces[i], &s));
1906     if (!s) continue;
1907     if (copycc) {
1908       PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc]));
1909     } else {
1910       PetscCall(PetscObjectReference((PetscObject)faces[i]));
1911       all_cc[n_all_cc] = faces[i];
1912     }
1913     n_all_cc++;
1914   }
1915   for (i = 0; i < n_edges; i++) {
1916     PetscCall(ISGetSize(edges[i], &s));
1917     if (!s) continue;
1918     if (copycc) {
1919       PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc]));
1920     } else {
1921       PetscCall(PetscObjectReference((PetscObject)edges[i]));
1922       all_cc[n_all_cc] = edges[i];
1923     }
1924     PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc));
1925     n_all_cc++;
1926   }
1927   PetscCall(PetscObjectReference((PetscObject)vertices));
1928   sub_schurs->is_vertices = vertices;
1929   PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
1930   sub_schurs->is_dir = NULL;
1931   PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir));
1932 
1933   /* Determine if MatFactor can be used */
1934   PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix));
1935 #if defined(PETSC_HAVE_MUMPS)
1936   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type)));
1937 #elif defined(PETSC_HAVE_MKL_PARDISO)
1938   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type)));
1939 #else
1940   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type)));
1941 #endif
1942 #if defined(PETSC_USE_COMPLEX)
1943   sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1944 #else
1945   sub_schurs->is_hermitian = PETSC_TRUE;
1946 #endif
1947   sub_schurs->is_posdef     = PETSC_TRUE;
1948   sub_schurs->is_symmetric  = PETSC_TRUE;
1949   sub_schurs->debug         = PETSC_FALSE;
1950   sub_schurs->restrict_comm = PETSC_FALSE;
1951   PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
1952   PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL));
1953   PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL));
1954   PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL));
1955   PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL));
1956   PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL));
1957   PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL));
1958   PetscOptionsEnd();
1959   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps));
1960   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso));
1961   sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
1962 
1963   /* for reals, symmetric and hermitian are synonims */
1964 #if !defined(PETSC_USE_COMPLEX)
1965   sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1966   sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1967 #endif
1968 
1969   PetscCall(PetscObjectReference((PetscObject)is_I));
1970   sub_schurs->is_I = is_I;
1971   PetscCall(PetscObjectReference((PetscObject)is_B));
1972   sub_schurs->is_B = is_B;
1973   PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
1974   sub_schurs->l2gmap = graph->l2gmap;
1975   PetscCall(PetscObjectReference((PetscObject)BtoNmap));
1976   sub_schurs->BtoNmap            = BtoNmap;
1977   sub_schurs->n_subs             = n_all_cc;
1978   sub_schurs->is_subs            = all_cc;
1979   sub_schurs->S_Ej_all           = NULL;
1980   sub_schurs->sum_S_Ej_all       = NULL;
1981   sub_schurs->sum_S_Ej_inv_all   = NULL;
1982   sub_schurs->sum_S_Ej_tilda_all = NULL;
1983   sub_schurs->is_Ej_all          = NULL;
1984   PetscFunctionReturn(PETSC_SUCCESS);
1985 }
1986 
1987 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1988 {
1989   PCBDDCSubSchurs schurs_ctx;
1990 
1991   PetscFunctionBegin;
1992   PetscCall(PetscNew(&schurs_ctx));
1993   schurs_ctx->n_subs = 0;
1994   *sub_schurs        = schurs_ctx;
1995   PetscFunctionReturn(PETSC_SUCCESS);
1996 }
1997 
1998 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1999 {
2000   PetscInt i;
2001 
2002   PetscFunctionBegin;
2003   if (!sub_schurs) PetscFunctionReturn(PETSC_SUCCESS);
2004   PetscCall(PetscFree(sub_schurs->prefix));
2005   PetscCall(MatDestroy(&sub_schurs->A));
2006   PetscCall(MatDestroy(&sub_schurs->S));
2007   PetscCall(ISDestroy(&sub_schurs->is_I));
2008   PetscCall(ISDestroy(&sub_schurs->is_B));
2009   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
2010   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
2011   PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
2012   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
2013   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
2014   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
2015   PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
2016   PetscCall(ISDestroy(&sub_schurs->is_vertices));
2017   PetscCall(ISDestroy(&sub_schurs->is_dir));
2018   PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
2019   for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
2020   if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs));
2021   if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
2022   PetscCall(PetscFree(sub_schurs->reuse_solver));
2023   if (sub_schurs->change) {
2024     for (i = 0; i < sub_schurs->n_subs; i++) {
2025       PetscCall(KSPDestroy(&sub_schurs->change[i]));
2026       PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
2027     }
2028   }
2029   PetscCall(PetscFree(sub_schurs->change));
2030   PetscCall(PetscFree(sub_schurs->change_primal_sub));
2031   sub_schurs->n_subs = 0;
2032   PetscFunctionReturn(PETSC_SUCCESS);
2033 }
2034 
2035 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
2036 {
2037   PetscFunctionBegin;
2038   PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2039   PetscCall(PetscFree(*sub_schurs));
2040   PetscFunctionReturn(PETSC_SUCCESS);
2041 }
2042 
2043 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added)
2044 {
2045   PetscInt i, j, n;
2046 
2047   PetscFunctionBegin;
2048   n = 0;
2049   for (i = -n_prev; i < 0; i++) {
2050     PetscInt start_dof = queue_tip[i];
2051     for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) {
2052       PetscInt dof = adjncy[j];
2053       if (!PetscBTLookup(touched, dof)) {
2054         PetscCall(PetscBTSet(touched, dof));
2055         queue_tip[n] = dof;
2056         n++;
2057       }
2058     }
2059   }
2060   *n_added = n;
2061   PetscFunctionReturn(PETSC_SUCCESS);
2062 }
2063