xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 #include <petsc/private/pcbddcimpl.h>
2 #include <petsc/private/pcbddcprivateimpl.h>
3 #include <../src/mat/impls/dense/seq/dense.h>
4 #include <petscblaslapack.h>
5 
6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec);
9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec);
10 
11 /* if v2 is not present, correction is done in-place */
12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13 {
14   PetscScalar    *array;
15   PetscScalar    *array2;
16 
17   PetscFunctionBegin;
18   if (!ctx->benign_n) PetscFunctionReturn(0);
19   if (sol && full) {
20     PetscInt n_I,size_schur;
21 
22     /* get sizes */
23     PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
24     PetscCall(VecGetSize(v,&n_I));
25     n_I = n_I - size_schur;
26     /* get schur sol from array */
27     PetscCall(VecGetArray(v,&array));
28     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
29     PetscCall(VecRestoreArray(v,&array));
30     /* apply interior sol correction */
31     PetscCall(MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work));
32     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
33     PetscCall(MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v));
34   }
35   if (v2) {
36     PetscInt nl;
37 
38     PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array));
39     PetscCall(VecGetLocalSize(v2,&nl));
40     PetscCall(VecGetArray(v2,&array2));
41     PetscCall(PetscArraycpy(array2,array,nl));
42   } else {
43     PetscCall(VecGetArray(v,&array));
44     array2 = array;
45   }
46   if (!sol) { /* change rhs */
47     PetscInt n;
48     for (n=0;n<ctx->benign_n;n++) {
49       PetscScalar    sum = 0.;
50       const PetscInt *cols;
51       PetscInt       nz,i;
52 
53       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
54       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
55       for (i=0;i<nz-1;i++) sum += array[cols[i]];
56 #if defined(PETSC_USE_COMPLEX)
57       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
58 #else
59       sum = -sum/nz;
60 #endif
61       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
62       ctx->benign_save_vals[n] = array2[cols[nz-1]];
63       array2[cols[nz-1]] = sum;
64       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
65     }
66   } else {
67     PetscInt n;
68     for (n=0;n<ctx->benign_n;n++) {
69       PetscScalar    sum = 0.;
70       const PetscInt *cols;
71       PetscInt       nz,i;
72       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
73       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
74       for (i=0;i<nz-1;i++) sum += array[cols[i]];
75 #if defined(PETSC_USE_COMPLEX)
76       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
77 #else
78       sum = -sum/nz;
79 #endif
80       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
81       array2[cols[nz-1]] = ctx->benign_save_vals[n];
82       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
83     }
84   }
85   if (v2) {
86     PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array));
87     PetscCall(VecRestoreArray(v2,&array2));
88   } else {
89     PetscCall(VecRestoreArray(v,&array));
90   }
91   if (!sol && full) {
92     Vec      usedv;
93     PetscInt n_I,size_schur;
94 
95     /* get sizes */
96     PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
97     PetscCall(VecGetSize(v,&n_I));
98     n_I = n_I - size_schur;
99     /* compute schur rhs correction */
100     if (v2) {
101       usedv = v2;
102     } else {
103       usedv = v;
104     }
105     /* apply schur rhs correction */
106     PetscCall(MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work));
107     PetscCall(VecGetArrayRead(usedv,(const PetscScalar**)&array));
108     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
109     PetscCall(VecRestoreArrayRead(usedv,(const PetscScalar**)&array));
110     PetscCall(MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec));
111     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
112   }
113   PetscFunctionReturn(0);
114 }
115 
116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
117 {
118   PCBDDCReuseSolvers ctx;
119   PetscBool          copy = PETSC_FALSE;
120 
121   PetscFunctionBegin;
122   PetscCall(PCShellGetContext(pc,&ctx));
123   if (full) {
124 #if defined(PETSC_HAVE_MUMPS)
125     PetscCall(MatMumpsSetIcntl(ctx->F,26,-1));
126 #endif
127 #if defined(PETSC_HAVE_MKL_PARDISO)
128     PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0));
129 #endif
130     copy = ctx->has_vertices;
131   } else { /* interior solver */
132 #if defined(PETSC_HAVE_MUMPS)
133     PetscCall(MatMumpsSetIcntl(ctx->F,26,0));
134 #endif
135 #if defined(PETSC_HAVE_MKL_PARDISO)
136     PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,1));
137 #endif
138     copy = PETSC_TRUE;
139   }
140   /* copy rhs into factored matrix workspace */
141   if (copy) {
142     PetscInt    n;
143     PetscScalar *array,*array_solver;
144 
145     PetscCall(VecGetLocalSize(rhs,&n));
146     PetscCall(VecGetArrayRead(rhs,(const PetscScalar**)&array));
147     PetscCall(VecGetArray(ctx->rhs,&array_solver));
148     PetscCall(PetscArraycpy(array_solver,array,n));
149     PetscCall(VecRestoreArray(ctx->rhs,&array_solver));
150     PetscCall(VecRestoreArrayRead(rhs,(const PetscScalar**)&array));
151 
152     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full));
153     if (transpose) {
154       PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol));
155     } else {
156       PetscCall(MatSolve(ctx->F,ctx->rhs,ctx->sol));
157     }
158     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full));
159 
160     /* get back data to caller worskpace */
161     PetscCall(VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
162     PetscCall(VecGetArray(sol,&array));
163     PetscCall(PetscArraycpy(array,array_solver,n));
164     PetscCall(VecRestoreArray(sol,&array));
165     PetscCall(VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
166   } else {
167     if (ctx->benign_n) {
168       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full));
169       if (transpose) {
170         PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,sol));
171       } else {
172         PetscCall(MatSolve(ctx->F,ctx->rhs,sol));
173       }
174       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full));
175     } else {
176       if (transpose) {
177         PetscCall(MatSolveTranspose(ctx->F,rhs,sol));
178       } else {
179         PetscCall(MatSolve(ctx->F,rhs,sol));
180       }
181     }
182   }
183   /* restore defaults */
184 #if defined(PETSC_HAVE_MUMPS)
185   PetscCall(MatMumpsSetIcntl(ctx->F,26,-1));
186 #endif
187 #if defined(PETSC_HAVE_MKL_PARDISO)
188   PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0));
189 #endif
190   PetscFunctionReturn(0);
191 }
192 
193 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
194 {
195   PetscFunctionBegin;
196   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE));
197   PetscFunctionReturn(0);
198 }
199 
200 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
201 {
202   PetscFunctionBegin;
203   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
208 {
209   PetscFunctionBegin;
210   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE));
211   PetscFunctionReturn(0);
212 }
213 
214 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
215 {
216   PetscFunctionBegin;
217   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE));
218   PetscFunctionReturn(0);
219 }
220 
221 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
222 {
223   PCBDDCReuseSolvers ctx;
224   PetscBool          iascii;
225 
226   PetscFunctionBegin;
227   PetscCall(PCShellGetContext(pc,&ctx));
228   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
229   if (iascii) PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO));
230   PetscCall(MatView(ctx->F,viewer));
231   if (iascii) PetscCall(PetscViewerPopFormat(viewer));
232   PetscFunctionReturn(0);
233 }
234 
235 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
236 {
237   PetscInt       i;
238 
239   PetscFunctionBegin;
240   PetscCall(MatDestroy(&reuse->F));
241   PetscCall(VecDestroy(&reuse->sol));
242   PetscCall(VecDestroy(&reuse->rhs));
243   PetscCall(PCDestroy(&reuse->interior_solver));
244   PetscCall(PCDestroy(&reuse->correction_solver));
245   PetscCall(ISDestroy(&reuse->is_R));
246   PetscCall(ISDestroy(&reuse->is_B));
247   PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
248   PetscCall(VecDestroy(&reuse->sol_B));
249   PetscCall(VecDestroy(&reuse->rhs_B));
250   for (i=0;i<reuse->benign_n;i++) {
251     PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
252   }
253   PetscCall(PetscFree(reuse->benign_zerodiag_subs));
254   PetscCall(PetscFree(reuse->benign_save_vals));
255   PetscCall(MatDestroy(&reuse->benign_csAIB));
256   PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
257   PetscCall(VecDestroy(&reuse->benign_corr_work));
258   PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
259   PetscFunctionReturn(0);
260 }
261 
262 static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc)
263 {
264   PCBDDCReuseSolvers ctx;
265 
266   PetscFunctionBegin;
267   PetscCall(PCShellGetContext(pc,&ctx));
268   PetscCall(PCBDDCReuseSolversReset(ctx));
269   PetscCall(PetscFree(ctx));
270   PetscCall(PCShellSetContext(pc,NULL));
271   PetscFunctionReturn(0);
272 }
273 
274 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
275 {
276   Mat            B, C, D, Bd, Cd, AinvBd;
277   KSP            ksp;
278   PC             pc;
279   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
280   PetscReal      fill = 2.0;
281   PetscInt       n_I;
282   PetscMPIInt    size;
283 
284   PetscFunctionBegin;
285   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&size));
286   PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
287   if (reuse == MAT_REUSE_MATRIX) {
288     PetscBool Sdense;
289 
290     PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
291     PetscCheck(Sdense,PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
292   }
293   PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
294   PetscCall(MatSchurComplementGetKSP(M, &ksp));
295   PetscCall(KSPGetPC(ksp, &pc));
296   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU));
297   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU));
298   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL));
299   PetscCall(PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense));
300   PetscCall(PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense));
301   PetscCall(MatGetSize(B,&n_I,NULL));
302   if (n_I) {
303     if (!Bdense) {
304       PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
305     } else {
306       Bd = B;
307     }
308 
309     if (isLU || isILU || isCHOL) {
310       Mat fact;
311       PetscCall(KSPSetUp(ksp));
312       PetscCall(PCFactorGetMatrix(pc, &fact));
313       PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
314       PetscCall(MatMatSolve(fact, Bd, AinvBd));
315     } else {
316       PetscBool ex = PETSC_TRUE;
317 
318       if (ex) {
319         Mat Ainvd;
320 
321         PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
322         PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
323         PetscCall(MatDestroy(&Ainvd));
324       } else {
325         Vec         sol,rhs;
326         PetscScalar *arrayrhs,*arraysol;
327         PetscInt    i,nrhs,n;
328 
329         PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
330         PetscCall(MatGetSize(Bd,&n,&nrhs));
331         PetscCall(MatDenseGetArray(Bd,&arrayrhs));
332         PetscCall(MatDenseGetArray(AinvBd,&arraysol));
333         PetscCall(KSPGetSolution(ksp,&sol));
334         PetscCall(KSPGetRhs(ksp,&rhs));
335         for (i=0;i<nrhs;i++) {
336           PetscCall(VecPlaceArray(rhs,arrayrhs+i*n));
337           PetscCall(VecPlaceArray(sol,arraysol+i*n));
338           PetscCall(KSPSolve(ksp,rhs,sol));
339           PetscCall(VecResetArray(rhs));
340           PetscCall(VecResetArray(sol));
341         }
342         PetscCall(MatDenseRestoreArray(Bd,&arrayrhs));
343         PetscCall(MatDenseRestoreArray(AinvBd,&arrayrhs));
344       }
345     }
346     if (!Bdense & !issym) {
347       PetscCall(MatDestroy(&Bd));
348     }
349 
350     if (!issym) {
351       if (!Cdense) {
352         PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
353       } else {
354         Cd = C;
355       }
356       PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
357       if (!Cdense) {
358         PetscCall(MatDestroy(&Cd));
359       }
360     } else {
361       PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
362       if (!Bdense) {
363         PetscCall(MatDestroy(&Bd));
364       }
365     }
366     PetscCall(MatDestroy(&AinvBd));
367   }
368 
369   if (D) {
370     Mat       Dd;
371     PetscBool Ddense;
372 
373     PetscCall(PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense));
374     if (!Ddense) {
375       PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
376     } else {
377       Dd = D;
378     }
379     if (n_I) {
380       PetscCall(MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN));
381     } else {
382       if (reuse == MAT_INITIAL_MATRIX) {
383         PetscCall(MatDuplicate(Dd,MAT_COPY_VALUES,S));
384       } else {
385         PetscCall(MatCopy(Dd,*S,SAME_NONZERO_PATTERN));
386       }
387     }
388     if (!Ddense) {
389       PetscCall(MatDestroy(&Dd));
390     }
391   } else {
392     PetscCall(MatScale(*S,-1.0));
393   }
394   PetscFunctionReturn(0);
395 }
396 
397 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
398 {
399   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
400   Mat                    S_all;
401   Vec                    gstash,lstash;
402   VecScatter             sstash;
403   IS                     is_I,is_I_layer;
404   IS                     all_subsets,all_subsets_mult,all_subsets_n;
405   PetscScalar            *stasharray,*Bwork;
406   PetscInt               *nnz,*all_local_idx_N;
407   PetscInt               *auxnum1,*auxnum2;
408   PetscInt               i,subset_size,max_subset_size;
409   PetscInt               n_B,extra,local_size,global_size;
410   PetscInt               local_stash_size;
411   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
412   MPI_Comm               comm_n;
413   PetscBool              deluxe = PETSC_TRUE;
414   PetscBool              use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
415   PetscViewer            matl_dbg_viewer = NULL;
416   PetscBool              flg;
417 
418   PetscFunctionBegin;
419   PetscCall(MatDestroy(&sub_schurs->A));
420   PetscCall(MatDestroy(&sub_schurs->S));
421   if (Ain) {
422     PetscCall(PetscObjectReference((PetscObject)Ain));
423     sub_schurs->A = Ain;
424   }
425 
426   PetscCall(PetscObjectReference((PetscObject)Sin));
427   sub_schurs->S = Sin;
428   if (sub_schurs->schur_explicit) {
429     sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
430   }
431 
432   /* preliminary checks */
433   PetscCheck(sub_schurs->schur_explicit || !compute_Stilda,PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
434 
435   if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
436 
437   /* debug (MATLAB) */
438   if (sub_schurs->debug) {
439     PetscMPIInt size,rank;
440     PetscInt    nr,*print_schurs_ranks,print_schurs = PETSC_FALSE;
441 
442     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size));
443     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
444     nr   = size;
445     PetscCall(PetscMalloc1(nr,&print_schurs_ranks));
446     PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
447     PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg));
448     if (!flg) print_schurs = PETSC_TRUE;
449     else {
450       print_schurs = PETSC_FALSE;
451       for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; }
452     }
453     PetscOptionsEnd();
454     PetscCall(PetscFree(print_schurs_ranks));
455     if (print_schurs) {
456       char filename[256];
457 
458       PetscCall(PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank));
459       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer));
460       PetscCall(PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB));
461     }
462   }
463 
464   /* restrict work on active processes */
465   if (sub_schurs->restrict_comm) {
466     PetscSubcomm subcomm;
467     PetscMPIInt  color,rank;
468 
469     color = 0;
470     if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
471     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
472     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm));
473     PetscCall(PetscSubcommSetNumber(subcomm,2));
474     PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank));
475     PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL));
476     PetscCall(PetscSubcommDestroy(&subcomm));
477     if (!sub_schurs->n_subs) {
478       PetscCall(PetscCommDestroy(&comm_n));
479       PetscFunctionReturn(0);
480     }
481   } else {
482     PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL));
483   }
484 
485   /* get Schur complement matrices */
486   if (!sub_schurs->schur_explicit) {
487     Mat       tA_IB,tA_BI,tA_BB;
488     PetscBool isseqsbaij;
489     PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB));
490     PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij));
491     if (isseqsbaij) {
492       PetscCall(MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB));
493       PetscCall(MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB));
494       PetscCall(MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI));
495     } else {
496       PetscCall(PetscObjectReference((PetscObject)tA_BB));
497       A_BB = tA_BB;
498       PetscCall(PetscObjectReference((PetscObject)tA_IB));
499       A_IB = tA_IB;
500       PetscCall(PetscObjectReference((PetscObject)tA_BI));
501       A_BI = tA_BI;
502     }
503   } else {
504     A_II = NULL;
505     A_IB = NULL;
506     A_BI = NULL;
507     A_BB = NULL;
508   }
509   S_all = NULL;
510 
511   /* determine interior problems */
512   PetscCall(ISGetLocalSize(sub_schurs->is_I,&i));
513   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
514     PetscBT                touched;
515     const PetscInt*        idx_B;
516     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
517 
518     PetscCheck(xadj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
519     /* get sizes */
520     PetscCall(ISGetLocalSize(sub_schurs->is_I,&n_I));
521     PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B));
522 
523     PetscCall(PetscMalloc1(n_I+n_B,&local_numbering));
524     PetscCall(PetscBTCreate(n_I+n_B,&touched));
525     PetscCall(PetscBTMemzero(n_I+n_B,touched));
526 
527     /* all boundary dofs must be skipped when adding layers */
528     PetscCall(ISGetIndices(sub_schurs->is_B,&idx_B));
529     for (j=0;j<n_B;j++) {
530       PetscCall(PetscBTSet(touched,idx_B[j]));
531     }
532     PetscCall(PetscArraycpy(local_numbering,idx_B,n_B));
533     PetscCall(ISRestoreIndices(sub_schurs->is_B,&idx_B));
534 
535     /* add prescribed number of layers of dofs */
536     n_local_dofs = n_B;
537     n_prev_added = n_B;
538     for (layer=0;layer<nlayers;layer++) {
539       PetscInt n_added = 0;
540       if (n_local_dofs == n_I+n_B) break;
541       PetscCheck(n_local_dofs <= n_I+n_B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")",layer,n_local_dofs,n_I+n_B);
542       PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added));
543       n_prev_added = n_added;
544       n_local_dofs += n_added;
545       if (!n_added) break;
546     }
547     PetscCall(PetscBTDestroy(&touched));
548 
549     /* IS for I layer dofs in original numbering */
550     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer));
551     PetscCall(PetscFree(local_numbering));
552     PetscCall(ISSort(is_I_layer));
553     /* IS for I layer dofs in I numbering */
554     if (!sub_schurs->schur_explicit) {
555       ISLocalToGlobalMapping ItoNmap;
556       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap));
557       PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I));
558       PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
559 
560       /* II block */
561       PetscCall(MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II));
562     }
563   } else {
564     PetscInt n_I;
565 
566     /* IS for I dofs in original numbering */
567     PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
568     is_I_layer = sub_schurs->is_I;
569 
570     /* IS for I dofs in I numbering (strided 1) */
571     if (!sub_schurs->schur_explicit) {
572       PetscCall(ISGetSize(sub_schurs->is_I,&n_I));
573       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I));
574 
575       /* II block is the same */
576       PetscCall(PetscObjectReference((PetscObject)A_II));
577       AE_II = A_II;
578     }
579   }
580 
581   /* Get info on subset sizes and sum of all subsets sizes */
582   max_subset_size = 0;
583   local_size = 0;
584   for (i=0;i<sub_schurs->n_subs;i++) {
585     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
586     max_subset_size = PetscMax(subset_size,max_subset_size);
587     local_size += subset_size;
588   }
589 
590   /* Work arrays for local indices */
591   extra = 0;
592   PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B));
593   if (sub_schurs->schur_explicit && is_I_layer) {
594     PetscCall(ISGetLocalSize(is_I_layer,&extra));
595   }
596   PetscCall(PetscMalloc1(n_B+extra,&all_local_idx_N));
597   if (extra) {
598     const PetscInt *idxs;
599     PetscCall(ISGetIndices(is_I_layer,&idxs));
600     PetscCall(PetscArraycpy(all_local_idx_N,idxs,extra));
601     PetscCall(ISRestoreIndices(is_I_layer,&idxs));
602   }
603   PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum1));
604   PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum2));
605 
606   /* Get local indices in local numbering */
607   local_size = 0;
608   local_stash_size = 0;
609   for (i=0;i<sub_schurs->n_subs;i++) {
610     const PetscInt *idxs;
611 
612     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
613     PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs));
614     /* start (smallest in global ordering) and multiplicity */
615     auxnum1[i] = idxs[0];
616     auxnum2[i] = subset_size*subset_size;
617     /* subset indices in local numbering */
618     PetscCall(PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size));
619     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs));
620     local_size += subset_size;
621     local_stash_size += subset_size*subset_size;
622   }
623 
624   /* allocate extra workspace needed only for GETRI or SYTRF */
625   use_potr = use_sytr = PETSC_FALSE;
626   if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
627     use_potr = PETSC_TRUE;
628   } else if (sub_schurs->is_symmetric) {
629     use_sytr = PETSC_TRUE;
630   }
631   if (local_size && !use_potr) {
632     PetscScalar  lwork,dummyscalar = 0.;
633     PetscBLASInt dummyint = 0;
634 
635     B_lwork = -1;
636     PetscCall(PetscBLASIntCast(local_size,&B_N));
637     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
638     if (use_sytr) {
639       PetscCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
640       PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr);
641     } else {
642       PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
643       PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
644     }
645     PetscCall(PetscFPTrapPop());
646     PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork));
647     PetscCall(PetscMalloc2(B_lwork,&Bwork,B_N,&pivots));
648   } else {
649     Bwork = NULL;
650     pivots = NULL;
651   }
652 
653   /* prepare data for summing up properly schurs on subsets */
654   PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n));
655   PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets));
656   PetscCall(ISDestroy(&all_subsets_n));
657   PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult));
658   PetscCall(ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n));
659   PetscCall(ISDestroy(&all_subsets));
660   PetscCall(ISDestroy(&all_subsets_mult));
661   PetscCall(ISGetLocalSize(all_subsets_n,&i));
662   PetscCheck(i == local_stash_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT,i,local_stash_size);
663   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash));
664   PetscCall(VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash));
665   PetscCall(VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash));
666   PetscCall(ISDestroy(&all_subsets_n));
667 
668   /* subset indices in local boundary numbering */
669   if (!sub_schurs->is_Ej_all) {
670     PetscInt *all_local_idx_B;
671 
672     PetscCall(PetscMalloc1(local_size,&all_local_idx_B));
673     PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B));
674     PetscCheck(subset_size == local_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT,subset_size,local_size);
675     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all));
676   }
677 
678   if (change) {
679     ISLocalToGlobalMapping BtoS;
680     IS                     change_primal_B;
681     IS                     change_primal_all;
682 
683     PetscCheck(!sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
684     PetscCheck(!sub_schurs->change,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
685     PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub));
686     for (i=0;i<sub_schurs->n_subs;i++) {
687       ISLocalToGlobalMapping NtoS;
688       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS));
689       PetscCall(ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i]));
690       PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
691     }
692     PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B));
693     PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS));
694     PetscCall(ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all));
695     PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
696     PetscCall(ISDestroy(&change_primal_B));
697     PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change));
698     for (i=0;i<sub_schurs->n_subs;i++) {
699       Mat change_sub;
700 
701       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
702       PetscCall(KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i]));
703       PetscCall(KSPSetType(sub_schurs->change[i],KSPPREONLY));
704       if (!sub_schurs->change_with_qr) {
705         PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub));
706       } else {
707         Mat change_subt;
708         PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt));
709         PetscCall(MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub));
710         PetscCall(MatDestroy(&change_subt));
711       }
712       PetscCall(KSPSetOperators(sub_schurs->change[i],change_sub,change_sub));
713       PetscCall(MatDestroy(&change_sub));
714       PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix));
715       PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_"));
716     }
717     PetscCall(ISDestroy(&change_primal_all));
718   }
719 
720   /* Local matrix of all local Schur on subsets (transposed) */
721   if (!sub_schurs->S_Ej_all) {
722     Mat         T;
723     PetscScalar *v;
724     PetscInt    *ii,*jj;
725     PetscInt    cum,i,j,k;
726 
727     /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
728        Allocate properly a representative matrix and duplicate */
729     PetscCall(PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v));
730     ii[0] = 0;
731     cum   = 0;
732     for (i=0;i<sub_schurs->n_subs;i++) {
733       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
734       for (j=0;j<subset_size;j++) {
735         const PetscInt row = cum+j;
736         PetscInt col = cum;
737 
738         ii[row+1] = ii[row] + subset_size;
739         for (k=ii[row];k<ii[row+1];k++) {
740           jj[k] = col;
741           col++;
742         }
743       }
744       cum += subset_size;
745     }
746     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T));
747     PetscCall(MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all));
748     PetscCall(MatDestroy(&T));
749     PetscCall(PetscFree3(ii,jj,v));
750   }
751   /* matrices for deluxe scaling and adaptive selection */
752   if (compute_Stilda) {
753     if (!sub_schurs->sum_S_Ej_tilda_all) {
754       PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all));
755     }
756     if (!sub_schurs->sum_S_Ej_inv_all && deluxe) {
757       PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all));
758     }
759   }
760 
761   /* Compute Schur complements explicitly */
762   F = NULL;
763   if (!sub_schurs->schur_explicit) {
764     /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
765        it is not efficient, unless the economic version of the scaling is used */
766     Mat         S_Ej_expl;
767     PetscScalar *work;
768     PetscInt    j,*dummy_idx;
769     PetscBool   Sdense;
770 
771     PetscCall(PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work));
772     local_size = 0;
773     for (i=0;i<sub_schurs->n_subs;i++) {
774       IS  is_subset_B;
775       Mat AE_EE,AE_IE,AE_EI,S_Ej;
776 
777       /* subsets in original and boundary numbering */
778       PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B));
779       /* EE block */
780       PetscCall(MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE));
781       /* IE block */
782       PetscCall(MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE));
783       /* EI block */
784       if (sub_schurs->is_symmetric) {
785         PetscCall(MatCreateTranspose(AE_IE,&AE_EI));
786       } else if (sub_schurs->is_hermitian) {
787         PetscCall(MatCreateHermitianTranspose(AE_IE,&AE_EI));
788       } else {
789         PetscCall(MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI));
790       }
791       PetscCall(ISDestroy(&is_subset_B));
792       PetscCall(MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej));
793       PetscCall(MatDestroy(&AE_EE));
794       PetscCall(MatDestroy(&AE_IE));
795       PetscCall(MatDestroy(&AE_EI));
796       if (AE_II == A_II) { /* we can reuse the same ksp */
797         KSP ksp;
798         PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&ksp));
799         PetscCall(MatSchurComplementSetKSP(S_Ej,ksp));
800       } else { /* build new ksp object which inherits ksp and pc types from the original one */
801         KSP       origksp,schurksp;
802         PC        origpc,schurpc;
803         KSPType   ksp_type;
804         PetscInt  n_internal;
805         PetscBool ispcnone;
806 
807         PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&origksp));
808         PetscCall(MatSchurComplementGetKSP(S_Ej,&schurksp));
809         PetscCall(KSPGetType(origksp,&ksp_type));
810         PetscCall(KSPSetType(schurksp,ksp_type));
811         PetscCall(KSPGetPC(schurksp,&schurpc));
812         PetscCall(KSPGetPC(origksp,&origpc));
813         PetscCall(PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone));
814         if (!ispcnone) {
815           PCType pc_type;
816           PetscCall(PCGetType(origpc,&pc_type));
817           PetscCall(PCSetType(schurpc,pc_type));
818         } else {
819           PetscCall(PCSetType(schurpc,PCLU));
820         }
821         PetscCall(ISGetSize(is_I,&n_internal));
822         if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
823           MatSolverType solver = NULL;
824           PetscCall(PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver));
825           if (solver) PetscCall(PCFactorSetMatSolverType(schurpc,solver));
826         }
827         PetscCall(KSPSetUp(schurksp));
828       }
829       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
830       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl));
831       PetscCall(PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl));
832       PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense));
833       if (Sdense) {
834         for (j=0;j<subset_size;j++) {
835           dummy_idx[j]=local_size+j;
836         }
837         PetscCall(MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES));
838       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
839       PetscCall(MatDestroy(&S_Ej));
840       PetscCall(MatDestroy(&S_Ej_expl));
841       local_size += subset_size;
842     }
843     PetscCall(PetscFree2(dummy_idx,work));
844     /* free */
845     PetscCall(ISDestroy(&is_I));
846     PetscCall(MatDestroy(&AE_II));
847     PetscCall(PetscFree(all_local_idx_N));
848   } else {
849     Mat               A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL;
850     Mat               *gdswA;
851     Vec               Dall = NULL;
852     IS                is_A_all,*is_p_r = NULL;
853     MatType           Stype;
854     PetscScalar       *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL;
855     PetscScalar       *SEj_arr = NULL,*SEjinv_arr = NULL;
856     const PetscScalar *rS_data;
857     PetscInt          n,n_I,size_schur,size_active_schur,cum,cum2;
858     PetscBool         economic,solver_S,S_lower_triangular = PETSC_FALSE;
859     PetscBool         schur_has_vertices,factor_workaround;
860     PetscBool         use_cholesky;
861 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
862     PetscBool         oldpin;
863 #endif
864 
865     /* get sizes */
866     n_I = 0;
867     if (is_I_layer) {
868       PetscCall(ISGetLocalSize(is_I_layer,&n_I));
869     }
870     economic = PETSC_FALSE;
871     PetscCall(ISGetLocalSize(sub_schurs->is_I,&cum));
872     if (cum != n_I) economic = PETSC_TRUE;
873     PetscCall(MatGetLocalSize(sub_schurs->A,&n,NULL));
874     size_active_schur = local_size;
875 
876     /* import scaling vector (wrong formulation if we have 3D edges) */
877     if (scaling && compute_Stilda) {
878       const PetscScalar *array;
879       PetscScalar       *array2;
880       const PetscInt    *idxs;
881       PetscInt          i;
882 
883       PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs));
884       PetscCall(VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall));
885       PetscCall(VecGetArrayRead(scaling,&array));
886       PetscCall(VecGetArray(Dall,&array2));
887       for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]];
888       PetscCall(VecRestoreArray(Dall,&array2));
889       PetscCall(VecRestoreArrayRead(scaling,&array));
890       PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs));
891       deluxe = PETSC_FALSE;
892     }
893 
894     /* size active schurs does not count any dirichlet or vertex dof on the interface */
895     factor_workaround = PETSC_FALSE;
896     schur_has_vertices = PETSC_FALSE;
897     cum = n_I+size_active_schur;
898     if (sub_schurs->is_dir) {
899       const PetscInt* idxs;
900       PetscInt        n_dir;
901 
902       PetscCall(ISGetLocalSize(sub_schurs->is_dir,&n_dir));
903       PetscCall(ISGetIndices(sub_schurs->is_dir,&idxs));
904       PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_dir));
905       PetscCall(ISRestoreIndices(sub_schurs->is_dir,&idxs));
906       cum += n_dir;
907       if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
908     }
909     /* include the primal vertices in the Schur complement */
910     if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
911       PetscInt n_v;
912 
913       PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
914       if (n_v) {
915         const PetscInt* idxs;
916 
917         PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs));
918         PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_v));
919         PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs));
920         cum += n_v;
921         if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
922         schur_has_vertices = PETSC_TRUE;
923       }
924     }
925     size_schur = cum - n_I;
926     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all));
927 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
928     oldpin = sub_schurs->A->boundtocpu;
929     PetscCall(MatBindToCPU(sub_schurs->A,PETSC_TRUE));
930 #endif
931     if (cum == n) {
932       PetscCall(ISSetPermutation(is_A_all));
933       PetscCall(MatPermute(sub_schurs->A,is_A_all,is_A_all,&A));
934     } else {
935       PetscCall(MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A));
936     }
937 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
938     PetscCall(MatBindToCPU(sub_schurs->A,oldpin));
939 #endif
940     PetscCall(MatSetOptionsPrefixFactor(A,sub_schurs->prefix));
941     PetscCall(MatAppendOptionsPrefixFactor(A,"sub_schurs_"));
942 
943     /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
944        this is a workaround */
945     if (benign_n) {
946       Vec                    v,benign_AIIm1_ones;
947       ISLocalToGlobalMapping N_to_reor;
948       IS                     is_p0,is_p0_p;
949       PetscScalar            *cs_AIB,*AIIm1_data;
950       PetscInt               sizeA;
951 
952       PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor));
953       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0));
954       PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p));
955       PetscCall(ISDestroy(&is_p0));
956       PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones));
957       PetscCall(VecGetSize(v,&sizeA));
958       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat));
959       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat));
960       PetscCall(MatDenseGetArray(cs_AIB_mat,&cs_AIB));
961       PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
962       PetscCall(PetscMalloc1(benign_n,&is_p_r));
963       /* compute colsum of A_IB restricted to pressures */
964       for (i=0;i<benign_n;i++) {
965         const PetscScalar *array;
966         const PetscInt    *idxs;
967         PetscInt          j,nz;
968 
969         PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i]));
970         PetscCall(ISGetLocalSize(is_p_r[i],&nz));
971         PetscCall(ISGetIndices(is_p_r[i],&idxs));
972         for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.;
973         PetscCall(ISRestoreIndices(is_p_r[i],&idxs));
974         PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
975         PetscCall(MatMult(A,benign_AIIm1_ones,v));
976         PetscCall(VecResetArray(benign_AIIm1_ones));
977         PetscCall(VecGetArrayRead(v,&array));
978         for (j=0;j<size_schur;j++) {
979 #if defined(PETSC_USE_COMPLEX)
980           cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz));
981 #else
982           cs_AIB[i*size_schur + j] = array[j+n_I]/nz;
983 #endif
984         }
985         PetscCall(VecRestoreArrayRead(v,&array));
986       }
987       PetscCall(MatDenseRestoreArray(cs_AIB_mat,&cs_AIB));
988       PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
989       PetscCall(VecDestroy(&v));
990       PetscCall(VecDestroy(&benign_AIIm1_ones));
991       PetscCall(MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE));
992       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
993       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
994       PetscCall(MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL));
995       PetscCall(ISDestroy(&is_p0_p));
996       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
997     }
998     PetscCall(MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric));
999     PetscCall(MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian));
1000     PetscCall(MatSetOption(A,MAT_SPD,sub_schurs->is_posdef));
1001 
1002     /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
1003     use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
1004 
1005     /* when using the benign subspace trick, the local Schur complements are SPD */
1006     /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
1007        Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
1008     if (benign_trick) {
1009       sub_schurs->is_posdef = PETSC_TRUE;
1010       PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg));
1011       if (flg) use_cholesky = PETSC_FALSE;
1012     }
1013 
1014     if (n_I) {
1015       IS        is_schur;
1016       char      stype[64];
1017       PetscBool gpu = PETSC_FALSE;
1018 
1019       if (use_cholesky) {
1020         PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F));
1021       } else {
1022         PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F));
1023       }
1024       PetscCall(MatSetErrorIfFailure(A,PETSC_TRUE));
1025 #if defined(PETSC_HAVE_MKL_PARDISO)
1026       if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F,10,10));
1027 #endif
1028       /* subsets ordered last */
1029       PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur));
1030       PetscCall(MatFactorSetSchurIS(F,is_schur));
1031       PetscCall(ISDestroy(&is_schur));
1032 
1033       /* factorization step */
1034       if (use_cholesky) {
1035         PetscCall(MatCholeskyFactorSymbolic(F,A,NULL,NULL));
1036 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1037         PetscCall(MatMumpsSetIcntl(F,19,2));
1038 #endif
1039         PetscCall(MatCholeskyFactorNumeric(F,A,NULL));
1040         S_lower_triangular = PETSC_TRUE;
1041       } else {
1042         PetscCall(MatLUFactorSymbolic(F,A,NULL,NULL,NULL));
1043 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1044         PetscCall(MatMumpsSetIcntl(F,19,3));
1045 #endif
1046         PetscCall(MatLUFactorNumeric(F,A,NULL));
1047       }
1048       PetscCall(MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view"));
1049 
1050       if (matl_dbg_viewer) {
1051         Mat S;
1052         IS  is;
1053 
1054         PetscCall(PetscObjectSetName((PetscObject)A,"A"));
1055         PetscCall(MatView(A,matl_dbg_viewer));
1056         PetscCall(MatFactorCreateSchurComplement(F,&S,NULL));
1057         PetscCall(PetscObjectSetName((PetscObject)S,"S"));
1058         PetscCall(MatView(S,matl_dbg_viewer));
1059         PetscCall(MatDestroy(&S));
1060         PetscCall(ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is));
1061         PetscCall(PetscObjectSetName((PetscObject)is,"I"));
1062         PetscCall(ISView(is,matl_dbg_viewer));
1063         PetscCall(ISDestroy(&is));
1064         PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is));
1065         PetscCall(PetscObjectSetName((PetscObject)is,"B"));
1066         PetscCall(ISView(is,matl_dbg_viewer));
1067         PetscCall(ISDestroy(&is));
1068         PetscCall(PetscObjectSetName((PetscObject)is_A_all,"IA"));
1069         PetscCall(ISView(is_A_all,matl_dbg_viewer));
1070         for (i=0,cum=0;i<sub_schurs->n_subs;i++) {
1071           IS   is;
1072           char name[16];
1073 
1074           PetscCall(PetscSNPrintf(name,sizeof(name),"IE%" PetscInt_FMT,i));
1075           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1076           PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is));
1077           PetscCall(PetscObjectSetName((PetscObject)is,name));
1078           PetscCall(ISView(is,matl_dbg_viewer));
1079           PetscCall(ISDestroy(&is));
1080           if (sub_schurs->change) {
1081             Mat T;
1082 
1083             PetscCall(PetscSNPrintf(name,sizeof(name),"TE%" PetscInt_FMT,i));
1084             PetscCall(KSPGetOperators(sub_schurs->change[i],&T,NULL));
1085             PetscCall(PetscObjectSetName((PetscObject)T,name));
1086             PetscCall(MatView(T,matl_dbg_viewer));
1087             PetscCall(PetscSNPrintf(name,sizeof(name),"ITE%" PetscInt_FMT,i));
1088             PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i],name));
1089             PetscCall(ISView(sub_schurs->change_primal_sub[i],matl_dbg_viewer));
1090           }
1091           cum += subset_size;
1092         }
1093         PetscCall(PetscViewerFlush(matl_dbg_viewer));
1094       }
1095 
1096       /* get explicit Schur Complement computed during numeric factorization */
1097       PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1098       PetscCall(PetscStrncpy(stype,MATSEQDENSE,sizeof(stype)));
1099 #if defined(PETSC_HAVE_CUDA)
1100       PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,""));
1101 #endif
1102       if (gpu) PetscCall(PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype)));
1103       PetscCall(PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL));
1104       PetscCall(MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all));
1105       PetscCall(MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef));
1106       PetscCall(MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian));
1107       PetscCall(MatGetType(S_all,&Stype));
1108 
1109       /* we can reuse the solvers if we are not using the economic version */
1110       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1111       if (!sub_schurs->gdsw) {
1112         factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1113         if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur)
1114           reuse_solvers = factor_workaround = PETSC_FALSE;
1115       }
1116       solver_S = PETSC_TRUE;
1117 
1118       /* update the Schur complement with the change of basis on the pressures */
1119       if (benign_n) {
1120         const PetscScalar *cs_AIB;
1121         PetscScalar       *S_data,*AIIm1_data;
1122         Mat               S2 = NULL,S3 = NULL; /* dbg */
1123         PetscScalar       *S2_data,*S3_data; /* dbg */
1124         Vec               v,benign_AIIm1_ones;
1125         PetscInt          sizeA;
1126 
1127         PetscCall(MatDenseGetArray(S_all,&S_data));
1128         PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones));
1129         PetscCall(VecGetSize(v,&sizeA));
1130 #if defined(PETSC_HAVE_MUMPS)
1131         PetscCall(MatMumpsSetIcntl(F,26,0));
1132 #endif
1133 #if defined(PETSC_HAVE_MKL_PARDISO)
1134         PetscCall(MatMkl_PardisoSetCntl(F,70,1));
1135 #endif
1136         PetscCall(MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB));
1137         PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
1138         if (matl_dbg_viewer) {
1139           PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2));
1140           PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3));
1141           PetscCall(MatDenseGetArray(S2,&S2_data));
1142           PetscCall(MatDenseGetArray(S3,&S3_data));
1143         }
1144         for (i=0;i<benign_n;i++) {
1145           PetscScalar    *array,sum = 0.,one = 1.,*sums;
1146           const PetscInt *idxs;
1147           PetscInt       k,j,nz;
1148           PetscBLASInt   B_k,B_n;
1149 
1150           PetscCall(PetscCalloc1(benign_n,&sums));
1151           PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
1152           PetscCall(VecCopy(benign_AIIm1_ones,v));
1153           PetscCall(MatSolve(F,v,benign_AIIm1_ones));
1154           PetscCall(MatMult(A,benign_AIIm1_ones,v));
1155           PetscCall(VecResetArray(benign_AIIm1_ones));
1156           /* p0 dofs (eliminated) are excluded from the sums */
1157           for (k=0;k<benign_n;k++) {
1158             PetscCall(ISGetLocalSize(is_p_r[k],&nz));
1159             PetscCall(ISGetIndices(is_p_r[k],&idxs));
1160             for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i];
1161             PetscCall(ISRestoreIndices(is_p_r[k],&idxs));
1162           }
1163           PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array));
1164           if (matl_dbg_viewer) {
1165             Vec  vv;
1166             char name[16];
1167 
1168             PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv));
1169             PetscCall(PetscSNPrintf(name,sizeof(name),"Pvs%" PetscInt_FMT,i));
1170             PetscCall(PetscObjectSetName((PetscObject)vv,name));
1171             PetscCall(VecView(vv,matl_dbg_viewer));
1172           }
1173           /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1174           /* cs_AIB already scaled by 1./nz */
1175           B_k = 1;
1176           for (k=0;k<benign_n;k++) {
1177             sum  = sums[k];
1178             PetscCall(PetscBLASIntCast(size_schur,&B_n));
1179 
1180             if (PetscAbsScalar(sum) == 0.0) continue;
1181             if (k == i) {
1182               PetscCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1183               if (matl_dbg_viewer) {
1184                 PetscCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1185               }
1186             } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1187               sum /= 2.0;
1188               PetscCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1189               if (matl_dbg_viewer) {
1190                 PetscCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1191               }
1192             }
1193           }
1194           sum  = 1.;
1195           PetscCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1196           if (matl_dbg_viewer) {
1197             PetscCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n));
1198           }
1199           PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array));
1200           /* set p0 entry of AIIm1_ones to zero */
1201           PetscCall(ISGetLocalSize(is_p_r[i],&nz));
1202           PetscCall(ISGetIndices(is_p_r[i],&idxs));
1203           for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.;
1204           PetscCall(ISRestoreIndices(is_p_r[i],&idxs));
1205           PetscCall(PetscFree(sums));
1206         }
1207         PetscCall(VecDestroy(&benign_AIIm1_ones));
1208         if (matl_dbg_viewer) {
1209           PetscCall(MatDenseRestoreArray(S2,&S2_data));
1210           PetscCall(MatDenseRestoreArray(S3,&S3_data));
1211         }
1212         if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1213           PetscInt k,j;
1214           for (k=0;k<size_schur;k++) {
1215             for (j=k;j<size_schur;j++) {
1216               S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]);
1217             }
1218           }
1219         }
1220 
1221         /* restore defaults */
1222 #if defined(PETSC_HAVE_MUMPS)
1223         PetscCall(MatMumpsSetIcntl(F,26,-1));
1224 #endif
1225 #if defined(PETSC_HAVE_MKL_PARDISO)
1226         PetscCall(MatMkl_PardisoSetCntl(F,70,0));
1227 #endif
1228         PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB));
1229         PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
1230         PetscCall(VecDestroy(&v));
1231         PetscCall(MatDenseRestoreArray(S_all,&S_data));
1232         if (matl_dbg_viewer) {
1233           Mat S;
1234 
1235           PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1236           PetscCall(MatFactorCreateSchurComplement(F,&S,NULL));
1237           PetscCall(PetscObjectSetName((PetscObject)S,"Sb"));
1238           PetscCall(MatView(S,matl_dbg_viewer));
1239           PetscCall(MatDestroy(&S));
1240           PetscCall(PetscObjectSetName((PetscObject)S2,"S2P"));
1241           PetscCall(MatView(S2,matl_dbg_viewer));
1242           PetscCall(PetscObjectSetName((PetscObject)S3,"S3P"));
1243           PetscCall(MatView(S3,matl_dbg_viewer));
1244           PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat,"cs"));
1245           PetscCall(MatView(cs_AIB_mat,matl_dbg_viewer));
1246           PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1247         }
1248         PetscCall(MatDestroy(&S2));
1249         PetscCall(MatDestroy(&S3));
1250       }
1251       if (!reuse_solvers) {
1252         for (i=0;i<benign_n;i++) {
1253           PetscCall(ISDestroy(&is_p_r[i]));
1254         }
1255         PetscCall(PetscFree(is_p_r));
1256         PetscCall(MatDestroy(&cs_AIB_mat));
1257         PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1258       }
1259     } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1260       PetscCall(MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all));
1261       PetscCall(MatGetType(S_all,&Stype));
1262       reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1263       factor_workaround = PETSC_FALSE;
1264       solver_S = PETSC_FALSE;
1265     }
1266 
1267     if (reuse_solvers) {
1268       Mat                A_II,pA_II,Afake;
1269       Vec                vec1_B;
1270       PCBDDCReuseSolvers msolv_ctx;
1271       PetscInt           n_R;
1272 
1273       if (sub_schurs->reuse_solver) {
1274         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1275       } else {
1276         PetscCall(PetscNew(&sub_schurs->reuse_solver));
1277       }
1278       msolv_ctx = sub_schurs->reuse_solver;
1279       PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,&pA_II,NULL,NULL,NULL));
1280       PetscCall(PetscObjectReference((PetscObject)F));
1281       msolv_ctx->F = F;
1282       PetscCall(MatCreateVecs(F,&msolv_ctx->sol,NULL));
1283       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1284       {
1285         PetscScalar *array;
1286         PetscInt    n;
1287 
1288         PetscCall(VecGetLocalSize(msolv_ctx->sol,&n));
1289         PetscCall(VecGetArray(msolv_ctx->sol,&array));
1290         PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs));
1291         PetscCall(VecRestoreArray(msolv_ctx->sol,&array));
1292       }
1293       msolv_ctx->has_vertices = schur_has_vertices;
1294 
1295       /* interior solver */
1296       PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver));
1297       PetscCall(PCSetOperators(msolv_ctx->interior_solver,A_II,pA_II));
1298       PetscCall(PCSetType(msolv_ctx->interior_solver,PCSHELL));
1299       PetscCall(PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)"));
1300       PetscCall(PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx));
1301       PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1302       PetscCall(PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior));
1303       PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose));
1304       if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Destroy));
1305 
1306       /* correction solver */
1307       if (!sub_schurs->gdsw) {
1308         PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver));
1309         PetscCall(PCSetType(msolv_ctx->correction_solver,PCSHELL));
1310         PetscCall(PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)"));
1311         PetscCall(PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx));
1312         PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1313         PetscCall(PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction));
1314         PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose));
1315 
1316         /* scatter and vecs for Schur complement solver */
1317         PetscCall(MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B));
1318         PetscCall(MatCreateVecs(sub_schurs->S,&vec1_B,NULL));
1319         if (!schur_has_vertices) {
1320           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B));
1321           PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B));
1322           PetscCall(PetscObjectReference((PetscObject)is_A_all));
1323           msolv_ctx->is_R = is_A_all;
1324         } else {
1325           IS              is_B_all;
1326           const PetscInt* idxs;
1327           PetscInt        dual,n_v,n;
1328 
1329           PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
1330           dual = size_schur - n_v;
1331           PetscCall(ISGetLocalSize(is_A_all,&n));
1332           PetscCall(ISGetIndices(is_A_all,&idxs));
1333           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all));
1334           PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B));
1335           PetscCall(ISDestroy(&is_B_all));
1336           PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all));
1337           PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B));
1338           PetscCall(ISDestroy(&is_B_all));
1339           PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R));
1340           PetscCall(ISRestoreIndices(is_A_all,&idxs));
1341         }
1342         PetscCall(ISGetLocalSize(msolv_ctx->is_R,&n_R));
1343         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake));
1344         PetscCall(MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY));
1345         PetscCall(MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY));
1346         PetscCall(PCSetOperators(msolv_ctx->correction_solver,Afake,Afake));
1347         PetscCall(MatDestroy(&Afake));
1348         PetscCall(VecDestroy(&vec1_B));
1349       }
1350       /* communicate benign info to solver context */
1351       if (benign_n) {
1352         PetscScalar *array;
1353 
1354         msolv_ctx->benign_n = benign_n;
1355         msolv_ctx->benign_zerodiag_subs = is_p_r;
1356         PetscCall(PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals));
1357         msolv_ctx->benign_csAIB = cs_AIB_mat;
1358         PetscCall(MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL));
1359         PetscCall(VecGetArray(msolv_ctx->benign_corr_work,&array));
1360         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec));
1361         PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work,&array));
1362         msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1363       }
1364     } else {
1365       if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1366       PetscCall(PetscFree(sub_schurs->reuse_solver));
1367     }
1368     PetscCall(MatDestroy(&A));
1369     PetscCall(ISDestroy(&is_A_all));
1370 
1371     /* Work arrays */
1372     PetscCall(PetscMalloc1(max_subset_size*max_subset_size,&work));
1373 
1374     /* S_Ej_all */
1375     cum = cum2 = 0;
1376     PetscCall(MatDenseGetArrayRead(S_all,&rS_data));
1377     PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr));
1378     if (sub_schurs->sum_S_Ej_inv_all) {
1379       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
1380     }
1381     if (sub_schurs->gdsw) {
1382       PetscCall(MatCreateSubMatrices(sub_schurs->A,sub_schurs->n_subs,sub_schurs->is_subs,sub_schurs->is_subs,MAT_INITIAL_MATRIX,&gdswA));
1383     }
1384     for (i=0;i<sub_schurs->n_subs;i++) {
1385       PetscInt j;
1386 
1387       /* get S_E (or K^i_EE for GDSW) */
1388       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1389       if (sub_schurs->gdsw) {
1390         Mat T;
1391 
1392         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&T));
1393         PetscCall(MatConvert(gdswA[i],MATDENSE,MAT_REUSE_MATRIX,&T));
1394         PetscCall(MatDestroy(&T));
1395       } else {
1396         if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1397           PetscInt k;
1398           for (k=0;k<subset_size;k++) {
1399             for (j=k;j<subset_size;j++) {
1400               work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1401               work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]);
1402             }
1403           }
1404         } else { /* just copy to workspace */
1405           PetscInt k;
1406           for (k=0;k<subset_size;k++) {
1407             for (j=0;j<subset_size;j++) {
1408               work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1409             }
1410           }
1411         }
1412       }
1413       /* insert S_E values */
1414       if (sub_schurs->change) {
1415         Mat change_sub,SEj,T;
1416 
1417         /* change basis */
1418         PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1419         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1420         if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1421           Mat T2;
1422           PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1423           PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1424           PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
1425           PetscCall(MatDestroy(&T2));
1426         } else {
1427           PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1428         }
1429         PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1430         PetscCall(MatDestroy(&T));
1431         PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL));
1432         PetscCall(MatDestroy(&SEj));
1433       }
1434       PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
1435       if (compute_Stilda) {
1436         if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
1437           Mat               M;
1438           const PetscScalar *vals;
1439           PetscBool         isdense,isdensecuda;
1440 
1441           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M));
1442           PetscCall(MatSetOption(M,MAT_SPD,sub_schurs->is_posdef));
1443           PetscCall(MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian));
1444           if (!PetscBTLookup(sub_schurs->is_edge,i)) {
1445             PetscCall(MatSetType(M,Stype));
1446           }
1447           PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense));
1448           PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda));
1449           if (use_cholesky) {
1450             PetscCall(MatCholeskyFactor(M,NULL,NULL));
1451           } else {
1452             PetscCall(MatLUFactor(M,NULL,NULL,NULL));
1453           }
1454           if (isdense) {
1455             PetscCall(MatSeqDenseInvertFactors_Private(M));
1456 #if defined(PETSC_HAVE_CUDA)
1457           } else if (isdensecuda) {
1458             PetscCall(MatSeqDenseCUDAInvertFactors_Private(M));
1459 #endif
1460           } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype);
1461           PetscCall(MatDenseGetArrayRead(M,&vals));
1462           PetscCall(PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size));
1463           PetscCall(MatDenseRestoreArrayRead(M,&vals));
1464           PetscCall(MatDestroy(&M));
1465         } else if (scaling) { /* not using deluxe */
1466           Mat         SEj;
1467           Vec         D;
1468           PetscScalar *array;
1469 
1470           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1471           PetscCall(VecGetArray(Dall,&array));
1472           PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D));
1473           PetscCall(VecRestoreArray(Dall,&array));
1474           PetscCall(VecShift(D,-1.));
1475           PetscCall(MatDiagonalScale(SEj,D,D));
1476           PetscCall(MatDestroy(&SEj));
1477           PetscCall(VecDestroy(&D));
1478           PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
1479         }
1480       }
1481       cum += subset_size;
1482       cum2 += subset_size*(size_schur + 1);
1483       SEj_arr += subset_size*subset_size;
1484       if (SEjinv_arr) SEjinv_arr += subset_size*subset_size;
1485     }
1486     if (sub_schurs->gdsw) {
1487       PetscCall(MatDestroySubMatrices(sub_schurs->n_subs,&gdswA));
1488     }
1489     PetscCall(MatDenseRestoreArrayRead(S_all,&rS_data));
1490     PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr));
1491     if (sub_schurs->sum_S_Ej_inv_all) {
1492       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
1493     }
1494     if (solver_S) {
1495       PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1496     }
1497 
1498     /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1499        however, preliminary tests indicate using GPUs is still faster in the solve phase */
1500 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1501     if (reuse_solvers) {
1502       Mat                  St;
1503       MatFactorSchurStatus st;
1504 
1505       flg  = PETSC_FALSE;
1506       PetscCall(PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL));
1507       PetscCall(MatFactorGetSchurComplement(F,&St,&st));
1508       PetscCall(MatBindToCPU(St,flg));
1509       PetscCall(MatFactorRestoreSchurComplement(F,&St,st));
1510     }
1511 #endif
1512 
1513     schur_factor = NULL;
1514     if (compute_Stilda && size_active_schur) {
1515 
1516       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1517         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1518         PetscCall(PetscArraycpy(SEjinv_arr,work,size_schur*size_schur));
1519         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1520       } else {
1521         Mat S_all_inv=NULL;
1522 
1523         if (solver_S && !sub_schurs->gdsw) {
1524           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1525              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1526           if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1527             PetscScalar *data;
1528             PetscInt     nd = 0;
1529 
1530             PetscCheck(use_potr,PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1531             PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1532             PetscCall(MatDenseGetArray(S_all_inv,&data));
1533             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1534               PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1535             }
1536 
1537             /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1538             if (schur_has_vertices) {
1539               Mat          M;
1540               PetscScalar *tdata;
1541               PetscInt     nv = 0, news;
1542 
1543               PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv));
1544               news = size_active_schur + nv;
1545               PetscCall(PetscCalloc1(news*news,&tdata));
1546               for (i=0;i<size_active_schur;i++) {
1547                 PetscCall(PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i));
1548                 PetscCall(PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv));
1549               }
1550               for (i=0;i<nv;i++) {
1551                 PetscInt k = i+size_active_schur;
1552                 PetscCall(PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i));
1553               }
1554 
1555               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M));
1556               PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1557               PetscCall(MatCholeskyFactor(M,NULL,NULL));
1558               /* save the factors */
1559               cum = 0;
1560               PetscCall(PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor));
1561               for (i=0;i<size_active_schur;i++) {
1562                 PetscCall(PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i));
1563                 cum += size_active_schur - i;
1564               }
1565               for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)]));
1566               PetscCall(MatSeqDenseInvertFactors_Private(M));
1567               /* move back just the active dofs to the Schur complement */
1568               for (i=0;i<size_active_schur;i++) {
1569                 PetscCall(PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur));
1570               }
1571               PetscCall(PetscFree(tdata));
1572               PetscCall(MatDestroy(&M));
1573             } else { /* we can factorize and invert just the activedofs */
1574               Mat         M;
1575               PetscScalar *aux;
1576 
1577               PetscCall(PetscMalloc1(nd,&aux));
1578               for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)];
1579               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M));
1580               PetscCall(MatDenseSetLDA(M,size_schur));
1581               PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1582               PetscCall(MatCholeskyFactor(M,NULL,NULL));
1583               PetscCall(MatSeqDenseInvertFactors_Private(M));
1584               PetscCall(MatDestroy(&M));
1585               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M));
1586               PetscCall(MatZeroEntries(M));
1587               PetscCall(MatDestroy(&M));
1588               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M));
1589               PetscCall(MatDenseSetLDA(M,size_schur));
1590               PetscCall(MatZeroEntries(M));
1591               PetscCall(MatDestroy(&M));
1592               for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i];
1593               PetscCall(PetscFree(aux));
1594             }
1595             PetscCall(MatDenseRestoreArray(S_all_inv,&data));
1596           } else { /* use MatFactor calls to invert S */
1597             PetscCall(MatFactorInvertSchurComplement(F));
1598             PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1599           }
1600         } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1601           PetscCall(PetscObjectReference((PetscObject)S_all));
1602           S_all_inv = S_all;
1603           PetscCall(MatDenseGetArray(S_all_inv,&S_data));
1604           PetscCall(PetscBLASIntCast(size_schur,&B_N));
1605           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1606           if (use_potr) {
1607             PetscCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
1608             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1609             PetscCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
1610             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1611           } else if (use_sytr) {
1612             PetscCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1613             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1614             PetscCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr));
1615             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1616           } else {
1617             PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
1618             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1619             PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1620             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1621           }
1622           PetscCall(PetscLogFlops(1.0*size_schur*size_schur*size_schur));
1623           PetscCall(PetscFPTrapPop());
1624           PetscCall(MatDenseRestoreArray(S_all_inv,&S_data));
1625         } else if (sub_schurs->gdsw) {
1626           Mat      tS, tX, SEj, S_II, S_IE, S_EE;
1627           KSP      pS_II;
1628           PC       pS_II_pc;
1629           IS       EE, II;
1630           PetscInt nS;
1631 
1632           PetscCall(MatFactorCreateSchurComplement(F,&tS,NULL));
1633           PetscCall(MatGetSize(tS,&nS,NULL));
1634           PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1635           for (i=0,cum=0;i<sub_schurs->n_subs;i++) { /* naive implementation */
1636             PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1637             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,SEjinv_arr,&SEj));
1638 
1639             PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&EE));
1640             PetscCall(ISComplement(EE,0,nS,&II));
1641             PetscCall(MatCreateSubMatrix(tS,II,II,MAT_INITIAL_MATRIX,&S_II));
1642             PetscCall(MatCreateSubMatrix(tS,II,EE,MAT_INITIAL_MATRIX,&S_IE));
1643             PetscCall(MatCreateSubMatrix(tS,EE,EE,MAT_INITIAL_MATRIX,&S_EE));
1644             PetscCall(ISDestroy(&II));
1645             PetscCall(ISDestroy(&EE));
1646 
1647             PetscCall(KSPCreate(PETSC_COMM_SELF,&pS_II));
1648             PetscCall(KSPSetType(pS_II,KSPPREONLY));
1649             PetscCall(KSPGetPC(pS_II,&pS_II_pc));
1650             PetscCall(PCSetType(pS_II_pc,PCSVD));
1651             PetscCall(KSPSetOptionsPrefix(pS_II,sub_schurs->prefix));
1652             PetscCall(KSPAppendOptionsPrefix(pS_II,"pseudo_"));
1653             PetscCall(KSPSetOperators(pS_II,S_II,S_II));
1654             PetscCall(MatDestroy(&S_II));
1655             PetscCall(KSPSetFromOptions(pS_II));
1656             PetscCall(KSPSetUp(pS_II));
1657             PetscCall(MatDuplicate(S_IE,MAT_DO_NOT_COPY_VALUES,&tX));
1658             PetscCall(KSPMatSolve(pS_II,S_IE,tX));
1659             PetscCall(KSPDestroy(&pS_II));
1660 
1661             PetscCall(MatTransposeMatMult(S_IE,tX,MAT_REUSE_MATRIX,PETSC_DEFAULT,&SEj));
1662             PetscCall(MatDestroy(&S_IE));
1663             PetscCall(MatDestroy(&tX));
1664             PetscCall(MatAYPX(SEj,-1,S_EE,SAME_NONZERO_PATTERN));
1665             PetscCall(MatDestroy(&S_EE));
1666 
1667             PetscCall(MatDestroy(&SEj));
1668             cum += subset_size;
1669             SEjinv_arr += subset_size*subset_size;
1670           }
1671           PetscCall(MatDestroy(&tS));
1672           PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1673         }
1674         /* S_Ej_tilda_all */
1675         cum = cum2 = 0;
1676         rS_data = NULL;
1677         if (S_all_inv) PetscCall(MatDenseGetArrayRead(S_all_inv,&rS_data));
1678         PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1679         for (i=0;i<sub_schurs->n_subs;i++) {
1680           PetscInt j;
1681 
1682           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1683           /* get (St^-1)_E */
1684           /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1685              will be properly accessed later during adaptive selection */
1686           if (rS_data) {
1687             if (S_lower_triangular) {
1688               PetscInt k;
1689               if (sub_schurs->change) {
1690                 for (k=0;k<subset_size;k++) {
1691                   for (j=k;j<subset_size;j++) {
1692                     work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1693                     work[j*subset_size+k] = work[k*subset_size+j];
1694                   }
1695                 }
1696               } else {
1697                 for (k=0;k<subset_size;k++) {
1698                   for (j=k;j<subset_size;j++) {
1699                     work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1700                   }
1701                 }
1702               }
1703             } else {
1704               PetscInt k;
1705               for (k=0;k<subset_size;k++) {
1706                 for (j=0;j<subset_size;j++) {
1707                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1708                 }
1709               }
1710             }
1711           }
1712           if (sub_schurs->change) {
1713             Mat change_sub,SEj,T;
1714             PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1./PETSC_SMALL;
1715 
1716             /* change basis */
1717             PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1718             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,rS_data ? work : SEjinv_arr,&SEj));
1719             if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1720               Mat T2;
1721               PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1722               PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1723               PetscCall(MatDestroy(&T2));
1724               PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
1725             } else {
1726               PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1727             }
1728             PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1729             PetscCall(MatDestroy(&T));
1730             PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],val,NULL,NULL));
1731             PetscCall(MatDestroy(&SEj));
1732           }
1733           if (rS_data) PetscCall(PetscArraycpy(SEjinv_arr,work,subset_size*subset_size));
1734           cum += subset_size;
1735           cum2 += subset_size*(size_schur + 1);
1736           SEjinv_arr += subset_size*subset_size;
1737         }
1738         PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1739         if (S_all_inv) {
1740           PetscCall(MatDenseRestoreArrayRead(S_all_inv,&rS_data));
1741           if (solver_S) {
1742             if (schur_has_vertices) {
1743               PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED));
1744             } else {
1745               PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED));
1746             }
1747           }
1748         }
1749         PetscCall(MatDestroy(&S_all_inv));
1750       }
1751 
1752       /* move back factors if needed */
1753       if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1754         Mat      S_tmp;
1755         PetscInt nd = 0;
1756 
1757         PetscCheck(solver_S,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
1758         PetscCall(MatFactorGetSchurComplement(F,&S_tmp,NULL));
1759         if (use_potr) {
1760           PetscScalar *data;
1761 
1762           PetscCall(MatDenseGetArray(S_tmp,&data));
1763           PetscCall(PetscArrayzero(data,size_schur*size_schur));
1764 
1765           if (S_lower_triangular) {
1766             cum = 0;
1767             for (i=0;i<size_active_schur;i++) {
1768               PetscCall(PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i));
1769               cum += size_active_schur-i;
1770             }
1771           } else {
1772             PetscCall(PetscArraycpy(data,schur_factor,size_schur*size_schur));
1773           }
1774           if (sub_schurs->is_dir) {
1775             PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1776             for (i=0;i<nd;i++) {
1777               data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1778             }
1779           }
1780           /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1781              set the diagonal entry of the Schur factor to a very large value */
1782           for (i=size_active_schur+nd;i<size_schur;i++) {
1783             data[i*(size_schur+1)] = infty;
1784           }
1785           PetscCall(MatDenseRestoreArray(S_tmp,&data));
1786         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1787         PetscCall(MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED));
1788       }
1789     } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1790       PetscScalar *data;
1791       PetscInt    nd = 0;
1792 
1793       if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1794         PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1795       }
1796       PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1797       PetscCall(MatDenseGetArray(S_all,&data));
1798       for (i=0;i<size_active_schur;i++) {
1799         PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
1800       }
1801       for (i=size_active_schur+nd;i<size_schur;i++) {
1802         PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
1803         data[i*(size_schur+1)] = infty;
1804       }
1805       PetscCall(MatDenseRestoreArray(S_all,&data));
1806       PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1807     }
1808     PetscCall(PetscFree(work));
1809     PetscCall(PetscFree(schur_factor));
1810     PetscCall(VecDestroy(&Dall));
1811   }
1812   PetscCall(ISDestroy(&is_I_layer));
1813   PetscCall(MatDestroy(&S_all));
1814   PetscCall(MatDestroy(&A_BB));
1815   PetscCall(MatDestroy(&A_IB));
1816   PetscCall(MatDestroy(&A_BI));
1817   PetscCall(MatDestroy(&F));
1818 
1819   PetscCall(PetscMalloc1(sub_schurs->n_subs,&nnz));
1820   for (i=0;i<sub_schurs->n_subs;i++) {
1821     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i]));
1822   }
1823   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer));
1824   PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz));
1825   PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
1826   PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
1827   if (compute_Stilda) {
1828     PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz));
1829     PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
1830     PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
1831     if (deluxe) {
1832       PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz));
1833       PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
1834       PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
1835     }
1836   }
1837   PetscCall(ISDestroy(&is_I_layer));
1838 
1839   /* Get local part of (\sum_j S_Ej) */
1840   if (!sub_schurs->sum_S_Ej_all) {
1841     PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all));
1842   }
1843   PetscCall(VecSet(gstash,0.0));
1844   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray));
1845   PetscCall(VecPlaceArray(lstash,stasharray));
1846   PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1847   PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1848   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray));
1849   PetscCall(VecResetArray(lstash));
1850   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray));
1851   PetscCall(VecPlaceArray(lstash,stasharray));
1852   PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1853   PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1854   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray));
1855   PetscCall(VecResetArray(lstash));
1856 
1857   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1858   if (compute_Stilda) {
1859     PetscCall(VecSet(gstash,0.0));
1860     PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1861     PetscCall(VecPlaceArray(lstash,stasharray));
1862     PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1863     PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1864     PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1865     PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1866     PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1867     PetscCall(VecResetArray(lstash));
1868     if (deluxe) {
1869       PetscCall(VecSet(gstash,0.0));
1870       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1871       PetscCall(VecPlaceArray(lstash,stasharray));
1872       PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1873       PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1874       PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1875       PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1876       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1877       PetscCall(VecResetArray(lstash));
1878     } else if (!sub_schurs->gdsw) {
1879       PetscScalar *array;
1880       PetscInt    cum;
1881 
1882       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array));
1883       cum = 0;
1884       for (i=0;i<sub_schurs->n_subs;i++) {
1885         PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1886         PetscCall(PetscBLASIntCast(subset_size,&B_N));
1887         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1888         if (use_potr) {
1889           PetscCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
1890           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1891           PetscCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
1892           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1893         } else if (use_sytr) {
1894           PetscCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1895           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1896           PetscCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr));
1897           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1898         } else {
1899           PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
1900           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1901           PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1902           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1903         }
1904         PetscCall(PetscLogFlops(1.0*subset_size*subset_size*subset_size));
1905         PetscCall(PetscFPTrapPop());
1906         cum += subset_size*subset_size;
1907       }
1908       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array));
1909       PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
1910       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1911       sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1912     }
1913   }
1914   PetscCall(VecDestroy(&lstash));
1915   PetscCall(VecDestroy(&gstash));
1916   PetscCall(VecScatterDestroy(&sstash));
1917 
1918   if (matl_dbg_viewer) {
1919     if (sub_schurs->S_Ej_all) {
1920       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE"));
1921       PetscCall(MatView(sub_schurs->S_Ej_all,matl_dbg_viewer));
1922     }
1923     if (sub_schurs->sum_S_Ej_all) {
1924       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE"));
1925       PetscCall(MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer));
1926     }
1927     if (sub_schurs->sum_S_Ej_inv_all) {
1928       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm"));
1929       PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer));
1930     }
1931     if (sub_schurs->sum_S_Ej_tilda_all) {
1932       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt"));
1933       PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer));
1934     }
1935   }
1936 
1937   /* free workspace */
1938   if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
1939   if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
1940   PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
1941   PetscCall(PetscFree2(Bwork,pivots));
1942   PetscCall(PetscCommDestroy(&comm_n));
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw)
1947 {
1948   IS              *faces,*edges,*all_cc,vertices;
1949   PetscInt        s,i,n_faces,n_edges,n_all_cc;
1950   PetscBool       is_sorted,ispardiso,ismumps;
1951 
1952   PetscFunctionBegin;
1953   PetscCall(ISSorted(is_I,&is_sorted));
1954   PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1955   PetscCall(ISSorted(is_B,&is_sorted));
1956   PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1957 
1958   /* reset any previous data */
1959   PetscCall(PCBDDCSubSchursReset(sub_schurs));
1960 
1961   sub_schurs->gdsw = gdsw;
1962 
1963   /* get index sets for faces and edges (already sorted by global ordering) */
1964   PetscCall(PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
1965   n_all_cc = n_faces+n_edges;
1966   PetscCall(PetscBTCreate(n_all_cc,&sub_schurs->is_edge));
1967   PetscCall(PetscMalloc1(n_all_cc,&all_cc));
1968   n_all_cc = 0;
1969   for (i=0;i<n_faces;i++) {
1970     PetscCall(ISGetSize(faces[i],&s));
1971     if (!s) continue;
1972     if (copycc) {
1973       PetscCall(ISDuplicate(faces[i],&all_cc[n_all_cc]));
1974     } else {
1975       PetscCall(PetscObjectReference((PetscObject)faces[i]));
1976       all_cc[n_all_cc] = faces[i];
1977     }
1978     n_all_cc++;
1979   }
1980   for (i=0;i<n_edges;i++) {
1981     PetscCall(ISGetSize(edges[i],&s));
1982     if (!s) continue;
1983     if (copycc) {
1984       PetscCall(ISDuplicate(edges[i],&all_cc[n_all_cc]));
1985     } else {
1986       PetscCall(PetscObjectReference((PetscObject)edges[i]));
1987       all_cc[n_all_cc] = edges[i];
1988     }
1989     PetscCall(PetscBTSet(sub_schurs->is_edge,n_all_cc));
1990     n_all_cc++;
1991   }
1992   PetscCall(PetscObjectReference((PetscObject)vertices));
1993   sub_schurs->is_vertices = vertices;
1994   PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
1995   sub_schurs->is_dir = NULL;
1996   PetscCall(PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir));
1997 
1998   /* Determine if MatFactor can be used */
1999   PetscCall(PetscStrallocpy(prefix,&sub_schurs->prefix));
2000 #if defined(PETSC_HAVE_MUMPS)
2001   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type)));
2002 #elif defined(PETSC_HAVE_MKL_PARDISO)
2003   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type)));
2004 #else
2005   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type)));
2006 #endif
2007 #if defined(PETSC_USE_COMPLEX)
2008   sub_schurs->is_hermitian  = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
2009 #else
2010   sub_schurs->is_hermitian  = PETSC_TRUE;
2011 #endif
2012   sub_schurs->is_posdef     = PETSC_TRUE;
2013   sub_schurs->is_symmetric  = PETSC_TRUE;
2014   sub_schurs->debug         = PETSC_FALSE;
2015   sub_schurs->restrict_comm = PETSC_FALSE;
2016   PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
2017   PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL));
2018   PetscCall(PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL));
2019   PetscCall(PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL));
2020   PetscCall(PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL));
2021   PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL));
2022   PetscCall(PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL));
2023   PetscOptionsEnd();
2024   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps));
2025   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso));
2026   sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
2027 
2028   /* for reals, symmetric and hermitian are synonims */
2029 #if !defined(PETSC_USE_COMPLEX)
2030   sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
2031   sub_schurs->is_hermitian = sub_schurs->is_symmetric;
2032 #endif
2033 
2034   PetscCall(PetscObjectReference((PetscObject)is_I));
2035   sub_schurs->is_I = is_I;
2036   PetscCall(PetscObjectReference((PetscObject)is_B));
2037   sub_schurs->is_B = is_B;
2038   PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
2039   sub_schurs->l2gmap = graph->l2gmap;
2040   PetscCall(PetscObjectReference((PetscObject)BtoNmap));
2041   sub_schurs->BtoNmap = BtoNmap;
2042   sub_schurs->n_subs = n_all_cc;
2043   sub_schurs->is_subs = all_cc;
2044   sub_schurs->S_Ej_all = NULL;
2045   sub_schurs->sum_S_Ej_all = NULL;
2046   sub_schurs->sum_S_Ej_inv_all = NULL;
2047   sub_schurs->sum_S_Ej_tilda_all = NULL;
2048   sub_schurs->is_Ej_all = NULL;
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
2053 {
2054   PCBDDCSubSchurs schurs_ctx;
2055 
2056   PetscFunctionBegin;
2057   PetscCall(PetscNew(&schurs_ctx));
2058   schurs_ctx->n_subs = 0;
2059   *sub_schurs = schurs_ctx;
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
2064 {
2065   PetscInt       i;
2066 
2067   PetscFunctionBegin;
2068   if (!sub_schurs) PetscFunctionReturn(0);
2069   PetscCall(PetscFree(sub_schurs->prefix));
2070   PetscCall(MatDestroy(&sub_schurs->A));
2071   PetscCall(MatDestroy(&sub_schurs->S));
2072   PetscCall(ISDestroy(&sub_schurs->is_I));
2073   PetscCall(ISDestroy(&sub_schurs->is_B));
2074   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
2075   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
2076   PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
2077   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
2078   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
2079   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
2080   PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
2081   PetscCall(ISDestroy(&sub_schurs->is_vertices));
2082   PetscCall(ISDestroy(&sub_schurs->is_dir));
2083   PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
2084   for (i=0;i<sub_schurs->n_subs;i++) {
2085     PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
2086   }
2087   if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs));
2088   if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
2089   PetscCall(PetscFree(sub_schurs->reuse_solver));
2090   if (sub_schurs->change) {
2091     for (i=0;i<sub_schurs->n_subs;i++) {
2092       PetscCall(KSPDestroy(&sub_schurs->change[i]));
2093       PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
2094     }
2095   }
2096   PetscCall(PetscFree(sub_schurs->change));
2097   PetscCall(PetscFree(sub_schurs->change_primal_sub));
2098   sub_schurs->n_subs = 0;
2099   PetscFunctionReturn(0);
2100 }
2101 
2102 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs)
2103 {
2104   PetscFunctionBegin;
2105   PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2106   PetscCall(PetscFree(*sub_schurs));
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
2111 {
2112   PetscInt       i,j,n;
2113 
2114   PetscFunctionBegin;
2115   n = 0;
2116   for (i=-n_prev;i<0;i++) {
2117     PetscInt start_dof = queue_tip[i];
2118     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
2119       PetscInt dof = adjncy[j];
2120       if (!PetscBTLookup(touched,dof)) {
2121         PetscCall(PetscBTSet(touched,dof));
2122         queue_tip[n] = dof;
2123         n++;
2124       }
2125     }
2126   }
2127   *n_added = n;
2128   PetscFunctionReturn(0);
2129 }
2130