xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision 2e65eb737b5b07432530db55f6b2a145ebc548b2)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <../src/mat/impls/dense/seq/dense.h>
4 #include <petscblaslapack.h>
5 
6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec);
9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec);
10 
11 /* if v2 is not present, correction is done in-place */
12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13 {
14   PetscScalar    *array;
15   PetscScalar    *array2;
16 
17   PetscFunctionBegin;
18   if (!ctx->benign_n) PetscFunctionReturn(0);
19   if (sol && full) {
20     PetscInt n_I,size_schur;
21 
22     /* get sizes */
23     PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
24     PetscCall(VecGetSize(v,&n_I));
25     n_I = n_I - size_schur;
26     /* get schur sol from array */
27     PetscCall(VecGetArray(v,&array));
28     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
29     PetscCall(VecRestoreArray(v,&array));
30     /* apply interior sol correction */
31     PetscCall(MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work));
32     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
33     PetscCall(MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v));
34   }
35   if (v2) {
36     PetscInt nl;
37 
38     PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array));
39     PetscCall(VecGetLocalSize(v2,&nl));
40     PetscCall(VecGetArray(v2,&array2));
41     PetscCall(PetscArraycpy(array2,array,nl));
42   } else {
43     PetscCall(VecGetArray(v,&array));
44     array2 = array;
45   }
46   if (!sol) { /* change rhs */
47     PetscInt n;
48     for (n=0;n<ctx->benign_n;n++) {
49       PetscScalar    sum = 0.;
50       const PetscInt *cols;
51       PetscInt       nz,i;
52 
53       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
54       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
55       for (i=0;i<nz-1;i++) sum += array[cols[i]];
56 #if defined(PETSC_USE_COMPLEX)
57       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
58 #else
59       sum = -sum/nz;
60 #endif
61       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
62       ctx->benign_save_vals[n] = array2[cols[nz-1]];
63       array2[cols[nz-1]] = sum;
64       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
65     }
66   } else {
67     PetscInt n;
68     for (n=0;n<ctx->benign_n;n++) {
69       PetscScalar    sum = 0.;
70       const PetscInt *cols;
71       PetscInt       nz,i;
72       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
73       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
74       for (i=0;i<nz-1;i++) sum += array[cols[i]];
75 #if defined(PETSC_USE_COMPLEX)
76       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
77 #else
78       sum = -sum/nz;
79 #endif
80       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
81       array2[cols[nz-1]] = ctx->benign_save_vals[n];
82       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
83     }
84   }
85   if (v2) {
86     PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array));
87     PetscCall(VecRestoreArray(v2,&array2));
88   } else {
89     PetscCall(VecRestoreArray(v,&array));
90   }
91   if (!sol && full) {
92     Vec      usedv;
93     PetscInt n_I,size_schur;
94 
95     /* get sizes */
96     PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
97     PetscCall(VecGetSize(v,&n_I));
98     n_I = n_I - size_schur;
99     /* compute schur rhs correction */
100     if (v2) {
101       usedv = v2;
102     } else {
103       usedv = v;
104     }
105     /* apply schur rhs correction */
106     PetscCall(MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work));
107     PetscCall(VecGetArrayRead(usedv,(const PetscScalar**)&array));
108     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
109     PetscCall(VecRestoreArrayRead(usedv,(const PetscScalar**)&array));
110     PetscCall(MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec));
111     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
112   }
113   PetscFunctionReturn(0);
114 }
115 
116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
117 {
118   PCBDDCReuseSolvers ctx;
119   PetscBool          copy = PETSC_FALSE;
120 
121   PetscFunctionBegin;
122   PetscCall(PCShellGetContext(pc,&ctx));
123   if (full) {
124 #if defined(PETSC_HAVE_MUMPS)
125     PetscCall(MatMumpsSetIcntl(ctx->F,26,-1));
126 #endif
127 #if defined(PETSC_HAVE_MKL_PARDISO)
128     PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0));
129 #endif
130     copy = ctx->has_vertices;
131   } else { /* interior solver */
132 #if defined(PETSC_HAVE_MUMPS)
133     PetscCall(MatMumpsSetIcntl(ctx->F,26,0));
134 #endif
135 #if defined(PETSC_HAVE_MKL_PARDISO)
136     PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,1));
137 #endif
138     copy = PETSC_TRUE;
139   }
140   /* copy rhs into factored matrix workspace */
141   if (copy) {
142     PetscInt    n;
143     PetscScalar *array,*array_solver;
144 
145     PetscCall(VecGetLocalSize(rhs,&n));
146     PetscCall(VecGetArrayRead(rhs,(const PetscScalar**)&array));
147     PetscCall(VecGetArray(ctx->rhs,&array_solver));
148     PetscCall(PetscArraycpy(array_solver,array,n));
149     PetscCall(VecRestoreArray(ctx->rhs,&array_solver));
150     PetscCall(VecRestoreArrayRead(rhs,(const PetscScalar**)&array));
151 
152     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full));
153     if (transpose) {
154       PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol));
155     } else {
156       PetscCall(MatSolve(ctx->F,ctx->rhs,ctx->sol));
157     }
158     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full));
159 
160     /* get back data to caller worskpace */
161     PetscCall(VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
162     PetscCall(VecGetArray(sol,&array));
163     PetscCall(PetscArraycpy(array,array_solver,n));
164     PetscCall(VecRestoreArray(sol,&array));
165     PetscCall(VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
166   } else {
167     if (ctx->benign_n) {
168       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full));
169       if (transpose) {
170         PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,sol));
171       } else {
172         PetscCall(MatSolve(ctx->F,ctx->rhs,sol));
173       }
174       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full));
175     } else {
176       if (transpose) {
177         PetscCall(MatSolveTranspose(ctx->F,rhs,sol));
178       } else {
179         PetscCall(MatSolve(ctx->F,rhs,sol));
180       }
181     }
182   }
183   /* restore defaults */
184 #if defined(PETSC_HAVE_MUMPS)
185   PetscCall(MatMumpsSetIcntl(ctx->F,26,-1));
186 #endif
187 #if defined(PETSC_HAVE_MKL_PARDISO)
188   PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0));
189 #endif
190   PetscFunctionReturn(0);
191 }
192 
193 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
194 {
195   PetscFunctionBegin;
196   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE));
197   PetscFunctionReturn(0);
198 }
199 
200 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
201 {
202   PetscFunctionBegin;
203   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
208 {
209   PetscFunctionBegin;
210   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE));
211   PetscFunctionReturn(0);
212 }
213 
214 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
215 {
216   PetscFunctionBegin;
217   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE));
218   PetscFunctionReturn(0);
219 }
220 
221 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
222 {
223   PCBDDCReuseSolvers ctx;
224   PetscBool          iascii;
225 
226   PetscFunctionBegin;
227   PetscCall(PCShellGetContext(pc,&ctx));
228   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
229   if (iascii) {
230     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO));
231   }
232   PetscCall(MatView(ctx->F,viewer));
233   if (iascii) {
234     PetscCall(PetscViewerPopFormat(viewer));
235   }
236   PetscFunctionReturn(0);
237 }
238 
239 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
240 {
241   PetscInt       i;
242 
243   PetscFunctionBegin;
244   PetscCall(MatDestroy(&reuse->F));
245   PetscCall(VecDestroy(&reuse->sol));
246   PetscCall(VecDestroy(&reuse->rhs));
247   PetscCall(PCDestroy(&reuse->interior_solver));
248   PetscCall(PCDestroy(&reuse->correction_solver));
249   PetscCall(ISDestroy(&reuse->is_R));
250   PetscCall(ISDestroy(&reuse->is_B));
251   PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
252   PetscCall(VecDestroy(&reuse->sol_B));
253   PetscCall(VecDestroy(&reuse->rhs_B));
254   for (i=0;i<reuse->benign_n;i++) {
255     PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
256   }
257   PetscCall(PetscFree(reuse->benign_zerodiag_subs));
258   PetscCall(PetscFree(reuse->benign_save_vals));
259   PetscCall(MatDestroy(&reuse->benign_csAIB));
260   PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
261   PetscCall(VecDestroy(&reuse->benign_corr_work));
262   PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
263   PetscFunctionReturn(0);
264 }
265 
266 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
267 {
268   Mat            B, C, D, Bd, Cd, AinvBd;
269   KSP            ksp;
270   PC             pc;
271   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
272   PetscReal      fill = 2.0;
273   PetscInt       n_I;
274   PetscMPIInt    size;
275 
276   PetscFunctionBegin;
277   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&size));
278   PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
279   if (reuse == MAT_REUSE_MATRIX) {
280     PetscBool Sdense;
281 
282     PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
283     PetscCheck(Sdense,PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
284   }
285   PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
286   PetscCall(MatSchurComplementGetKSP(M, &ksp));
287   PetscCall(KSPGetPC(ksp, &pc));
288   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU));
289   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU));
290   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL));
291   PetscCall(PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense));
292   PetscCall(PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense));
293   PetscCall(MatGetSize(B,&n_I,NULL));
294   if (n_I) {
295     if (!Bdense) {
296       PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
297     } else {
298       Bd = B;
299     }
300 
301     if (isLU || isILU || isCHOL) {
302       Mat fact;
303       PetscCall(KSPSetUp(ksp));
304       PetscCall(PCFactorGetMatrix(pc, &fact));
305       PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
306       PetscCall(MatMatSolve(fact, Bd, AinvBd));
307     } else {
308       PetscBool ex = PETSC_TRUE;
309 
310       if (ex) {
311         Mat Ainvd;
312 
313         PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
314         PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
315         PetscCall(MatDestroy(&Ainvd));
316       } else {
317         Vec         sol,rhs;
318         PetscScalar *arrayrhs,*arraysol;
319         PetscInt    i,nrhs,n;
320 
321         PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
322         PetscCall(MatGetSize(Bd,&n,&nrhs));
323         PetscCall(MatDenseGetArray(Bd,&arrayrhs));
324         PetscCall(MatDenseGetArray(AinvBd,&arraysol));
325         PetscCall(KSPGetSolution(ksp,&sol));
326         PetscCall(KSPGetRhs(ksp,&rhs));
327         for (i=0;i<nrhs;i++) {
328           PetscCall(VecPlaceArray(rhs,arrayrhs+i*n));
329           PetscCall(VecPlaceArray(sol,arraysol+i*n));
330           PetscCall(KSPSolve(ksp,rhs,sol));
331           PetscCall(VecResetArray(rhs));
332           PetscCall(VecResetArray(sol));
333         }
334         PetscCall(MatDenseRestoreArray(Bd,&arrayrhs));
335         PetscCall(MatDenseRestoreArray(AinvBd,&arrayrhs));
336       }
337     }
338     if (!Bdense & !issym) {
339       PetscCall(MatDestroy(&Bd));
340     }
341 
342     if (!issym) {
343       if (!Cdense) {
344         PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
345       } else {
346         Cd = C;
347       }
348       PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
349       if (!Cdense) {
350         PetscCall(MatDestroy(&Cd));
351       }
352     } else {
353       PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
354       if (!Bdense) {
355         PetscCall(MatDestroy(&Bd));
356       }
357     }
358     PetscCall(MatDestroy(&AinvBd));
359   }
360 
361   if (D) {
362     Mat       Dd;
363     PetscBool Ddense;
364 
365     PetscCall(PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense));
366     if (!Ddense) {
367       PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
368     } else {
369       Dd = D;
370     }
371     if (n_I) {
372       PetscCall(MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN));
373     } else {
374       if (reuse == MAT_INITIAL_MATRIX) {
375         PetscCall(MatDuplicate(Dd,MAT_COPY_VALUES,S));
376       } else {
377         PetscCall(MatCopy(Dd,*S,SAME_NONZERO_PATTERN));
378       }
379     }
380     if (!Ddense) {
381       PetscCall(MatDestroy(&Dd));
382     }
383   } else {
384     PetscCall(MatScale(*S,-1.0));
385   }
386   PetscFunctionReturn(0);
387 }
388 
389 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
390 {
391   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
392   Mat                    S_all;
393   Vec                    gstash,lstash;
394   VecScatter             sstash;
395   IS                     is_I,is_I_layer;
396   IS                     all_subsets,all_subsets_mult,all_subsets_n;
397   PetscScalar            *stasharray,*Bwork;
398   PetscInt               *nnz,*all_local_idx_N;
399   PetscInt               *auxnum1,*auxnum2;
400   PetscInt               i,subset_size,max_subset_size;
401   PetscInt               n_B,extra,local_size,global_size;
402   PetscInt               local_stash_size;
403   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
404   MPI_Comm               comm_n;
405   PetscBool              deluxe = PETSC_TRUE;
406   PetscBool              use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
407   PetscViewer            matl_dbg_viewer = NULL;
408   PetscBool              flg;
409 
410   PetscFunctionBegin;
411   PetscCall(MatDestroy(&sub_schurs->A));
412   PetscCall(MatDestroy(&sub_schurs->S));
413   if (Ain) {
414     PetscCall(PetscObjectReference((PetscObject)Ain));
415     sub_schurs->A = Ain;
416   }
417 
418   PetscCall(PetscObjectReference((PetscObject)Sin));
419   sub_schurs->S = Sin;
420   if (sub_schurs->schur_explicit) {
421     sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
422   }
423 
424   /* preliminary checks */
425   PetscCheck(sub_schurs->schur_explicit || !compute_Stilda,PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
426 
427   if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
428 
429   /* debug (MATLAB) */
430   if (sub_schurs->debug) {
431     PetscMPIInt size,rank;
432     PetscInt    nr,*print_schurs_ranks,print_schurs = PETSC_FALSE;
433 
434     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size));
435     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
436     nr   = size;
437     PetscCall(PetscMalloc1(nr,&print_schurs_ranks));
438     PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
439     PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg));
440     if (!flg) print_schurs = PETSC_TRUE;
441     else {
442       print_schurs = PETSC_FALSE;
443       for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; }
444     }
445     PetscOptionsEnd();
446     PetscCall(PetscFree(print_schurs_ranks));
447     if (print_schurs) {
448       char filename[256];
449 
450       PetscCall(PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank));
451       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer));
452       PetscCall(PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB));
453     }
454   }
455 
456   /* restrict work on active processes */
457   if (sub_schurs->restrict_comm) {
458     PetscSubcomm subcomm;
459     PetscMPIInt  color,rank;
460 
461     color = 0;
462     if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
463     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
464     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm));
465     PetscCall(PetscSubcommSetNumber(subcomm,2));
466     PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank));
467     PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL));
468     PetscCall(PetscSubcommDestroy(&subcomm));
469     if (!sub_schurs->n_subs) {
470       PetscCall(PetscCommDestroy(&comm_n));
471       PetscFunctionReturn(0);
472     }
473   } else {
474     PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL));
475   }
476 
477   /* get Schur complement matrices */
478   if (!sub_schurs->schur_explicit) {
479     Mat       tA_IB,tA_BI,tA_BB;
480     PetscBool isseqsbaij;
481     PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB));
482     PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij));
483     if (isseqsbaij) {
484       PetscCall(MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB));
485       PetscCall(MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB));
486       PetscCall(MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI));
487     } else {
488       PetscCall(PetscObjectReference((PetscObject)tA_BB));
489       A_BB = tA_BB;
490       PetscCall(PetscObjectReference((PetscObject)tA_IB));
491       A_IB = tA_IB;
492       PetscCall(PetscObjectReference((PetscObject)tA_BI));
493       A_BI = tA_BI;
494     }
495   } else {
496     A_II = NULL;
497     A_IB = NULL;
498     A_BI = NULL;
499     A_BB = NULL;
500   }
501   S_all = NULL;
502 
503   /* determine interior problems */
504   PetscCall(ISGetLocalSize(sub_schurs->is_I,&i));
505   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
506     PetscBT                touched;
507     const PetscInt*        idx_B;
508     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
509 
510     PetscCheck(xadj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
511     /* get sizes */
512     PetscCall(ISGetLocalSize(sub_schurs->is_I,&n_I));
513     PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B));
514 
515     PetscCall(PetscMalloc1(n_I+n_B,&local_numbering));
516     PetscCall(PetscBTCreate(n_I+n_B,&touched));
517     PetscCall(PetscBTMemzero(n_I+n_B,touched));
518 
519     /* all boundary dofs must be skipped when adding layers */
520     PetscCall(ISGetIndices(sub_schurs->is_B,&idx_B));
521     for (j=0;j<n_B;j++) {
522       PetscCall(PetscBTSet(touched,idx_B[j]));
523     }
524     PetscCall(PetscArraycpy(local_numbering,idx_B,n_B));
525     PetscCall(ISRestoreIndices(sub_schurs->is_B,&idx_B));
526 
527     /* add prescribed number of layers of dofs */
528     n_local_dofs = n_B;
529     n_prev_added = n_B;
530     for (layer=0;layer<nlayers;layer++) {
531       PetscInt n_added = 0;
532       if (n_local_dofs == n_I+n_B) break;
533       PetscCheck(n_local_dofs <= n_I+n_B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")",layer,n_local_dofs,n_I+n_B);
534       PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added));
535       n_prev_added = n_added;
536       n_local_dofs += n_added;
537       if (!n_added) break;
538     }
539     PetscCall(PetscBTDestroy(&touched));
540 
541     /* IS for I layer dofs in original numbering */
542     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer));
543     PetscCall(PetscFree(local_numbering));
544     PetscCall(ISSort(is_I_layer));
545     /* IS for I layer dofs in I numbering */
546     if (!sub_schurs->schur_explicit) {
547       ISLocalToGlobalMapping ItoNmap;
548       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap));
549       PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I));
550       PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
551 
552       /* II block */
553       PetscCall(MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II));
554     }
555   } else {
556     PetscInt n_I;
557 
558     /* IS for I dofs in original numbering */
559     PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
560     is_I_layer = sub_schurs->is_I;
561 
562     /* IS for I dofs in I numbering (strided 1) */
563     if (!sub_schurs->schur_explicit) {
564       PetscCall(ISGetSize(sub_schurs->is_I,&n_I));
565       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I));
566 
567       /* II block is the same */
568       PetscCall(PetscObjectReference((PetscObject)A_II));
569       AE_II = A_II;
570     }
571   }
572 
573   /* Get info on subset sizes and sum of all subsets sizes */
574   max_subset_size = 0;
575   local_size = 0;
576   for (i=0;i<sub_schurs->n_subs;i++) {
577     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
578     max_subset_size = PetscMax(subset_size,max_subset_size);
579     local_size += subset_size;
580   }
581 
582   /* Work arrays for local indices */
583   extra = 0;
584   PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B));
585   if (sub_schurs->schur_explicit && is_I_layer) {
586     PetscCall(ISGetLocalSize(is_I_layer,&extra));
587   }
588   PetscCall(PetscMalloc1(n_B+extra,&all_local_idx_N));
589   if (extra) {
590     const PetscInt *idxs;
591     PetscCall(ISGetIndices(is_I_layer,&idxs));
592     PetscCall(PetscArraycpy(all_local_idx_N,idxs,extra));
593     PetscCall(ISRestoreIndices(is_I_layer,&idxs));
594   }
595   PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum1));
596   PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum2));
597 
598   /* Get local indices in local numbering */
599   local_size = 0;
600   local_stash_size = 0;
601   for (i=0;i<sub_schurs->n_subs;i++) {
602     const PetscInt *idxs;
603 
604     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
605     PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs));
606     /* start (smallest in global ordering) and multiplicity */
607     auxnum1[i] = idxs[0];
608     auxnum2[i] = subset_size*subset_size;
609     /* subset indices in local numbering */
610     PetscCall(PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size));
611     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs));
612     local_size += subset_size;
613     local_stash_size += subset_size*subset_size;
614   }
615 
616   /* allocate extra workspace needed only for GETRI or SYTRF */
617   use_potr = use_sytr = PETSC_FALSE;
618   if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
619     use_potr = PETSC_TRUE;
620   } else if (sub_schurs->is_symmetric) {
621     use_sytr = PETSC_TRUE;
622   }
623   if (local_size && !use_potr) {
624     PetscScalar  lwork,dummyscalar = 0.;
625     PetscBLASInt dummyint = 0;
626 
627     B_lwork = -1;
628     PetscCall(PetscBLASIntCast(local_size,&B_N));
629     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
630     if (use_sytr) {
631       PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
632       PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr);
633     } else {
634       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
635       PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
636     }
637     PetscCall(PetscFPTrapPop());
638     PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork));
639     PetscCall(PetscMalloc2(B_lwork,&Bwork,B_N,&pivots));
640   } else {
641     Bwork = NULL;
642     pivots = NULL;
643   }
644 
645   /* prepare data for summing up properly schurs on subsets */
646   PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n));
647   PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets));
648   PetscCall(ISDestroy(&all_subsets_n));
649   PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult));
650   PetscCall(ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n));
651   PetscCall(ISDestroy(&all_subsets));
652   PetscCall(ISDestroy(&all_subsets_mult));
653   PetscCall(ISGetLocalSize(all_subsets_n,&i));
654   PetscCheck(i == local_stash_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT,i,local_stash_size);
655   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash));
656   PetscCall(VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash));
657   PetscCall(VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash));
658   PetscCall(ISDestroy(&all_subsets_n));
659 
660   /* subset indices in local boundary numbering */
661   if (!sub_schurs->is_Ej_all) {
662     PetscInt *all_local_idx_B;
663 
664     PetscCall(PetscMalloc1(local_size,&all_local_idx_B));
665     PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B));
666     PetscCheck(subset_size == local_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT,subset_size,local_size);
667     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all));
668   }
669 
670   if (change) {
671     ISLocalToGlobalMapping BtoS;
672     IS                     change_primal_B;
673     IS                     change_primal_all;
674 
675     PetscCheck(!sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
676     PetscCheck(!sub_schurs->change,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
677     PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub));
678     for (i=0;i<sub_schurs->n_subs;i++) {
679       ISLocalToGlobalMapping NtoS;
680       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS));
681       PetscCall(ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i]));
682       PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
683     }
684     PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B));
685     PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS));
686     PetscCall(ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all));
687     PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
688     PetscCall(ISDestroy(&change_primal_B));
689     PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change));
690     for (i=0;i<sub_schurs->n_subs;i++) {
691       Mat change_sub;
692 
693       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
694       PetscCall(KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i]));
695       PetscCall(KSPSetType(sub_schurs->change[i],KSPPREONLY));
696       if (!sub_schurs->change_with_qr) {
697         PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub));
698       } else {
699         Mat change_subt;
700         PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt));
701         PetscCall(MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub));
702         PetscCall(MatDestroy(&change_subt));
703       }
704       PetscCall(KSPSetOperators(sub_schurs->change[i],change_sub,change_sub));
705       PetscCall(MatDestroy(&change_sub));
706       PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix));
707       PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_"));
708     }
709     PetscCall(ISDestroy(&change_primal_all));
710   }
711 
712   /* Local matrix of all local Schur on subsets (transposed) */
713   if (!sub_schurs->S_Ej_all) {
714     Mat         T;
715     PetscScalar *v;
716     PetscInt    *ii,*jj;
717     PetscInt    cum,i,j,k;
718 
719     /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
720        Allocate properly a representative matrix and duplicate */
721     PetscCall(PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v));
722     ii[0] = 0;
723     cum   = 0;
724     for (i=0;i<sub_schurs->n_subs;i++) {
725       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
726       for (j=0;j<subset_size;j++) {
727         const PetscInt row = cum+j;
728         PetscInt col = cum;
729 
730         ii[row+1] = ii[row] + subset_size;
731         for (k=ii[row];k<ii[row+1];k++) {
732           jj[k] = col;
733           col++;
734         }
735       }
736       cum += subset_size;
737     }
738     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T));
739     PetscCall(MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all));
740     PetscCall(MatDestroy(&T));
741     PetscCall(PetscFree3(ii,jj,v));
742   }
743   /* matrices for deluxe scaling and adaptive selection */
744   if (compute_Stilda) {
745     if (!sub_schurs->sum_S_Ej_tilda_all) {
746       PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all));
747     }
748     if (!sub_schurs->sum_S_Ej_inv_all && deluxe) {
749       PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all));
750     }
751   }
752 
753   /* Compute Schur complements explicitly */
754   F = NULL;
755   if (!sub_schurs->schur_explicit) {
756     /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
757        it is not efficient, unless the economic version of the scaling is used */
758     Mat         S_Ej_expl;
759     PetscScalar *work;
760     PetscInt    j,*dummy_idx;
761     PetscBool   Sdense;
762 
763     PetscCall(PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work));
764     local_size = 0;
765     for (i=0;i<sub_schurs->n_subs;i++) {
766       IS  is_subset_B;
767       Mat AE_EE,AE_IE,AE_EI,S_Ej;
768 
769       /* subsets in original and boundary numbering */
770       PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B));
771       /* EE block */
772       PetscCall(MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE));
773       /* IE block */
774       PetscCall(MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE));
775       /* EI block */
776       if (sub_schurs->is_symmetric) {
777         PetscCall(MatCreateTranspose(AE_IE,&AE_EI));
778       } else if (sub_schurs->is_hermitian) {
779         PetscCall(MatCreateHermitianTranspose(AE_IE,&AE_EI));
780       } else {
781         PetscCall(MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI));
782       }
783       PetscCall(ISDestroy(&is_subset_B));
784       PetscCall(MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej));
785       PetscCall(MatDestroy(&AE_EE));
786       PetscCall(MatDestroy(&AE_IE));
787       PetscCall(MatDestroy(&AE_EI));
788       if (AE_II == A_II) { /* we can reuse the same ksp */
789         KSP ksp;
790         PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&ksp));
791         PetscCall(MatSchurComplementSetKSP(S_Ej,ksp));
792       } else { /* build new ksp object which inherits ksp and pc types from the original one */
793         KSP       origksp,schurksp;
794         PC        origpc,schurpc;
795         KSPType   ksp_type;
796         PetscInt  n_internal;
797         PetscBool ispcnone;
798 
799         PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&origksp));
800         PetscCall(MatSchurComplementGetKSP(S_Ej,&schurksp));
801         PetscCall(KSPGetType(origksp,&ksp_type));
802         PetscCall(KSPSetType(schurksp,ksp_type));
803         PetscCall(KSPGetPC(schurksp,&schurpc));
804         PetscCall(KSPGetPC(origksp,&origpc));
805         PetscCall(PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone));
806         if (!ispcnone) {
807           PCType pc_type;
808           PetscCall(PCGetType(origpc,&pc_type));
809           PetscCall(PCSetType(schurpc,pc_type));
810         } else {
811           PetscCall(PCSetType(schurpc,PCLU));
812         }
813         PetscCall(ISGetSize(is_I,&n_internal));
814         if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
815           MatSolverType solver = NULL;
816           PetscCall(PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver));
817           if (solver) {
818             PetscCall(PCFactorSetMatSolverType(schurpc,solver));
819           }
820         }
821         PetscCall(KSPSetUp(schurksp));
822       }
823       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
824       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl));
825       PetscCall(PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl));
826       PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense));
827       if (Sdense) {
828         for (j=0;j<subset_size;j++) {
829           dummy_idx[j]=local_size+j;
830         }
831         PetscCall(MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES));
832       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
833       PetscCall(MatDestroy(&S_Ej));
834       PetscCall(MatDestroy(&S_Ej_expl));
835       local_size += subset_size;
836     }
837     PetscCall(PetscFree2(dummy_idx,work));
838     /* free */
839     PetscCall(ISDestroy(&is_I));
840     PetscCall(MatDestroy(&AE_II));
841     PetscCall(PetscFree(all_local_idx_N));
842   } else {
843     Mat               A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL;
844     Vec               Dall = NULL;
845     IS                is_A_all,*is_p_r = NULL;
846     MatType           Stype;
847     PetscScalar       *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL;
848     PetscScalar       *SEj_arr = NULL,*SEjinv_arr = NULL;
849     const PetscScalar *rS_data;
850     PetscInt          n,n_I,size_schur,size_active_schur,cum,cum2;
851     PetscBool         economic,solver_S,S_lower_triangular = PETSC_FALSE;
852     PetscBool         schur_has_vertices,factor_workaround;
853     PetscBool         use_cholesky;
854 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
855     PetscBool         oldpin;
856 #endif
857 
858     /* get sizes */
859     n_I = 0;
860     if (is_I_layer) {
861       PetscCall(ISGetLocalSize(is_I_layer,&n_I));
862     }
863     economic = PETSC_FALSE;
864     PetscCall(ISGetLocalSize(sub_schurs->is_I,&cum));
865     if (cum != n_I) economic = PETSC_TRUE;
866     PetscCall(MatGetLocalSize(sub_schurs->A,&n,NULL));
867     size_active_schur = local_size;
868 
869     /* import scaling vector (wrong formulation if we have 3D edges) */
870     if (scaling && compute_Stilda) {
871       const PetscScalar *array;
872       PetscScalar       *array2;
873       const PetscInt    *idxs;
874       PetscInt          i;
875 
876       PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs));
877       PetscCall(VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall));
878       PetscCall(VecGetArrayRead(scaling,&array));
879       PetscCall(VecGetArray(Dall,&array2));
880       for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]];
881       PetscCall(VecRestoreArray(Dall,&array2));
882       PetscCall(VecRestoreArrayRead(scaling,&array));
883       PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs));
884       deluxe = PETSC_FALSE;
885     }
886 
887     /* size active schurs does not count any dirichlet or vertex dof on the interface */
888     factor_workaround = PETSC_FALSE;
889     schur_has_vertices = PETSC_FALSE;
890     cum = n_I+size_active_schur;
891     if (sub_schurs->is_dir) {
892       const PetscInt* idxs;
893       PetscInt        n_dir;
894 
895       PetscCall(ISGetLocalSize(sub_schurs->is_dir,&n_dir));
896       PetscCall(ISGetIndices(sub_schurs->is_dir,&idxs));
897       PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_dir));
898       PetscCall(ISRestoreIndices(sub_schurs->is_dir,&idxs));
899       cum += n_dir;
900       factor_workaround = PETSC_TRUE;
901     }
902     /* include the primal vertices in the Schur complement */
903     if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
904       PetscInt n_v;
905 
906       PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
907       if (n_v) {
908         const PetscInt* idxs;
909 
910         PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs));
911         PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_v));
912         PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs));
913         cum += n_v;
914         factor_workaround = PETSC_TRUE;
915         schur_has_vertices = PETSC_TRUE;
916       }
917     }
918     size_schur = cum - n_I;
919     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all));
920 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
921     oldpin = sub_schurs->A->boundtocpu;
922     PetscCall(MatBindToCPU(sub_schurs->A,PETSC_TRUE));
923 #endif
924     if (cum == n) {
925       PetscCall(ISSetPermutation(is_A_all));
926       PetscCall(MatPermute(sub_schurs->A,is_A_all,is_A_all,&A));
927     } else {
928       PetscCall(MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A));
929     }
930 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
931     PetscCall(MatBindToCPU(sub_schurs->A,oldpin));
932 #endif
933     PetscCall(MatSetOptionsPrefix(A,sub_schurs->prefix));
934     PetscCall(MatAppendOptionsPrefix(A,"sub_schurs_"));
935 
936     /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
937        this is a workaround */
938     if (benign_n) {
939       Vec                    v,benign_AIIm1_ones;
940       ISLocalToGlobalMapping N_to_reor;
941       IS                     is_p0,is_p0_p;
942       PetscScalar            *cs_AIB,*AIIm1_data;
943       PetscInt               sizeA;
944 
945       PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor));
946       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0));
947       PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p));
948       PetscCall(ISDestroy(&is_p0));
949       PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones));
950       PetscCall(VecGetSize(v,&sizeA));
951       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat));
952       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat));
953       PetscCall(MatDenseGetArray(cs_AIB_mat,&cs_AIB));
954       PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
955       PetscCall(PetscMalloc1(benign_n,&is_p_r));
956       /* compute colsum of A_IB restricted to pressures */
957       for (i=0;i<benign_n;i++) {
958         const PetscScalar *array;
959         const PetscInt    *idxs;
960         PetscInt          j,nz;
961 
962         PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i]));
963         PetscCall(ISGetLocalSize(is_p_r[i],&nz));
964         PetscCall(ISGetIndices(is_p_r[i],&idxs));
965         for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.;
966         PetscCall(ISRestoreIndices(is_p_r[i],&idxs));
967         PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
968         PetscCall(MatMult(A,benign_AIIm1_ones,v));
969         PetscCall(VecResetArray(benign_AIIm1_ones));
970         PetscCall(VecGetArrayRead(v,&array));
971         for (j=0;j<size_schur;j++) {
972 #if defined(PETSC_USE_COMPLEX)
973           cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz));
974 #else
975           cs_AIB[i*size_schur + j] = array[j+n_I]/nz;
976 #endif
977         }
978         PetscCall(VecRestoreArrayRead(v,&array));
979       }
980       PetscCall(MatDenseRestoreArray(cs_AIB_mat,&cs_AIB));
981       PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
982       PetscCall(VecDestroy(&v));
983       PetscCall(VecDestroy(&benign_AIIm1_ones));
984       PetscCall(MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE));
985       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
986       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
987       PetscCall(MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL));
988       PetscCall(ISDestroy(&is_p0_p));
989       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
990     }
991     PetscCall(MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric));
992     PetscCall(MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian));
993     PetscCall(MatSetOption(A,MAT_SPD,sub_schurs->is_posdef));
994 
995     /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
996     use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
997 
998     /* when using the benign subspace trick, the local Schur complements are SPD */
999     /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
1000        Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
1001     if (benign_trick) {
1002       sub_schurs->is_posdef = PETSC_TRUE;
1003       PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg));
1004       if (flg) use_cholesky = PETSC_FALSE;
1005     }
1006 
1007     if (n_I) {
1008       IS        is_schur;
1009       char      stype[64];
1010       PetscBool gpu = PETSC_FALSE;
1011 
1012       if (use_cholesky) {
1013         PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F));
1014       } else {
1015         PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F));
1016       }
1017       PetscCall(MatSetErrorIfFailure(A,PETSC_TRUE));
1018 #if defined(PETSC_HAVE_MKL_PARDISO)
1019       if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F,10,10));
1020 #endif
1021       /* subsets ordered last */
1022       PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur));
1023       PetscCall(MatFactorSetSchurIS(F,is_schur));
1024       PetscCall(ISDestroy(&is_schur));
1025 
1026       /* factorization step */
1027       if (use_cholesky) {
1028         PetscCall(MatCholeskyFactorSymbolic(F,A,NULL,NULL));
1029 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1030         PetscCall(MatMumpsSetIcntl(F,19,2));
1031 #endif
1032         PetscCall(MatCholeskyFactorNumeric(F,A,NULL));
1033         S_lower_triangular = PETSC_TRUE;
1034       } else {
1035         PetscCall(MatLUFactorSymbolic(F,A,NULL,NULL,NULL));
1036 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1037         PetscCall(MatMumpsSetIcntl(F,19,3));
1038 #endif
1039         PetscCall(MatLUFactorNumeric(F,A,NULL));
1040       }
1041       PetscCall(MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view"));
1042 
1043       if (matl_dbg_viewer) {
1044         Mat S;
1045         IS  is;
1046 
1047         PetscCall(PetscObjectSetName((PetscObject)A,"A"));
1048         PetscCall(MatView(A,matl_dbg_viewer));
1049         PetscCall(MatFactorCreateSchurComplement(F,&S,NULL));
1050         PetscCall(PetscObjectSetName((PetscObject)S,"S"));
1051         PetscCall(MatView(S,matl_dbg_viewer));
1052         PetscCall(MatDestroy(&S));
1053         PetscCall(ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is));
1054         PetscCall(PetscObjectSetName((PetscObject)is,"I"));
1055         PetscCall(ISView(is,matl_dbg_viewer));
1056         PetscCall(ISDestroy(&is));
1057         PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is));
1058         PetscCall(PetscObjectSetName((PetscObject)is,"B"));
1059         PetscCall(ISView(is,matl_dbg_viewer));
1060         PetscCall(ISDestroy(&is));
1061         PetscCall(PetscObjectSetName((PetscObject)is_A_all,"IA"));
1062         PetscCall(ISView(is_A_all,matl_dbg_viewer));
1063       }
1064 
1065       /* get explicit Schur Complement computed during numeric factorization */
1066       PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1067       PetscCall(PetscStrncpy(stype,MATSEQDENSE,sizeof(stype)));
1068 #if defined(PETSC_HAVE_CUDA)
1069       PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,""));
1070 #endif
1071       if (gpu) {
1072         PetscCall(PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype)));
1073       }
1074       PetscCall(PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL));
1075       PetscCall(MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all));
1076       PetscCall(MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef));
1077       PetscCall(MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian));
1078       PetscCall(MatGetType(S_all,&Stype));
1079 
1080       /* we can reuse the solvers if we are not using the economic version */
1081       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1082       factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1083       if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur)
1084         reuse_solvers = factor_workaround = PETSC_FALSE;
1085 
1086       solver_S = PETSC_TRUE;
1087 
1088       /* update the Schur complement with the change of basis on the pressures */
1089       if (benign_n) {
1090         const PetscScalar *cs_AIB;
1091         PetscScalar       *S_data,*AIIm1_data;
1092         Mat               S2 = NULL,S3 = NULL; /* dbg */
1093         PetscScalar       *S2_data,*S3_data; /* dbg */
1094         Vec               v,benign_AIIm1_ones;
1095         PetscInt          sizeA;
1096 
1097         PetscCall(MatDenseGetArray(S_all,&S_data));
1098         PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones));
1099         PetscCall(VecGetSize(v,&sizeA));
1100 #if defined(PETSC_HAVE_MUMPS)
1101         PetscCall(MatMumpsSetIcntl(F,26,0));
1102 #endif
1103 #if defined(PETSC_HAVE_MKL_PARDISO)
1104         PetscCall(MatMkl_PardisoSetCntl(F,70,1));
1105 #endif
1106         PetscCall(MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB));
1107         PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
1108         if (matl_dbg_viewer) {
1109           PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2));
1110           PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3));
1111           PetscCall(MatDenseGetArray(S2,&S2_data));
1112           PetscCall(MatDenseGetArray(S3,&S3_data));
1113         }
1114         for (i=0;i<benign_n;i++) {
1115           PetscScalar    *array,sum = 0.,one = 1.,*sums;
1116           const PetscInt *idxs;
1117           PetscInt       k,j,nz;
1118           PetscBLASInt   B_k,B_n;
1119 
1120           PetscCall(PetscCalloc1(benign_n,&sums));
1121           PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
1122           PetscCall(VecCopy(benign_AIIm1_ones,v));
1123           PetscCall(MatSolve(F,v,benign_AIIm1_ones));
1124           PetscCall(MatMult(A,benign_AIIm1_ones,v));
1125           PetscCall(VecResetArray(benign_AIIm1_ones));
1126           /* p0 dofs (eliminated) are excluded from the sums */
1127           for (k=0;k<benign_n;k++) {
1128             PetscCall(ISGetLocalSize(is_p_r[k],&nz));
1129             PetscCall(ISGetIndices(is_p_r[k],&idxs));
1130             for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i];
1131             PetscCall(ISRestoreIndices(is_p_r[k],&idxs));
1132           }
1133           PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array));
1134           if (matl_dbg_viewer) {
1135             Vec  vv;
1136             char name[16];
1137 
1138             PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv));
1139             PetscCall(PetscSNPrintf(name,sizeof(name),"Pvs%" PetscInt_FMT,i));
1140             PetscCall(PetscObjectSetName((PetscObject)vv,name));
1141             PetscCall(VecView(vv,matl_dbg_viewer));
1142           }
1143           /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1144           /* cs_AIB already scaled by 1./nz */
1145           B_k = 1;
1146           for (k=0;k<benign_n;k++) {
1147             sum  = sums[k];
1148             PetscCall(PetscBLASIntCast(size_schur,&B_n));
1149 
1150             if (PetscAbsScalar(sum) == 0.0) continue;
1151             if (k == i) {
1152               PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1153               if (matl_dbg_viewer) {
1154                 PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1155               }
1156             } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1157               sum /= 2.0;
1158               PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1159               if (matl_dbg_viewer) {
1160                 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1161               }
1162             }
1163           }
1164           sum  = 1.;
1165           PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1166           if (matl_dbg_viewer) {
1167             PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n));
1168           }
1169           PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array));
1170           /* set p0 entry of AIIm1_ones to zero */
1171           PetscCall(ISGetLocalSize(is_p_r[i],&nz));
1172           PetscCall(ISGetIndices(is_p_r[i],&idxs));
1173           for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.;
1174           PetscCall(ISRestoreIndices(is_p_r[i],&idxs));
1175           PetscCall(PetscFree(sums));
1176         }
1177         PetscCall(VecDestroy(&benign_AIIm1_ones));
1178         if (matl_dbg_viewer) {
1179           PetscCall(MatDenseRestoreArray(S2,&S2_data));
1180           PetscCall(MatDenseRestoreArray(S3,&S3_data));
1181         }
1182         if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1183           PetscInt k,j;
1184           for (k=0;k<size_schur;k++) {
1185             for (j=k;j<size_schur;j++) {
1186               S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]);
1187             }
1188           }
1189         }
1190 
1191         /* restore defaults */
1192 #if defined(PETSC_HAVE_MUMPS)
1193         PetscCall(MatMumpsSetIcntl(F,26,-1));
1194 #endif
1195 #if defined(PETSC_HAVE_MKL_PARDISO)
1196         PetscCall(MatMkl_PardisoSetCntl(F,70,0));
1197 #endif
1198         PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB));
1199         PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
1200         PetscCall(VecDestroy(&v));
1201         PetscCall(MatDenseRestoreArray(S_all,&S_data));
1202         if (matl_dbg_viewer) {
1203           Mat S;
1204 
1205           PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1206           PetscCall(MatFactorCreateSchurComplement(F,&S,NULL));
1207           PetscCall(PetscObjectSetName((PetscObject)S,"Sb"));
1208           PetscCall(MatView(S,matl_dbg_viewer));
1209           PetscCall(MatDestroy(&S));
1210           PetscCall(PetscObjectSetName((PetscObject)S2,"S2P"));
1211           PetscCall(MatView(S2,matl_dbg_viewer));
1212           PetscCall(PetscObjectSetName((PetscObject)S3,"S3P"));
1213           PetscCall(MatView(S3,matl_dbg_viewer));
1214           PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat,"cs"));
1215           PetscCall(MatView(cs_AIB_mat,matl_dbg_viewer));
1216           PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1217         }
1218         PetscCall(MatDestroy(&S2));
1219         PetscCall(MatDestroy(&S3));
1220       }
1221       if (!reuse_solvers) {
1222         for (i=0;i<benign_n;i++) {
1223           PetscCall(ISDestroy(&is_p_r[i]));
1224         }
1225         PetscCall(PetscFree(is_p_r));
1226         PetscCall(MatDestroy(&cs_AIB_mat));
1227         PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1228       }
1229     } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1230       PetscCall(MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all));
1231       PetscCall(MatGetType(S_all,&Stype));
1232       reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1233       factor_workaround = PETSC_FALSE;
1234       solver_S = PETSC_FALSE;
1235     }
1236 
1237     if (reuse_solvers) {
1238       Mat                A_II,Afake;
1239       Vec                vec1_B;
1240       PCBDDCReuseSolvers msolv_ctx;
1241       PetscInt           n_R;
1242 
1243       if (sub_schurs->reuse_solver) {
1244         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1245       } else {
1246         PetscCall(PetscNew(&sub_schurs->reuse_solver));
1247       }
1248       msolv_ctx = sub_schurs->reuse_solver;
1249       PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL));
1250       PetscCall(PetscObjectReference((PetscObject)F));
1251       msolv_ctx->F = F;
1252       PetscCall(MatCreateVecs(F,&msolv_ctx->sol,NULL));
1253       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1254       {
1255         PetscScalar *array;
1256         PetscInt    n;
1257 
1258         PetscCall(VecGetLocalSize(msolv_ctx->sol,&n));
1259         PetscCall(VecGetArray(msolv_ctx->sol,&array));
1260         PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs));
1261         PetscCall(VecRestoreArray(msolv_ctx->sol,&array));
1262       }
1263       msolv_ctx->has_vertices = schur_has_vertices;
1264 
1265       /* interior solver */
1266       PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver));
1267       PetscCall(PCSetOperators(msolv_ctx->interior_solver,A_II,A_II));
1268       PetscCall(PCSetType(msolv_ctx->interior_solver,PCSHELL));
1269       PetscCall(PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)"));
1270       PetscCall(PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx));
1271       PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1272       PetscCall(PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior));
1273       PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose));
1274 
1275       /* correction solver */
1276       PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver));
1277       PetscCall(PCSetType(msolv_ctx->correction_solver,PCSHELL));
1278       PetscCall(PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)"));
1279       PetscCall(PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx));
1280       PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1281       PetscCall(PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction));
1282       PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose));
1283 
1284       /* scatter and vecs for Schur complement solver */
1285       PetscCall(MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B));
1286       PetscCall(MatCreateVecs(sub_schurs->S,&vec1_B,NULL));
1287       if (!schur_has_vertices) {
1288         PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B));
1289         PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B));
1290         PetscCall(PetscObjectReference((PetscObject)is_A_all));
1291         msolv_ctx->is_R = is_A_all;
1292       } else {
1293         IS              is_B_all;
1294         const PetscInt* idxs;
1295         PetscInt        dual,n_v,n;
1296 
1297         PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
1298         dual = size_schur - n_v;
1299         PetscCall(ISGetLocalSize(is_A_all,&n));
1300         PetscCall(ISGetIndices(is_A_all,&idxs));
1301         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all));
1302         PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B));
1303         PetscCall(ISDestroy(&is_B_all));
1304         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all));
1305         PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B));
1306         PetscCall(ISDestroy(&is_B_all));
1307         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R));
1308         PetscCall(ISRestoreIndices(is_A_all,&idxs));
1309       }
1310       PetscCall(ISGetLocalSize(msolv_ctx->is_R,&n_R));
1311       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake));
1312       PetscCall(MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY));
1313       PetscCall(MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY));
1314       PetscCall(PCSetOperators(msolv_ctx->correction_solver,Afake,Afake));
1315       PetscCall(MatDestroy(&Afake));
1316       PetscCall(VecDestroy(&vec1_B));
1317 
1318       /* communicate benign info to solver context */
1319       if (benign_n) {
1320         PetscScalar *array;
1321 
1322         msolv_ctx->benign_n = benign_n;
1323         msolv_ctx->benign_zerodiag_subs = is_p_r;
1324         PetscCall(PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals));
1325         msolv_ctx->benign_csAIB = cs_AIB_mat;
1326         PetscCall(MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL));
1327         PetscCall(VecGetArray(msolv_ctx->benign_corr_work,&array));
1328         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec));
1329         PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work,&array));
1330         msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1331       }
1332     } else {
1333       if (sub_schurs->reuse_solver) {
1334         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1335       }
1336       PetscCall(PetscFree(sub_schurs->reuse_solver));
1337     }
1338     PetscCall(MatDestroy(&A));
1339     PetscCall(ISDestroy(&is_A_all));
1340 
1341     /* Work arrays */
1342     PetscCall(PetscMalloc1(max_subset_size*max_subset_size,&work));
1343 
1344     /* S_Ej_all */
1345     cum = cum2 = 0;
1346     PetscCall(MatDenseGetArrayRead(S_all,&rS_data));
1347     PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr));
1348     if (compute_Stilda) {
1349       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
1350     }
1351     for (i=0;i<sub_schurs->n_subs;i++) {
1352       PetscInt j;
1353 
1354       /* get S_E */
1355       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1356       if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1357         PetscInt k;
1358         for (k=0;k<subset_size;k++) {
1359           for (j=k;j<subset_size;j++) {
1360             work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1361             work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]);
1362           }
1363         }
1364       } else { /* just copy to workspace */
1365         PetscInt k;
1366         for (k=0;k<subset_size;k++) {
1367           for (j=0;j<subset_size;j++) {
1368             work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1369           }
1370         }
1371       }
1372       /* insert S_E values */
1373       if (sub_schurs->change) {
1374         Mat change_sub,SEj,T;
1375 
1376         /* change basis */
1377         PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1378         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1379         if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1380           Mat T2;
1381           PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1382           PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1383           PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
1384           PetscCall(MatDestroy(&T2));
1385         } else {
1386           PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1387         }
1388         PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1389         PetscCall(MatDestroy(&T));
1390         PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL));
1391         PetscCall(MatDestroy(&SEj));
1392       }
1393       if (deluxe) {
1394         PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
1395         /* if adaptivity is requested, invert S_E blocks */
1396         if (compute_Stilda) {
1397           Mat               M;
1398           const PetscScalar *vals;
1399           PetscBool         isdense,isdensecuda;
1400 
1401           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M));
1402           PetscCall(MatSetOption(M,MAT_SPD,sub_schurs->is_posdef));
1403           PetscCall(MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian));
1404           if (!PetscBTLookup(sub_schurs->is_edge,i)) {
1405             PetscCall(MatSetType(M,Stype));
1406           }
1407           PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense));
1408           PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda));
1409           if (use_cholesky) {
1410             PetscCall(MatCholeskyFactor(M,NULL,NULL));
1411           } else {
1412             PetscCall(MatLUFactor(M,NULL,NULL,NULL));
1413           }
1414           if (isdense) {
1415             PetscCall(MatSeqDenseInvertFactors_Private(M));
1416 #if defined(PETSC_HAVE_CUDA)
1417           } else if (isdensecuda) {
1418             PetscCall(MatSeqDenseCUDAInvertFactors_Private(M));
1419 #endif
1420           } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype);
1421           PetscCall(MatDenseGetArrayRead(M,&vals));
1422           PetscCall(PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size));
1423           PetscCall(MatDenseRestoreArrayRead(M,&vals));
1424           PetscCall(MatDestroy(&M));
1425         }
1426       } else if (compute_Stilda) { /* not using deluxe */
1427         Mat         SEj;
1428         Vec         D;
1429         PetscScalar *array;
1430 
1431         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1432         PetscCall(VecGetArray(Dall,&array));
1433         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D));
1434         PetscCall(VecRestoreArray(Dall,&array));
1435         PetscCall(VecShift(D,-1.));
1436         PetscCall(MatDiagonalScale(SEj,D,D));
1437         PetscCall(MatDestroy(&SEj));
1438         PetscCall(VecDestroy(&D));
1439         PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
1440       }
1441       cum += subset_size;
1442       cum2 += subset_size*(size_schur + 1);
1443       SEj_arr += subset_size*subset_size;
1444       if (SEjinv_arr) SEjinv_arr += subset_size*subset_size;
1445     }
1446     PetscCall(MatDenseRestoreArrayRead(S_all,&rS_data));
1447     PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr));
1448     if (compute_Stilda) {
1449       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
1450     }
1451     if (solver_S) {
1452       PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1453     }
1454 
1455     /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1456        however, preliminary tests indicate using GPUs is still faster in the solve phase */
1457 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1458     if (reuse_solvers) {
1459       Mat                  St;
1460       MatFactorSchurStatus st;
1461 
1462       flg  = PETSC_FALSE;
1463       PetscCall(PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL));
1464       PetscCall(MatFactorGetSchurComplement(F,&St,&st));
1465       PetscCall(MatBindToCPU(St,flg));
1466       PetscCall(MatFactorRestoreSchurComplement(F,&St,st));
1467     }
1468 #endif
1469 
1470     schur_factor = NULL;
1471     if (compute_Stilda && size_active_schur) {
1472 
1473       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1474       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1475         PetscCall(PetscArraycpy(SEjinv_arr,work,size_schur*size_schur));
1476       } else {
1477         Mat S_all_inv=NULL;
1478 
1479         if (solver_S) {
1480           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1481              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1482           if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1483             PetscScalar *data;
1484             PetscInt     nd = 0;
1485 
1486             if (!use_potr) {
1487               SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1488             }
1489             PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1490             PetscCall(MatDenseGetArray(S_all_inv,&data));
1491             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1492               PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1493             }
1494 
1495             /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1496             if (schur_has_vertices) {
1497               Mat          M;
1498               PetscScalar *tdata;
1499               PetscInt     nv = 0, news;
1500 
1501               PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv));
1502               news = size_active_schur + nv;
1503               PetscCall(PetscCalloc1(news*news,&tdata));
1504               for (i=0;i<size_active_schur;i++) {
1505                 PetscCall(PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i));
1506                 PetscCall(PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv));
1507               }
1508               for (i=0;i<nv;i++) {
1509                 PetscInt k = i+size_active_schur;
1510                 PetscCall(PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i));
1511               }
1512 
1513               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M));
1514               PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1515               PetscCall(MatCholeskyFactor(M,NULL,NULL));
1516               /* save the factors */
1517               cum = 0;
1518               PetscCall(PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor));
1519               for (i=0;i<size_active_schur;i++) {
1520                 PetscCall(PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i));
1521                 cum += size_active_schur - i;
1522               }
1523               for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)]));
1524               PetscCall(MatSeqDenseInvertFactors_Private(M));
1525               /* move back just the active dofs to the Schur complement */
1526               for (i=0;i<size_active_schur;i++) {
1527                 PetscCall(PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur));
1528               }
1529               PetscCall(PetscFree(tdata));
1530               PetscCall(MatDestroy(&M));
1531             } else { /* we can factorize and invert just the activedofs */
1532               Mat         M;
1533               PetscScalar *aux;
1534 
1535               PetscCall(PetscMalloc1(nd,&aux));
1536               for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)];
1537               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M));
1538               PetscCall(MatDenseSetLDA(M,size_schur));
1539               PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1540               PetscCall(MatCholeskyFactor(M,NULL,NULL));
1541               PetscCall(MatSeqDenseInvertFactors_Private(M));
1542               PetscCall(MatDestroy(&M));
1543               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M));
1544               PetscCall(MatZeroEntries(M));
1545               PetscCall(MatDestroy(&M));
1546               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M));
1547               PetscCall(MatDenseSetLDA(M,size_schur));
1548               PetscCall(MatZeroEntries(M));
1549               PetscCall(MatDestroy(&M));
1550               for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i];
1551               PetscCall(PetscFree(aux));
1552             }
1553             PetscCall(MatDenseRestoreArray(S_all_inv,&data));
1554           } else { /* use MatFactor calls to invert S */
1555             PetscCall(MatFactorInvertSchurComplement(F));
1556             PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1557           }
1558         } else { /* we need to invert explicitly since we are not using MatFactor for S */
1559           PetscCall(PetscObjectReference((PetscObject)S_all));
1560           S_all_inv = S_all;
1561           PetscCall(MatDenseGetArray(S_all_inv,&S_data));
1562           PetscCall(PetscBLASIntCast(size_schur,&B_N));
1563           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1564           if (use_potr) {
1565             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
1566             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1567             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
1568             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1569           } else if (use_sytr) {
1570             PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1571             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1572             PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr));
1573             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1574           } else {
1575             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
1576             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1577             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1578             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1579           }
1580           PetscCall(PetscLogFlops(1.0*size_schur*size_schur*size_schur));
1581           PetscCall(PetscFPTrapPop());
1582           PetscCall(MatDenseRestoreArray(S_all_inv,&S_data));
1583         }
1584         /* S_Ej_tilda_all */
1585         cum = cum2 = 0;
1586         PetscCall(MatDenseGetArrayRead(S_all_inv,&rS_data));
1587         for (i=0;i<sub_schurs->n_subs;i++) {
1588           PetscInt j;
1589 
1590           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1591           /* get (St^-1)_E */
1592           /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1593              will be properly accessed later during adaptive selection */
1594           if (S_lower_triangular) {
1595             PetscInt k;
1596             if (sub_schurs->change) {
1597               for (k=0;k<subset_size;k++) {
1598                 for (j=k;j<subset_size;j++) {
1599                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1600                   work[j*subset_size+k] = work[k*subset_size+j];
1601                 }
1602               }
1603             } else {
1604               for (k=0;k<subset_size;k++) {
1605                 for (j=k;j<subset_size;j++) {
1606                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1607                 }
1608               }
1609             }
1610           } else {
1611             PetscInt k;
1612             for (k=0;k<subset_size;k++) {
1613               for (j=0;j<subset_size;j++) {
1614                 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1615               }
1616             }
1617           }
1618           if (sub_schurs->change) {
1619             Mat change_sub,SEj,T;
1620 
1621             /* change basis */
1622             PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1623             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1624             if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1625               Mat T2;
1626               PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1627               PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1628               PetscCall(MatDestroy(&T2));
1629               PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
1630             } else {
1631               PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1632             }
1633             PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1634             PetscCall(MatDestroy(&T));
1635             /* set diagonal entry to a very large value to pick the basis we are eliminating as the first eigenvectors with adaptive selection */
1636             PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1./PETSC_SMALL,NULL,NULL));
1637             PetscCall(MatDestroy(&SEj));
1638           }
1639           PetscCall(PetscArraycpy(SEjinv_arr,work,subset_size*subset_size));
1640           cum += subset_size;
1641           cum2 += subset_size*(size_schur + 1);
1642           SEjinv_arr += subset_size*subset_size;
1643         }
1644         PetscCall(MatDenseRestoreArrayRead(S_all_inv,&rS_data));
1645         if (solver_S) {
1646           if (schur_has_vertices) {
1647             PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED));
1648           } else {
1649             PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED));
1650           }
1651         }
1652         PetscCall(MatDestroy(&S_all_inv));
1653       }
1654       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1655 
1656       /* move back factors if needed */
1657       if (schur_has_vertices) {
1658         Mat      S_tmp;
1659         PetscInt nd = 0;
1660 
1661         PetscCheck(solver_S,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
1662         PetscCall(MatFactorGetSchurComplement(F,&S_tmp,NULL));
1663         if (use_potr) {
1664           PetscScalar *data;
1665 
1666           PetscCall(MatDenseGetArray(S_tmp,&data));
1667           PetscCall(PetscArrayzero(data,size_schur*size_schur));
1668 
1669           if (S_lower_triangular) {
1670             cum = 0;
1671             for (i=0;i<size_active_schur;i++) {
1672               PetscCall(PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i));
1673               cum += size_active_schur-i;
1674             }
1675           } else {
1676             PetscCall(PetscArraycpy(data,schur_factor,size_schur*size_schur));
1677           }
1678           if (sub_schurs->is_dir) {
1679             PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1680             for (i=0;i<nd;i++) {
1681               data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1682             }
1683           }
1684           /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1685              set the diagonal entry of the Schur factor to a very large value */
1686           for (i=size_active_schur+nd;i<size_schur;i++) {
1687             data[i*(size_schur+1)] = infty;
1688           }
1689           PetscCall(MatDenseRestoreArray(S_tmp,&data));
1690         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1691         PetscCall(MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED));
1692       }
1693     } else if (factor_workaround) { /* we need to eliminate any unneeded coupling */
1694       PetscScalar *data;
1695       PetscInt    nd = 0;
1696 
1697       if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1698         PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1699       }
1700       PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1701       PetscCall(MatDenseGetArray(S_all,&data));
1702       for (i=0;i<size_active_schur;i++) {
1703         PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
1704       }
1705       for (i=size_active_schur+nd;i<size_schur;i++) {
1706         PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
1707         data[i*(size_schur+1)] = infty;
1708       }
1709       PetscCall(MatDenseRestoreArray(S_all,&data));
1710       PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1711     }
1712     PetscCall(PetscFree(work));
1713     PetscCall(PetscFree(schur_factor));
1714     PetscCall(VecDestroy(&Dall));
1715   }
1716   PetscCall(ISDestroy(&is_I_layer));
1717   PetscCall(MatDestroy(&S_all));
1718   PetscCall(MatDestroy(&A_BB));
1719   PetscCall(MatDestroy(&A_IB));
1720   PetscCall(MatDestroy(&A_BI));
1721   PetscCall(MatDestroy(&F));
1722 
1723   PetscCall(PetscMalloc1(sub_schurs->n_subs,&nnz));
1724   for (i=0;i<sub_schurs->n_subs;i++) {
1725     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i]));
1726   }
1727   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer));
1728   PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz));
1729   PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
1730   PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
1731   if (compute_Stilda) {
1732     PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz));
1733     PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
1734     PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
1735     if (deluxe) {
1736       PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz));
1737       PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
1738       PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
1739     }
1740   }
1741   PetscCall(ISDestroy(&is_I_layer));
1742 
1743   /* Get local part of (\sum_j S_Ej) */
1744   if (!sub_schurs->sum_S_Ej_all) {
1745     PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all));
1746   }
1747   PetscCall(VecSet(gstash,0.0));
1748   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray));
1749   PetscCall(VecPlaceArray(lstash,stasharray));
1750   PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1751   PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1752   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray));
1753   PetscCall(VecResetArray(lstash));
1754   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray));
1755   PetscCall(VecPlaceArray(lstash,stasharray));
1756   PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1757   PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1758   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray));
1759   PetscCall(VecResetArray(lstash));
1760 
1761   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1762   if (compute_Stilda) {
1763     PetscCall(VecSet(gstash,0.0));
1764     PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1765     PetscCall(VecPlaceArray(lstash,stasharray));
1766     PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1767     PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1768     PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1769     PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1770     PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1771     PetscCall(VecResetArray(lstash));
1772     if (deluxe) {
1773       PetscCall(VecSet(gstash,0.0));
1774       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1775       PetscCall(VecPlaceArray(lstash,stasharray));
1776       PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1777       PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1778       PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1779       PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1780       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1781       PetscCall(VecResetArray(lstash));
1782     } else {
1783       PetscScalar *array;
1784       PetscInt    cum;
1785 
1786       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array));
1787       cum = 0;
1788       for (i=0;i<sub_schurs->n_subs;i++) {
1789         PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1790         PetscCall(PetscBLASIntCast(subset_size,&B_N));
1791         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1792         if (use_potr) {
1793           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
1794           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1795           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
1796           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1797         } else if (use_sytr) {
1798           PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1799           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1800           PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr));
1801           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1802         } else {
1803           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
1804           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1805           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1806           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1807         }
1808         PetscCall(PetscLogFlops(1.0*subset_size*subset_size*subset_size));
1809         PetscCall(PetscFPTrapPop());
1810         cum += subset_size*subset_size;
1811       }
1812       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array));
1813       PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
1814       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1815       sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1816     }
1817   }
1818   PetscCall(VecDestroy(&lstash));
1819   PetscCall(VecDestroy(&gstash));
1820   PetscCall(VecScatterDestroy(&sstash));
1821 
1822   if (matl_dbg_viewer) {
1823     PetscInt cum;
1824 
1825     if (sub_schurs->S_Ej_all) {
1826       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE"));
1827       PetscCall(MatView(sub_schurs->S_Ej_all,matl_dbg_viewer));
1828     }
1829     if (sub_schurs->sum_S_Ej_all) {
1830       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE"));
1831       PetscCall(MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer));
1832     }
1833     if (sub_schurs->sum_S_Ej_inv_all) {
1834       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm"));
1835       PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer));
1836     }
1837     if (sub_schurs->sum_S_Ej_tilda_all) {
1838       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt"));
1839       PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer));
1840     }
1841     for (i=0,cum=0;i<sub_schurs->n_subs;i++) {
1842       IS   is;
1843       char name[16];
1844 
1845       PetscCall(PetscSNPrintf(name,sizeof(name),"IE%" PetscInt_FMT,i));
1846       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1847       PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is));
1848       PetscCall(PetscObjectSetName((PetscObject)is,name));
1849       PetscCall(ISView(is,matl_dbg_viewer));
1850       PetscCall(ISDestroy(&is));
1851       cum += subset_size;
1852     }
1853   }
1854 
1855   /* free workspace */
1856   if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
1857   if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
1858   PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
1859   PetscCall(PetscFree2(Bwork,pivots));
1860   PetscCall(PetscCommDestroy(&comm_n));
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc)
1865 {
1866   IS              *faces,*edges,*all_cc,vertices;
1867   PetscInt        i,n_faces,n_edges,n_all_cc;
1868   PetscBool       is_sorted,ispardiso,ismumps;
1869 
1870   PetscFunctionBegin;
1871   PetscCall(ISSorted(is_I,&is_sorted));
1872   PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1873   PetscCall(ISSorted(is_B,&is_sorted));
1874   PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1875 
1876   /* reset any previous data */
1877   PetscCall(PCBDDCSubSchursReset(sub_schurs));
1878 
1879   /* get index sets for faces and edges (already sorted by global ordering) */
1880   PetscCall(PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
1881   n_all_cc = n_faces+n_edges;
1882   PetscCall(PetscBTCreate(n_all_cc,&sub_schurs->is_edge));
1883   PetscCall(PetscMalloc1(n_all_cc,&all_cc));
1884   for (i=0;i<n_faces;i++) {
1885     if (copycc) {
1886       PetscCall(ISDuplicate(faces[i],&all_cc[i]));
1887     } else {
1888       PetscCall(PetscObjectReference((PetscObject)faces[i]));
1889       all_cc[i] = faces[i];
1890     }
1891   }
1892   for (i=0;i<n_edges;i++) {
1893     if (copycc) {
1894       PetscCall(ISDuplicate(edges[i],&all_cc[n_faces+i]));
1895     } else {
1896       PetscCall(PetscObjectReference((PetscObject)edges[i]));
1897       all_cc[n_faces+i] = edges[i];
1898     }
1899     PetscCall(PetscBTSet(sub_schurs->is_edge,n_faces+i));
1900   }
1901   PetscCall(PetscObjectReference((PetscObject)vertices));
1902   sub_schurs->is_vertices = vertices;
1903   PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
1904   sub_schurs->is_dir = NULL;
1905   PetscCall(PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir));
1906 
1907   /* Determine if MatFactor can be used */
1908   PetscCall(PetscStrallocpy(prefix,&sub_schurs->prefix));
1909 #if defined(PETSC_HAVE_MUMPS)
1910   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type)));
1911 #elif defined(PETSC_HAVE_MKL_PARDISO)
1912   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type)));
1913 #else
1914   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type)));
1915 #endif
1916 #if defined(PETSC_USE_COMPLEX)
1917   sub_schurs->is_hermitian  = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1918 #else
1919   sub_schurs->is_hermitian  = PETSC_TRUE;
1920 #endif
1921   sub_schurs->is_posdef     = PETSC_TRUE;
1922   sub_schurs->is_symmetric  = PETSC_TRUE;
1923   sub_schurs->debug         = PETSC_FALSE;
1924   sub_schurs->restrict_comm = PETSC_FALSE;
1925   PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
1926   PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL));
1927   PetscCall(PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL));
1928   PetscCall(PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL));
1929   PetscCall(PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL));
1930   PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL));
1931   PetscCall(PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL));
1932   PetscOptionsEnd();
1933   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps));
1934   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso));
1935   sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
1936 
1937   /* for reals, symmetric and hermitian are synonims */
1938 #if !defined(PETSC_USE_COMPLEX)
1939   sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1940   sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1941 #endif
1942 
1943   PetscCall(PetscObjectReference((PetscObject)is_I));
1944   sub_schurs->is_I = is_I;
1945   PetscCall(PetscObjectReference((PetscObject)is_B));
1946   sub_schurs->is_B = is_B;
1947   PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
1948   sub_schurs->l2gmap = graph->l2gmap;
1949   PetscCall(PetscObjectReference((PetscObject)BtoNmap));
1950   sub_schurs->BtoNmap = BtoNmap;
1951   sub_schurs->n_subs = n_all_cc;
1952   sub_schurs->is_subs = all_cc;
1953   sub_schurs->S_Ej_all = NULL;
1954   sub_schurs->sum_S_Ej_all = NULL;
1955   sub_schurs->sum_S_Ej_inv_all = NULL;
1956   sub_schurs->sum_S_Ej_tilda_all = NULL;
1957   sub_schurs->is_Ej_all = NULL;
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1962 {
1963   PCBDDCSubSchurs schurs_ctx;
1964 
1965   PetscFunctionBegin;
1966   PetscCall(PetscNew(&schurs_ctx));
1967   schurs_ctx->n_subs = 0;
1968   *sub_schurs = schurs_ctx;
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1973 {
1974   PetscInt       i;
1975 
1976   PetscFunctionBegin;
1977   if (!sub_schurs) PetscFunctionReturn(0);
1978   PetscCall(PetscFree(sub_schurs->prefix));
1979   PetscCall(MatDestroy(&sub_schurs->A));
1980   PetscCall(MatDestroy(&sub_schurs->S));
1981   PetscCall(ISDestroy(&sub_schurs->is_I));
1982   PetscCall(ISDestroy(&sub_schurs->is_B));
1983   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
1984   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
1985   PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
1986   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
1987   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1988   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
1989   PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
1990   PetscCall(ISDestroy(&sub_schurs->is_vertices));
1991   PetscCall(ISDestroy(&sub_schurs->is_dir));
1992   PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
1993   for (i=0;i<sub_schurs->n_subs;i++) {
1994     PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
1995   }
1996   if (sub_schurs->n_subs) {
1997     PetscCall(PetscFree(sub_schurs->is_subs));
1998   }
1999   if (sub_schurs->reuse_solver) {
2000     PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
2001   }
2002   PetscCall(PetscFree(sub_schurs->reuse_solver));
2003   if (sub_schurs->change) {
2004     for (i=0;i<sub_schurs->n_subs;i++) {
2005       PetscCall(KSPDestroy(&sub_schurs->change[i]));
2006       PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
2007     }
2008   }
2009   PetscCall(PetscFree(sub_schurs->change));
2010   PetscCall(PetscFree(sub_schurs->change_primal_sub));
2011   sub_schurs->n_subs = 0;
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs)
2016 {
2017   PetscFunctionBegin;
2018   PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2019   PetscCall(PetscFree(*sub_schurs));
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
2024 {
2025   PetscInt       i,j,n;
2026 
2027   PetscFunctionBegin;
2028   n = 0;
2029   for (i=-n_prev;i<0;i++) {
2030     PetscInt start_dof = queue_tip[i];
2031     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
2032       PetscInt dof = adjncy[j];
2033       if (!PetscBTLookup(touched,dof)) {
2034         PetscCall(PetscBTSet(touched,dof));
2035         queue_tip[n] = dof;
2036         n++;
2037       }
2038     }
2039   }
2040   *n_added = n;
2041   PetscFunctionReturn(0);
2042 }
2043