xref: /petsc/src/ksp/pc/impls/bddc/bddcschurs.c (revision daa037dfd3c3bec8dc8659548d2b20b07c1dc6de)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <../src/mat/impls/dense/seq/dense.h>
4 #include <petscblaslapack.h>
5 
6 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
7 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
8 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec);
9 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec);
10 
11 /* if v2 is not present, correction is done in-place */
12 PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13 {
14   PetscScalar    *array;
15   PetscScalar    *array2;
16 
17   PetscFunctionBegin;
18   if (!ctx->benign_n) PetscFunctionReturn(0);
19   if (sol && full) {
20     PetscInt n_I,size_schur;
21 
22     /* get sizes */
23     PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
24     PetscCall(VecGetSize(v,&n_I));
25     n_I = n_I - size_schur;
26     /* get schur sol from array */
27     PetscCall(VecGetArray(v,&array));
28     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
29     PetscCall(VecRestoreArray(v,&array));
30     /* apply interior sol correction */
31     PetscCall(MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work));
32     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
33     PetscCall(MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v));
34   }
35   if (v2) {
36     PetscInt nl;
37 
38     PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array));
39     PetscCall(VecGetLocalSize(v2,&nl));
40     PetscCall(VecGetArray(v2,&array2));
41     PetscCall(PetscArraycpy(array2,array,nl));
42   } else {
43     PetscCall(VecGetArray(v,&array));
44     array2 = array;
45   }
46   if (!sol) { /* change rhs */
47     PetscInt n;
48     for (n=0;n<ctx->benign_n;n++) {
49       PetscScalar    sum = 0.;
50       const PetscInt *cols;
51       PetscInt       nz,i;
52 
53       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
54       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
55       for (i=0;i<nz-1;i++) sum += array[cols[i]];
56 #if defined(PETSC_USE_COMPLEX)
57       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
58 #else
59       sum = -sum/nz;
60 #endif
61       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
62       ctx->benign_save_vals[n] = array2[cols[nz-1]];
63       array2[cols[nz-1]] = sum;
64       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
65     }
66   } else {
67     PetscInt n;
68     for (n=0;n<ctx->benign_n;n++) {
69       PetscScalar    sum = 0.;
70       const PetscInt *cols;
71       PetscInt       nz,i;
72       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz));
73       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n],&cols));
74       for (i=0;i<nz-1;i++) sum += array[cols[i]];
75 #if defined(PETSC_USE_COMPLEX)
76       sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
77 #else
78       sum = -sum/nz;
79 #endif
80       for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
81       array2[cols[nz-1]] = ctx->benign_save_vals[n];
82       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols));
83     }
84   }
85   if (v2) {
86     PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array));
87     PetscCall(VecRestoreArray(v2,&array2));
88   } else {
89     PetscCall(VecRestoreArray(v,&array));
90   }
91   if (!sol && full) {
92     Vec      usedv;
93     PetscInt n_I,size_schur;
94 
95     /* get sizes */
96     PetscCall(MatGetSize(ctx->benign_csAIB,&size_schur,NULL));
97     PetscCall(VecGetSize(v,&n_I));
98     n_I = n_I - size_schur;
99     /* compute schur rhs correction */
100     if (v2) {
101       usedv = v2;
102     } else {
103       usedv = v;
104     }
105     /* apply schur rhs correction */
106     PetscCall(MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work));
107     PetscCall(VecGetArrayRead(usedv,(const PetscScalar**)&array));
108     PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I));
109     PetscCall(VecRestoreArrayRead(usedv,(const PetscScalar**)&array));
110     PetscCall(MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec));
111     PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
112   }
113   PetscFunctionReturn(0);
114 }
115 
116 static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
117 {
118   PCBDDCReuseSolvers ctx;
119   PetscBool          copy = PETSC_FALSE;
120 
121   PetscFunctionBegin;
122   PetscCall(PCShellGetContext(pc,&ctx));
123   if (full) {
124 #if defined(PETSC_HAVE_MUMPS)
125     PetscCall(MatMumpsSetIcntl(ctx->F,26,-1));
126 #endif
127 #if defined(PETSC_HAVE_MKL_PARDISO)
128     PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0));
129 #endif
130     copy = ctx->has_vertices;
131   } else { /* interior solver */
132 #if defined(PETSC_HAVE_MUMPS)
133     PetscCall(MatMumpsSetIcntl(ctx->F,26,0));
134 #endif
135 #if defined(PETSC_HAVE_MKL_PARDISO)
136     PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,1));
137 #endif
138     copy = PETSC_TRUE;
139   }
140   /* copy rhs into factored matrix workspace */
141   if (copy) {
142     PetscInt    n;
143     PetscScalar *array,*array_solver;
144 
145     PetscCall(VecGetLocalSize(rhs,&n));
146     PetscCall(VecGetArrayRead(rhs,(const PetscScalar**)&array));
147     PetscCall(VecGetArray(ctx->rhs,&array_solver));
148     PetscCall(PetscArraycpy(array_solver,array,n));
149     PetscCall(VecRestoreArray(ctx->rhs,&array_solver));
150     PetscCall(VecRestoreArrayRead(rhs,(const PetscScalar**)&array));
151 
152     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full));
153     if (transpose) {
154       PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol));
155     } else {
156       PetscCall(MatSolve(ctx->F,ctx->rhs,ctx->sol));
157     }
158     PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full));
159 
160     /* get back data to caller worskpace */
161     PetscCall(VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
162     PetscCall(VecGetArray(sol,&array));
163     PetscCall(PetscArraycpy(array,array_solver,n));
164     PetscCall(VecRestoreArray(sol,&array));
165     PetscCall(VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver));
166   } else {
167     if (ctx->benign_n) {
168       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full));
169       if (transpose) {
170         PetscCall(MatSolveTranspose(ctx->F,ctx->rhs,sol));
171       } else {
172         PetscCall(MatSolve(ctx->F,ctx->rhs,sol));
173       }
174       PetscCall(PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full));
175     } else {
176       if (transpose) {
177         PetscCall(MatSolveTranspose(ctx->F,rhs,sol));
178       } else {
179         PetscCall(MatSolve(ctx->F,rhs,sol));
180       }
181     }
182   }
183   /* restore defaults */
184 #if defined(PETSC_HAVE_MUMPS)
185   PetscCall(MatMumpsSetIcntl(ctx->F,26,-1));
186 #endif
187 #if defined(PETSC_HAVE_MKL_PARDISO)
188   PetscCall(MatMkl_PardisoSetCntl(ctx->F,70,0));
189 #endif
190   PetscFunctionReturn(0);
191 }
192 
193 static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
194 {
195   PetscFunctionBegin;
196   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE));
197   PetscFunctionReturn(0);
198 }
199 
200 static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
201 {
202   PetscFunctionBegin;
203   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE));
204   PetscFunctionReturn(0);
205 }
206 
207 static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
208 {
209   PetscFunctionBegin;
210   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE));
211   PetscFunctionReturn(0);
212 }
213 
214 static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
215 {
216   PetscFunctionBegin;
217   PetscCall(PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE));
218   PetscFunctionReturn(0);
219 }
220 
221 static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
222 {
223   PCBDDCReuseSolvers ctx;
224   PetscBool          iascii;
225 
226   PetscFunctionBegin;
227   PetscCall(PCShellGetContext(pc,&ctx));
228   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
229   if (iascii) {
230     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO));
231   }
232   PetscCall(MatView(ctx->F,viewer));
233   if (iascii) {
234     PetscCall(PetscViewerPopFormat(viewer));
235   }
236   PetscFunctionReturn(0);
237 }
238 
239 static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
240 {
241   PetscInt       i;
242 
243   PetscFunctionBegin;
244   PetscCall(MatDestroy(&reuse->F));
245   PetscCall(VecDestroy(&reuse->sol));
246   PetscCall(VecDestroy(&reuse->rhs));
247   PetscCall(PCDestroy(&reuse->interior_solver));
248   PetscCall(PCDestroy(&reuse->correction_solver));
249   PetscCall(ISDestroy(&reuse->is_R));
250   PetscCall(ISDestroy(&reuse->is_B));
251   PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
252   PetscCall(VecDestroy(&reuse->sol_B));
253   PetscCall(VecDestroy(&reuse->rhs_B));
254   for (i=0;i<reuse->benign_n;i++) {
255     PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
256   }
257   PetscCall(PetscFree(reuse->benign_zerodiag_subs));
258   PetscCall(PetscFree(reuse->benign_save_vals));
259   PetscCall(MatDestroy(&reuse->benign_csAIB));
260   PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
261   PetscCall(VecDestroy(&reuse->benign_corr_work));
262   PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
263   PetscFunctionReturn(0);
264 }
265 
266 static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
267 {
268   Mat            B, C, D, Bd, Cd, AinvBd;
269   KSP            ksp;
270   PC             pc;
271   PetscBool      isLU, isILU, isCHOL, Bdense, Cdense;
272   PetscReal      fill = 2.0;
273   PetscInt       n_I;
274   PetscMPIInt    size;
275 
276   PetscFunctionBegin;
277   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&size));
278   PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for parallel matrices");
279   if (reuse == MAT_REUSE_MATRIX) {
280     PetscBool Sdense;
281 
282     PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
283     PetscCheck(Sdense,PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"S should dense");
284   }
285   PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
286   PetscCall(MatSchurComplementGetKSP(M, &ksp));
287   PetscCall(KSPGetPC(ksp, &pc));
288   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU));
289   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU));
290   PetscCall(PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL));
291   PetscCall(PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense));
292   PetscCall(PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense));
293   PetscCall(MatGetSize(B,&n_I,NULL));
294   if (n_I) {
295     if (!Bdense) {
296       PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
297     } else {
298       Bd = B;
299     }
300 
301     if (isLU || isILU || isCHOL) {
302       Mat fact;
303       PetscCall(KSPSetUp(ksp));
304       PetscCall(PCFactorGetMatrix(pc, &fact));
305       PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
306       PetscCall(MatMatSolve(fact, Bd, AinvBd));
307     } else {
308       PetscBool ex = PETSC_TRUE;
309 
310       if (ex) {
311         Mat Ainvd;
312 
313         PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
314         PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
315         PetscCall(MatDestroy(&Ainvd));
316       } else {
317         Vec         sol,rhs;
318         PetscScalar *arrayrhs,*arraysol;
319         PetscInt    i,nrhs,n;
320 
321         PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
322         PetscCall(MatGetSize(Bd,&n,&nrhs));
323         PetscCall(MatDenseGetArray(Bd,&arrayrhs));
324         PetscCall(MatDenseGetArray(AinvBd,&arraysol));
325         PetscCall(KSPGetSolution(ksp,&sol));
326         PetscCall(KSPGetRhs(ksp,&rhs));
327         for (i=0;i<nrhs;i++) {
328           PetscCall(VecPlaceArray(rhs,arrayrhs+i*n));
329           PetscCall(VecPlaceArray(sol,arraysol+i*n));
330           PetscCall(KSPSolve(ksp,rhs,sol));
331           PetscCall(VecResetArray(rhs));
332           PetscCall(VecResetArray(sol));
333         }
334         PetscCall(MatDenseRestoreArray(Bd,&arrayrhs));
335         PetscCall(MatDenseRestoreArray(AinvBd,&arrayrhs));
336       }
337     }
338     if (!Bdense & !issym) {
339       PetscCall(MatDestroy(&Bd));
340     }
341 
342     if (!issym) {
343       if (!Cdense) {
344         PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
345       } else {
346         Cd = C;
347       }
348       PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
349       if (!Cdense) {
350         PetscCall(MatDestroy(&Cd));
351       }
352     } else {
353       PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
354       if (!Bdense) {
355         PetscCall(MatDestroy(&Bd));
356       }
357     }
358     PetscCall(MatDestroy(&AinvBd));
359   }
360 
361   if (D) {
362     Mat       Dd;
363     PetscBool Ddense;
364 
365     PetscCall(PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense));
366     if (!Ddense) {
367       PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
368     } else {
369       Dd = D;
370     }
371     if (n_I) {
372       PetscCall(MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN));
373     } else {
374       if (reuse == MAT_INITIAL_MATRIX) {
375         PetscCall(MatDuplicate(Dd,MAT_COPY_VALUES,S));
376       } else {
377         PetscCall(MatCopy(Dd,*S,SAME_NONZERO_PATTERN));
378       }
379     }
380     if (!Ddense) {
381       PetscCall(MatDestroy(&Dd));
382     }
383   } else {
384     PetscCall(MatScale(*S,-1.0));
385   }
386   PetscFunctionReturn(0);
387 }
388 
389 PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
390 {
391   Mat                    F,A_II,A_IB,A_BI,A_BB,AE_II;
392   Mat                    S_all;
393   Vec                    gstash,lstash;
394   VecScatter             sstash;
395   IS                     is_I,is_I_layer;
396   IS                     all_subsets,all_subsets_mult,all_subsets_n;
397   PetscScalar            *stasharray,*Bwork;
398   PetscInt               *nnz,*all_local_idx_N;
399   PetscInt               *auxnum1,*auxnum2;
400   PetscInt               i,subset_size,max_subset_size;
401   PetscInt               n_B,extra,local_size,global_size;
402   PetscInt               local_stash_size;
403   PetscBLASInt           B_N,B_ierr,B_lwork,*pivots;
404   MPI_Comm               comm_n;
405   PetscBool              deluxe = PETSC_TRUE;
406   PetscBool              use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
407   PetscViewer            matl_dbg_viewer = NULL;
408   PetscErrorCode         ierr;
409   PetscBool              flg;
410 
411   PetscFunctionBegin;
412   PetscCall(MatDestroy(&sub_schurs->A));
413   PetscCall(MatDestroy(&sub_schurs->S));
414   if (Ain) {
415     PetscCall(PetscObjectReference((PetscObject)Ain));
416     sub_schurs->A = Ain;
417   }
418 
419   PetscCall(PetscObjectReference((PetscObject)Sin));
420   sub_schurs->S = Sin;
421   if (sub_schurs->schur_explicit) {
422     sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
423   }
424 
425   /* preliminary checks */
426   PetscCheck(sub_schurs->schur_explicit || !compute_Stilda,PetscObjectComm((PetscObject)sub_schurs->l2gmap),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
427 
428   if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
429 
430   /* debug (MATLAB) */
431   if (sub_schurs->debug) {
432     PetscMPIInt size,rank;
433     PetscInt    nr,*print_schurs_ranks,print_schurs = PETSC_FALSE;
434 
435     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size));
436     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
437     nr   = size;
438     PetscCall(PetscMalloc1(nr,&print_schurs_ranks));
439     ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");PetscCall(ierr);
440     PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg));
441     if (!flg) print_schurs = PETSC_TRUE;
442     else {
443       print_schurs = PETSC_FALSE;
444       for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; }
445     }
446     ierr = PetscOptionsEnd();PetscCall(ierr);
447     PetscCall(PetscFree(print_schurs_ranks));
448     if (print_schurs) {
449       char filename[256];
450 
451       PetscCall(PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank));
452       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer));
453       PetscCall(PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB));
454     }
455   }
456 
457   /* restrict work on active processes */
458   if (sub_schurs->restrict_comm) {
459     PetscSubcomm subcomm;
460     PetscMPIInt  color,rank;
461 
462     color = 0;
463     if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
464     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank));
465     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm));
466     PetscCall(PetscSubcommSetNumber(subcomm,2));
467     PetscCall(PetscSubcommSetTypeGeneral(subcomm,color,rank));
468     PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL));
469     PetscCall(PetscSubcommDestroy(&subcomm));
470     if (!sub_schurs->n_subs) {
471       PetscCall(PetscCommDestroy(&comm_n));
472       PetscFunctionReturn(0);
473     }
474   } else {
475     PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL));
476   }
477 
478   /* get Schur complement matrices */
479   if (!sub_schurs->schur_explicit) {
480     Mat       tA_IB,tA_BI,tA_BB;
481     PetscBool isseqsbaij;
482     PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB));
483     PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij));
484     if (isseqsbaij) {
485       PetscCall(MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB));
486       PetscCall(MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB));
487       PetscCall(MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI));
488     } else {
489       PetscCall(PetscObjectReference((PetscObject)tA_BB));
490       A_BB = tA_BB;
491       PetscCall(PetscObjectReference((PetscObject)tA_IB));
492       A_IB = tA_IB;
493       PetscCall(PetscObjectReference((PetscObject)tA_BI));
494       A_BI = tA_BI;
495     }
496   } else {
497     A_II = NULL;
498     A_IB = NULL;
499     A_BI = NULL;
500     A_BB = NULL;
501   }
502   S_all = NULL;
503 
504   /* determine interior problems */
505   PetscCall(ISGetLocalSize(sub_schurs->is_I,&i));
506   if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
507     PetscBT                touched;
508     const PetscInt*        idx_B;
509     PetscInt               n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
510 
511     PetscCheck(xadj,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot request layering without adjacency");
512     /* get sizes */
513     PetscCall(ISGetLocalSize(sub_schurs->is_I,&n_I));
514     PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B));
515 
516     PetscCall(PetscMalloc1(n_I+n_B,&local_numbering));
517     PetscCall(PetscBTCreate(n_I+n_B,&touched));
518     PetscCall(PetscBTMemzero(n_I+n_B,touched));
519 
520     /* all boundary dofs must be skipped when adding layers */
521     PetscCall(ISGetIndices(sub_schurs->is_B,&idx_B));
522     for (j=0;j<n_B;j++) {
523       PetscCall(PetscBTSet(touched,idx_B[j]));
524     }
525     PetscCall(PetscArraycpy(local_numbering,idx_B,n_B));
526     PetscCall(ISRestoreIndices(sub_schurs->is_B,&idx_B));
527 
528     /* add prescribed number of layers of dofs */
529     n_local_dofs = n_B;
530     n_prev_added = n_B;
531     for (layer=0;layer<nlayers;layer++) {
532       PetscInt n_added = 0;
533       if (n_local_dofs == n_I+n_B) break;
534       PetscCheckFalse(n_local_dofs > n_I+n_B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error querying layer %D. Out of bound access (%D > %D)",layer,n_local_dofs,n_I+n_B);
535       PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added));
536       n_prev_added = n_added;
537       n_local_dofs += n_added;
538       if (!n_added) break;
539     }
540     PetscCall(PetscBTDestroy(&touched));
541 
542     /* IS for I layer dofs in original numbering */
543     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer));
544     PetscCall(PetscFree(local_numbering));
545     PetscCall(ISSort(is_I_layer));
546     /* IS for I layer dofs in I numbering */
547     if (!sub_schurs->schur_explicit) {
548       ISLocalToGlobalMapping ItoNmap;
549       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap));
550       PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I));
551       PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
552 
553       /* II block */
554       PetscCall(MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II));
555     }
556   } else {
557     PetscInt n_I;
558 
559     /* IS for I dofs in original numbering */
560     PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
561     is_I_layer = sub_schurs->is_I;
562 
563     /* IS for I dofs in I numbering (strided 1) */
564     if (!sub_schurs->schur_explicit) {
565       PetscCall(ISGetSize(sub_schurs->is_I,&n_I));
566       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I));
567 
568       /* II block is the same */
569       PetscCall(PetscObjectReference((PetscObject)A_II));
570       AE_II = A_II;
571     }
572   }
573 
574   /* Get info on subset sizes and sum of all subsets sizes */
575   max_subset_size = 0;
576   local_size = 0;
577   for (i=0;i<sub_schurs->n_subs;i++) {
578     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
579     max_subset_size = PetscMax(subset_size,max_subset_size);
580     local_size += subset_size;
581   }
582 
583   /* Work arrays for local indices */
584   extra = 0;
585   PetscCall(ISGetLocalSize(sub_schurs->is_B,&n_B));
586   if (sub_schurs->schur_explicit && is_I_layer) {
587     PetscCall(ISGetLocalSize(is_I_layer,&extra));
588   }
589   PetscCall(PetscMalloc1(n_B+extra,&all_local_idx_N));
590   if (extra) {
591     const PetscInt *idxs;
592     PetscCall(ISGetIndices(is_I_layer,&idxs));
593     PetscCall(PetscArraycpy(all_local_idx_N,idxs,extra));
594     PetscCall(ISRestoreIndices(is_I_layer,&idxs));
595   }
596   PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum1));
597   PetscCall(PetscMalloc1(sub_schurs->n_subs,&auxnum2));
598 
599   /* Get local indices in local numbering */
600   local_size = 0;
601   local_stash_size = 0;
602   for (i=0;i<sub_schurs->n_subs;i++) {
603     const PetscInt *idxs;
604 
605     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
606     PetscCall(ISGetIndices(sub_schurs->is_subs[i],&idxs));
607     /* start (smallest in global ordering) and multiplicity */
608     auxnum1[i] = idxs[0];
609     auxnum2[i] = subset_size*subset_size;
610     /* subset indices in local numbering */
611     PetscCall(PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size));
612     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i],&idxs));
613     local_size += subset_size;
614     local_stash_size += subset_size*subset_size;
615   }
616 
617   /* allocate extra workspace needed only for GETRI or SYTRF */
618   use_potr = use_sytr = PETSC_FALSE;
619   if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
620     use_potr = PETSC_TRUE;
621   } else if (sub_schurs->is_symmetric) {
622     use_sytr = PETSC_TRUE;
623   }
624   if (local_size && !use_potr) {
625     PetscScalar  lwork,dummyscalar = 0.;
626     PetscBLASInt dummyint = 0;
627 
628     B_lwork = -1;
629     PetscCall(PetscBLASIntCast(local_size,&B_N));
630     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
631     if (use_sytr) {
632       PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
633       PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYTRF Lapack routine %d",(int)B_ierr);
634     } else {
635       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
636       PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GETRI Lapack routine %d",(int)B_ierr);
637     }
638     PetscCall(PetscFPTrapPop());
639     PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork));
640     PetscCall(PetscMalloc2(B_lwork,&Bwork,B_N,&pivots));
641   } else {
642     Bwork = NULL;
643     pivots = NULL;
644   }
645 
646   /* prepare data for summing up properly schurs on subsets */
647   PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n));
648   PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets));
649   PetscCall(ISDestroy(&all_subsets_n));
650   PetscCall(ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult));
651   PetscCall(ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n));
652   PetscCall(ISDestroy(&all_subsets));
653   PetscCall(ISDestroy(&all_subsets_mult));
654   PetscCall(ISGetLocalSize(all_subsets_n,&i));
655   PetscCheckFalse(i != local_stash_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid size of new subset! %D != %D",i,local_stash_size);
656   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash));
657   PetscCall(VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash));
658   PetscCall(VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash));
659   PetscCall(ISDestroy(&all_subsets_n));
660 
661   /* subset indices in local boundary numbering */
662   if (!sub_schurs->is_Ej_all) {
663     PetscInt *all_local_idx_B;
664 
665     PetscCall(PetscMalloc1(local_size,&all_local_idx_B));
666     PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B));
667     PetscCheckFalse(subset_size != local_size,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in sub_schurs serial (BtoNmap)! %D != %D",subset_size,local_size);
668     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all));
669   }
670 
671   if (change) {
672     ISLocalToGlobalMapping BtoS;
673     IS                     change_primal_B;
674     IS                     change_primal_all;
675 
676     PetscCheck(!sub_schurs->change_primal_sub,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
677     PetscCheck(!sub_schurs->change,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
678     PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub));
679     for (i=0;i<sub_schurs->n_subs;i++) {
680       ISLocalToGlobalMapping NtoS;
681       PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS));
682       PetscCall(ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i]));
683       PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
684     }
685     PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B));
686     PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS));
687     PetscCall(ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all));
688     PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
689     PetscCall(ISDestroy(&change_primal_B));
690     PetscCall(PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change));
691     for (i=0;i<sub_schurs->n_subs;i++) {
692       Mat change_sub;
693 
694       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
695       PetscCall(KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i]));
696       PetscCall(KSPSetType(sub_schurs->change[i],KSPPREONLY));
697       if (!sub_schurs->change_with_qr) {
698         PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub));
699       } else {
700         Mat change_subt;
701         PetscCall(MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt));
702         PetscCall(MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub));
703         PetscCall(MatDestroy(&change_subt));
704       }
705       PetscCall(KSPSetOperators(sub_schurs->change[i],change_sub,change_sub));
706       PetscCall(MatDestroy(&change_sub));
707       PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix));
708       PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_"));
709     }
710     PetscCall(ISDestroy(&change_primal_all));
711   }
712 
713   /* Local matrix of all local Schur on subsets (transposed) */
714   if (!sub_schurs->S_Ej_all) {
715     Mat         T;
716     PetscScalar *v;
717     PetscInt    *ii,*jj;
718     PetscInt    cum,i,j,k;
719 
720     /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
721        Allocate properly a representative matrix and duplicate */
722     PetscCall(PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v));
723     ii[0] = 0;
724     cum   = 0;
725     for (i=0;i<sub_schurs->n_subs;i++) {
726       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
727       for (j=0;j<subset_size;j++) {
728         const PetscInt row = cum+j;
729         PetscInt col = cum;
730 
731         ii[row+1] = ii[row] + subset_size;
732         for (k=ii[row];k<ii[row+1];k++) {
733           jj[k] = col;
734           col++;
735         }
736       }
737       cum += subset_size;
738     }
739     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T));
740     PetscCall(MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all));
741     PetscCall(MatDestroy(&T));
742     PetscCall(PetscFree3(ii,jj,v));
743   }
744   /* matrices for deluxe scaling and adaptive selection */
745   if (compute_Stilda) {
746     if (!sub_schurs->sum_S_Ej_tilda_all) {
747       PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all));
748     }
749     if (!sub_schurs->sum_S_Ej_inv_all && deluxe) {
750       PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all));
751     }
752   }
753 
754   /* Compute Schur complements explicitly */
755   F = NULL;
756   if (!sub_schurs->schur_explicit) {
757     /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
758        it is not efficient, unless the economic version of the scaling is used */
759     Mat         S_Ej_expl;
760     PetscScalar *work;
761     PetscInt    j,*dummy_idx;
762     PetscBool   Sdense;
763 
764     PetscCall(PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work));
765     local_size = 0;
766     for (i=0;i<sub_schurs->n_subs;i++) {
767       IS  is_subset_B;
768       Mat AE_EE,AE_IE,AE_EI,S_Ej;
769 
770       /* subsets in original and boundary numbering */
771       PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B));
772       /* EE block */
773       PetscCall(MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE));
774       /* IE block */
775       PetscCall(MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE));
776       /* EI block */
777       if (sub_schurs->is_symmetric) {
778         PetscCall(MatCreateTranspose(AE_IE,&AE_EI));
779       } else if (sub_schurs->is_hermitian) {
780         PetscCall(MatCreateHermitianTranspose(AE_IE,&AE_EI));
781       } else {
782         PetscCall(MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI));
783       }
784       PetscCall(ISDestroy(&is_subset_B));
785       PetscCall(MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej));
786       PetscCall(MatDestroy(&AE_EE));
787       PetscCall(MatDestroy(&AE_IE));
788       PetscCall(MatDestroy(&AE_EI));
789       if (AE_II == A_II) { /* we can reuse the same ksp */
790         KSP ksp;
791         PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&ksp));
792         PetscCall(MatSchurComplementSetKSP(S_Ej,ksp));
793       } else { /* build new ksp object which inherits ksp and pc types from the original one */
794         KSP       origksp,schurksp;
795         PC        origpc,schurpc;
796         KSPType   ksp_type;
797         PetscInt  n_internal;
798         PetscBool ispcnone;
799 
800         PetscCall(MatSchurComplementGetKSP(sub_schurs->S,&origksp));
801         PetscCall(MatSchurComplementGetKSP(S_Ej,&schurksp));
802         PetscCall(KSPGetType(origksp,&ksp_type));
803         PetscCall(KSPSetType(schurksp,ksp_type));
804         PetscCall(KSPGetPC(schurksp,&schurpc));
805         PetscCall(KSPGetPC(origksp,&origpc));
806         PetscCall(PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone));
807         if (!ispcnone) {
808           PCType pc_type;
809           PetscCall(PCGetType(origpc,&pc_type));
810           PetscCall(PCSetType(schurpc,pc_type));
811         } else {
812           PetscCall(PCSetType(schurpc,PCLU));
813         }
814         PetscCall(ISGetSize(is_I,&n_internal));
815         if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
816           MatSolverType solver = NULL;
817           PetscCall(PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver));
818           if (solver) {
819             PetscCall(PCFactorSetMatSolverType(schurpc,solver));
820           }
821         }
822         PetscCall(KSPSetUp(schurksp));
823       }
824       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
825       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl));
826       PetscCall(PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl));
827       PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense));
828       if (Sdense) {
829         for (j=0;j<subset_size;j++) {
830           dummy_idx[j]=local_size+j;
831         }
832         PetscCall(MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES));
833       } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
834       PetscCall(MatDestroy(&S_Ej));
835       PetscCall(MatDestroy(&S_Ej_expl));
836       local_size += subset_size;
837     }
838     PetscCall(PetscFree2(dummy_idx,work));
839     /* free */
840     PetscCall(ISDestroy(&is_I));
841     PetscCall(MatDestroy(&AE_II));
842     PetscCall(PetscFree(all_local_idx_N));
843   } else {
844     Mat               A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL;
845     Vec               Dall = NULL;
846     IS                is_A_all,*is_p_r = NULL;
847     MatType           Stype;
848     PetscScalar       *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL;
849     PetscScalar       *SEj_arr = NULL,*SEjinv_arr = NULL;
850     const PetscScalar *rS_data;
851     PetscInt          n,n_I,size_schur,size_active_schur,cum,cum2;
852     PetscBool         economic,solver_S,S_lower_triangular = PETSC_FALSE;
853     PetscBool         schur_has_vertices,factor_workaround;
854     PetscBool         use_cholesky;
855 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
856     PetscBool         oldpin;
857 #endif
858 
859     /* get sizes */
860     n_I = 0;
861     if (is_I_layer) {
862       PetscCall(ISGetLocalSize(is_I_layer,&n_I));
863     }
864     economic = PETSC_FALSE;
865     PetscCall(ISGetLocalSize(sub_schurs->is_I,&cum));
866     if (cum != n_I) economic = PETSC_TRUE;
867     PetscCall(MatGetLocalSize(sub_schurs->A,&n,NULL));
868     size_active_schur = local_size;
869 
870     /* import scaling vector (wrong formulation if we have 3D edges) */
871     if (scaling && compute_Stilda) {
872       const PetscScalar *array;
873       PetscScalar       *array2;
874       const PetscInt    *idxs;
875       PetscInt          i;
876 
877       PetscCall(ISGetIndices(sub_schurs->is_Ej_all,&idxs));
878       PetscCall(VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall));
879       PetscCall(VecGetArrayRead(scaling,&array));
880       PetscCall(VecGetArray(Dall,&array2));
881       for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]];
882       PetscCall(VecRestoreArray(Dall,&array2));
883       PetscCall(VecRestoreArrayRead(scaling,&array));
884       PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all,&idxs));
885       deluxe = PETSC_FALSE;
886     }
887 
888     /* size active schurs does not count any dirichlet or vertex dof on the interface */
889     factor_workaround = PETSC_FALSE;
890     schur_has_vertices = PETSC_FALSE;
891     cum = n_I+size_active_schur;
892     if (sub_schurs->is_dir) {
893       const PetscInt* idxs;
894       PetscInt        n_dir;
895 
896       PetscCall(ISGetLocalSize(sub_schurs->is_dir,&n_dir));
897       PetscCall(ISGetIndices(sub_schurs->is_dir,&idxs));
898       PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_dir));
899       PetscCall(ISRestoreIndices(sub_schurs->is_dir,&idxs));
900       cum += n_dir;
901       factor_workaround = PETSC_TRUE;
902     }
903     /* include the primal vertices in the Schur complement */
904     if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
905       PetscInt n_v;
906 
907       PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
908       if (n_v) {
909         const PetscInt* idxs;
910 
911         PetscCall(ISGetIndices(sub_schurs->is_vertices,&idxs));
912         PetscCall(PetscArraycpy(all_local_idx_N+cum,idxs,n_v));
913         PetscCall(ISRestoreIndices(sub_schurs->is_vertices,&idxs));
914         cum += n_v;
915         factor_workaround = PETSC_TRUE;
916         schur_has_vertices = PETSC_TRUE;
917       }
918     }
919     size_schur = cum - n_I;
920     PetscCall(ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all));
921 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
922     oldpin = sub_schurs->A->boundtocpu;
923     PetscCall(MatBindToCPU(sub_schurs->A,PETSC_TRUE));
924 #endif
925     if (cum == n) {
926       PetscCall(ISSetPermutation(is_A_all));
927       PetscCall(MatPermute(sub_schurs->A,is_A_all,is_A_all,&A));
928     } else {
929       PetscCall(MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A));
930     }
931 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
932     PetscCall(MatBindToCPU(sub_schurs->A,oldpin));
933 #endif
934     PetscCall(MatSetOptionsPrefix(A,sub_schurs->prefix));
935     PetscCall(MatAppendOptionsPrefix(A,"sub_schurs_"));
936 
937     /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
938        this is a workaround */
939     if (benign_n) {
940       Vec                    v,benign_AIIm1_ones;
941       ISLocalToGlobalMapping N_to_reor;
942       IS                     is_p0,is_p0_p;
943       PetscScalar            *cs_AIB,*AIIm1_data;
944       PetscInt               sizeA;
945 
946       PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor));
947       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0));
948       PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p));
949       PetscCall(ISDestroy(&is_p0));
950       PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones));
951       PetscCall(VecGetSize(v,&sizeA));
952       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat));
953       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat));
954       PetscCall(MatDenseGetArray(cs_AIB_mat,&cs_AIB));
955       PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
956       PetscCall(PetscMalloc1(benign_n,&is_p_r));
957       /* compute colsum of A_IB restricted to pressures */
958       for (i=0;i<benign_n;i++) {
959         const PetscScalar *array;
960         const PetscInt    *idxs;
961         PetscInt          j,nz;
962 
963         PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i]));
964         PetscCall(ISGetLocalSize(is_p_r[i],&nz));
965         PetscCall(ISGetIndices(is_p_r[i],&idxs));
966         for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.;
967         PetscCall(ISRestoreIndices(is_p_r[i],&idxs));
968         PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
969         PetscCall(MatMult(A,benign_AIIm1_ones,v));
970         PetscCall(VecResetArray(benign_AIIm1_ones));
971         PetscCall(VecGetArrayRead(v,&array));
972         for (j=0;j<size_schur;j++) {
973 #if defined(PETSC_USE_COMPLEX)
974           cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz));
975 #else
976           cs_AIB[i*size_schur + j] = array[j+n_I]/nz;
977 #endif
978         }
979         PetscCall(VecRestoreArrayRead(v,&array));
980       }
981       PetscCall(MatDenseRestoreArray(cs_AIB_mat,&cs_AIB));
982       PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
983       PetscCall(VecDestroy(&v));
984       PetscCall(VecDestroy(&benign_AIIm1_ones));
985       PetscCall(MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE));
986       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
987       PetscCall(MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));
988       PetscCall(MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL));
989       PetscCall(ISDestroy(&is_p0_p));
990       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
991     }
992     PetscCall(MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric));
993     PetscCall(MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian));
994     PetscCall(MatSetOption(A,MAT_SPD,sub_schurs->is_posdef));
995 
996     /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
997     use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
998 
999     /* when using the benign subspace trick, the local Schur complements are SPD */
1000     /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
1001        Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
1002     if (benign_trick) {
1003       sub_schurs->is_posdef = PETSC_TRUE;
1004       PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg));
1005       if (flg) use_cholesky = PETSC_FALSE;
1006     }
1007 
1008     if (n_I) {
1009       IS        is_schur;
1010       char      stype[64];
1011       PetscBool gpu = PETSC_FALSE;
1012 
1013       if (use_cholesky) {
1014         PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F));
1015       } else {
1016         PetscCall(MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F));
1017       }
1018       PetscCall(MatSetErrorIfFailure(A,PETSC_TRUE));
1019 #if defined(PETSC_HAVE_MKL_PARDISO)
1020       if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F,10,10));
1021 #endif
1022       /* subsets ordered last */
1023       PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur));
1024       PetscCall(MatFactorSetSchurIS(F,is_schur));
1025       PetscCall(ISDestroy(&is_schur));
1026 
1027       /* factorization step */
1028       if (use_cholesky) {
1029         PetscCall(MatCholeskyFactorSymbolic(F,A,NULL,NULL));
1030 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1031         PetscCall(MatMumpsSetIcntl(F,19,2));
1032 #endif
1033         PetscCall(MatCholeskyFactorNumeric(F,A,NULL));
1034         S_lower_triangular = PETSC_TRUE;
1035       } else {
1036         PetscCall(MatLUFactorSymbolic(F,A,NULL,NULL,NULL));
1037 #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1038         PetscCall(MatMumpsSetIcntl(F,19,3));
1039 #endif
1040         PetscCall(MatLUFactorNumeric(F,A,NULL));
1041       }
1042       PetscCall(MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view"));
1043 
1044       if (matl_dbg_viewer) {
1045         Mat S;
1046         IS  is;
1047 
1048         PetscCall(PetscObjectSetName((PetscObject)A,"A"));
1049         PetscCall(MatView(A,matl_dbg_viewer));
1050         PetscCall(MatFactorCreateSchurComplement(F,&S,NULL));
1051         PetscCall(PetscObjectSetName((PetscObject)S,"S"));
1052         PetscCall(MatView(S,matl_dbg_viewer));
1053         PetscCall(MatDestroy(&S));
1054         PetscCall(ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is));
1055         PetscCall(PetscObjectSetName((PetscObject)is,"I"));
1056         PetscCall(ISView(is,matl_dbg_viewer));
1057         PetscCall(ISDestroy(&is));
1058         PetscCall(ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is));
1059         PetscCall(PetscObjectSetName((PetscObject)is,"B"));
1060         PetscCall(ISView(is,matl_dbg_viewer));
1061         PetscCall(ISDestroy(&is));
1062         PetscCall(PetscObjectSetName((PetscObject)is_A_all,"IA"));
1063         PetscCall(ISView(is_A_all,matl_dbg_viewer));
1064       }
1065 
1066       /* get explicit Schur Complement computed during numeric factorization */
1067       PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1068       PetscCall(PetscStrncpy(stype,MATSEQDENSE,sizeof(stype)));
1069 #if defined(PETSC_HAVE_CUDA)
1070       PetscCall(PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,""));
1071 #endif
1072       if (gpu) {
1073         PetscCall(PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype)));
1074       }
1075       PetscCall(PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL));
1076       PetscCall(MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all));
1077       PetscCall(MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef));
1078       PetscCall(MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian));
1079       PetscCall(MatGetType(S_all,&Stype));
1080 
1081       /* we can reuse the solvers if we are not using the economic version */
1082       reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1083       factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1084       if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur)
1085         reuse_solvers = factor_workaround = PETSC_FALSE;
1086 
1087       solver_S = PETSC_TRUE;
1088 
1089       /* update the Schur complement with the change of basis on the pressures */
1090       if (benign_n) {
1091         const PetscScalar *cs_AIB;
1092         PetscScalar       *S_data,*AIIm1_data;
1093         Mat               S2 = NULL,S3 = NULL; /* dbg */
1094         PetscScalar       *S2_data,*S3_data; /* dbg */
1095         Vec               v,benign_AIIm1_ones;
1096         PetscInt          sizeA;
1097 
1098         PetscCall(MatDenseGetArray(S_all,&S_data));
1099         PetscCall(MatCreateVecs(A,&v,&benign_AIIm1_ones));
1100         PetscCall(VecGetSize(v,&sizeA));
1101 #if defined(PETSC_HAVE_MUMPS)
1102         PetscCall(MatMumpsSetIcntl(F,26,0));
1103 #endif
1104 #if defined(PETSC_HAVE_MKL_PARDISO)
1105         PetscCall(MatMkl_PardisoSetCntl(F,70,1));
1106 #endif
1107         PetscCall(MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB));
1108         PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data));
1109         if (matl_dbg_viewer) {
1110           PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2));
1111           PetscCall(MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3));
1112           PetscCall(MatDenseGetArray(S2,&S2_data));
1113           PetscCall(MatDenseGetArray(S3,&S3_data));
1114         }
1115         for (i=0;i<benign_n;i++) {
1116           PetscScalar    *array,sum = 0.,one = 1.,*sums;
1117           const PetscInt *idxs;
1118           PetscInt       k,j,nz;
1119           PetscBLASInt   B_k,B_n;
1120 
1121           PetscCall(PetscCalloc1(benign_n,&sums));
1122           PetscCall(VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i));
1123           PetscCall(VecCopy(benign_AIIm1_ones,v));
1124           PetscCall(MatSolve(F,v,benign_AIIm1_ones));
1125           PetscCall(MatMult(A,benign_AIIm1_ones,v));
1126           PetscCall(VecResetArray(benign_AIIm1_ones));
1127           /* p0 dofs (eliminated) are excluded from the sums */
1128           for (k=0;k<benign_n;k++) {
1129             PetscCall(ISGetLocalSize(is_p_r[k],&nz));
1130             PetscCall(ISGetIndices(is_p_r[k],&idxs));
1131             for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i];
1132             PetscCall(ISRestoreIndices(is_p_r[k],&idxs));
1133           }
1134           PetscCall(VecGetArrayRead(v,(const PetscScalar**)&array));
1135           if (matl_dbg_viewer) {
1136             Vec  vv;
1137             char name[16];
1138 
1139             PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv));
1140             PetscCall(PetscSNPrintf(name,sizeof(name),"Pvs%D",i));
1141             PetscCall(PetscObjectSetName((PetscObject)vv,name));
1142             PetscCall(VecView(vv,matl_dbg_viewer));
1143           }
1144           /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1145           /* cs_AIB already scaled by 1./nz */
1146           B_k = 1;
1147           for (k=0;k<benign_n;k++) {
1148             sum  = sums[k];
1149             PetscCall(PetscBLASIntCast(size_schur,&B_n));
1150 
1151             if (PetscAbsScalar(sum) == 0.0) continue;
1152             if (k == i) {
1153               PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1154               if (matl_dbg_viewer) {
1155                 PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1156               }
1157             } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1158               sum /= 2.0;
1159               PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1160               if (matl_dbg_viewer) {
1161                 PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1162               }
1163             }
1164           }
1165           sum  = 1.;
1166           PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1167           if (matl_dbg_viewer) {
1168             PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n));
1169           }
1170           PetscCall(VecRestoreArrayRead(v,(const PetscScalar**)&array));
1171           /* set p0 entry of AIIm1_ones to zero */
1172           PetscCall(ISGetLocalSize(is_p_r[i],&nz));
1173           PetscCall(ISGetIndices(is_p_r[i],&idxs));
1174           for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.;
1175           PetscCall(ISRestoreIndices(is_p_r[i],&idxs));
1176           PetscCall(PetscFree(sums));
1177         }
1178         PetscCall(VecDestroy(&benign_AIIm1_ones));
1179         if (matl_dbg_viewer) {
1180           PetscCall(MatDenseRestoreArray(S2,&S2_data));
1181           PetscCall(MatDenseRestoreArray(S3,&S3_data));
1182         }
1183         if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1184           PetscInt k,j;
1185           for (k=0;k<size_schur;k++) {
1186             for (j=k;j<size_schur;j++) {
1187               S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]);
1188             }
1189           }
1190         }
1191 
1192         /* restore defaults */
1193 #if defined(PETSC_HAVE_MUMPS)
1194         PetscCall(MatMumpsSetIcntl(F,26,-1));
1195 #endif
1196 #if defined(PETSC_HAVE_MKL_PARDISO)
1197         PetscCall(MatMkl_PardisoSetCntl(F,70,0));
1198 #endif
1199         PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB));
1200         PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data));
1201         PetscCall(VecDestroy(&v));
1202         PetscCall(MatDenseRestoreArray(S_all,&S_data));
1203         if (matl_dbg_viewer) {
1204           Mat S;
1205 
1206           PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1207           PetscCall(MatFactorCreateSchurComplement(F,&S,NULL));
1208           PetscCall(PetscObjectSetName((PetscObject)S,"Sb"));
1209           PetscCall(MatView(S,matl_dbg_viewer));
1210           PetscCall(MatDestroy(&S));
1211           PetscCall(PetscObjectSetName((PetscObject)S2,"S2P"));
1212           PetscCall(MatView(S2,matl_dbg_viewer));
1213           PetscCall(PetscObjectSetName((PetscObject)S3,"S3P"));
1214           PetscCall(MatView(S3,matl_dbg_viewer));
1215           PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat,"cs"));
1216           PetscCall(MatView(cs_AIB_mat,matl_dbg_viewer));
1217           PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1218         }
1219         PetscCall(MatDestroy(&S2));
1220         PetscCall(MatDestroy(&S3));
1221       }
1222       if (!reuse_solvers) {
1223         for (i=0;i<benign_n;i++) {
1224           PetscCall(ISDestroy(&is_p_r[i]));
1225         }
1226         PetscCall(PetscFree(is_p_r));
1227         PetscCall(MatDestroy(&cs_AIB_mat));
1228         PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1229       }
1230     } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1231       PetscCall(MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all));
1232       PetscCall(MatGetType(S_all,&Stype));
1233       reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1234       factor_workaround = PETSC_FALSE;
1235       solver_S = PETSC_FALSE;
1236     }
1237 
1238     if (reuse_solvers) {
1239       Mat                A_II,Afake;
1240       Vec                vec1_B;
1241       PCBDDCReuseSolvers msolv_ctx;
1242       PetscInt           n_R;
1243 
1244       if (sub_schurs->reuse_solver) {
1245         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1246       } else {
1247         PetscCall(PetscNew(&sub_schurs->reuse_solver));
1248       }
1249       msolv_ctx = sub_schurs->reuse_solver;
1250       PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL));
1251       PetscCall(PetscObjectReference((PetscObject)F));
1252       msolv_ctx->F = F;
1253       PetscCall(MatCreateVecs(F,&msolv_ctx->sol,NULL));
1254       /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1255       {
1256         PetscScalar *array;
1257         PetscInt    n;
1258 
1259         PetscCall(VecGetLocalSize(msolv_ctx->sol,&n));
1260         PetscCall(VecGetArray(msolv_ctx->sol,&array));
1261         PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs));
1262         PetscCall(VecRestoreArray(msolv_ctx->sol,&array));
1263       }
1264       msolv_ctx->has_vertices = schur_has_vertices;
1265 
1266       /* interior solver */
1267       PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver));
1268       PetscCall(PCSetOperators(msolv_ctx->interior_solver,A_II,A_II));
1269       PetscCall(PCSetType(msolv_ctx->interior_solver,PCSHELL));
1270       PetscCall(PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)"));
1271       PetscCall(PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx));
1272       PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1273       PetscCall(PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior));
1274       PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose));
1275 
1276       /* correction solver */
1277       PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver));
1278       PetscCall(PCSetType(msolv_ctx->correction_solver,PCSHELL));
1279       PetscCall(PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)"));
1280       PetscCall(PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx));
1281       PetscCall(PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View));
1282       PetscCall(PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction));
1283       PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose));
1284 
1285       /* scatter and vecs for Schur complement solver */
1286       PetscCall(MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B));
1287       PetscCall(MatCreateVecs(sub_schurs->S,&vec1_B,NULL));
1288       if (!schur_has_vertices) {
1289         PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B));
1290         PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B));
1291         PetscCall(PetscObjectReference((PetscObject)is_A_all));
1292         msolv_ctx->is_R = is_A_all;
1293       } else {
1294         IS              is_B_all;
1295         const PetscInt* idxs;
1296         PetscInt        dual,n_v,n;
1297 
1298         PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&n_v));
1299         dual = size_schur - n_v;
1300         PetscCall(ISGetLocalSize(is_A_all,&n));
1301         PetscCall(ISGetIndices(is_A_all,&idxs));
1302         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all));
1303         PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B));
1304         PetscCall(ISDestroy(&is_B_all));
1305         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all));
1306         PetscCall(VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B));
1307         PetscCall(ISDestroy(&is_B_all));
1308         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R));
1309         PetscCall(ISRestoreIndices(is_A_all,&idxs));
1310       }
1311       PetscCall(ISGetLocalSize(msolv_ctx->is_R,&n_R));
1312       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake));
1313       PetscCall(MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY));
1314       PetscCall(MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY));
1315       PetscCall(PCSetOperators(msolv_ctx->correction_solver,Afake,Afake));
1316       PetscCall(MatDestroy(&Afake));
1317       PetscCall(VecDestroy(&vec1_B));
1318 
1319       /* communicate benign info to solver context */
1320       if (benign_n) {
1321         PetscScalar *array;
1322 
1323         msolv_ctx->benign_n = benign_n;
1324         msolv_ctx->benign_zerodiag_subs = is_p_r;
1325         PetscCall(PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals));
1326         msolv_ctx->benign_csAIB = cs_AIB_mat;
1327         PetscCall(MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL));
1328         PetscCall(VecGetArray(msolv_ctx->benign_corr_work,&array));
1329         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec));
1330         PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work,&array));
1331         msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1332       }
1333     } else {
1334       if (sub_schurs->reuse_solver) {
1335         PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1336       }
1337       PetscCall(PetscFree(sub_schurs->reuse_solver));
1338     }
1339     PetscCall(MatDestroy(&A));
1340     PetscCall(ISDestroy(&is_A_all));
1341 
1342     /* Work arrays */
1343     PetscCall(PetscMalloc1(max_subset_size*max_subset_size,&work));
1344 
1345     /* S_Ej_all */
1346     cum = cum2 = 0;
1347     PetscCall(MatDenseGetArrayRead(S_all,&rS_data));
1348     PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr));
1349     if (compute_Stilda) {
1350       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
1351     }
1352     for (i=0;i<sub_schurs->n_subs;i++) {
1353       PetscInt j;
1354 
1355       /* get S_E */
1356       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1357       if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1358         PetscInt k;
1359         for (k=0;k<subset_size;k++) {
1360           for (j=k;j<subset_size;j++) {
1361             work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1362             work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]);
1363           }
1364         }
1365       } else { /* just copy to workspace */
1366         PetscInt k;
1367         for (k=0;k<subset_size;k++) {
1368           for (j=0;j<subset_size;j++) {
1369             work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1370           }
1371         }
1372       }
1373       /* insert S_E values */
1374       if (sub_schurs->change) {
1375         Mat change_sub,SEj,T;
1376 
1377         /* change basis */
1378         PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1379         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1380         if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1381           Mat T2;
1382           PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1383           PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1384           PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
1385           PetscCall(MatDestroy(&T2));
1386         } else {
1387           PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1388         }
1389         PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1390         PetscCall(MatDestroy(&T));
1391         PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL));
1392         PetscCall(MatDestroy(&SEj));
1393       }
1394       if (deluxe) {
1395         PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
1396         /* if adaptivity is requested, invert S_E blocks */
1397         if (compute_Stilda) {
1398           Mat               M;
1399           const PetscScalar *vals;
1400           PetscBool         isdense,isdensecuda;
1401 
1402           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M));
1403           PetscCall(MatSetOption(M,MAT_SPD,sub_schurs->is_posdef));
1404           PetscCall(MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian));
1405           if (!PetscBTLookup(sub_schurs->is_edge,i)) {
1406             PetscCall(MatSetType(M,Stype));
1407           }
1408           PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense));
1409           PetscCall(PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda));
1410           if (use_cholesky) {
1411             PetscCall(MatCholeskyFactor(M,NULL,NULL));
1412           } else {
1413             PetscCall(MatLUFactor(M,NULL,NULL,NULL));
1414           }
1415           if (isdense) {
1416             PetscCall(MatSeqDenseInvertFactors_Private(M));
1417 #if defined(PETSC_HAVE_CUDA)
1418           } else if (isdensecuda) {
1419             PetscCall(MatSeqDenseCUDAInvertFactors_Private(M));
1420 #endif
1421           } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype);
1422           PetscCall(MatDenseGetArrayRead(M,&vals));
1423           PetscCall(PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size));
1424           PetscCall(MatDenseRestoreArrayRead(M,&vals));
1425           PetscCall(MatDestroy(&M));
1426         }
1427       } else if (compute_Stilda) { /* not using deluxe */
1428         Mat         SEj;
1429         Vec         D;
1430         PetscScalar *array;
1431 
1432         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1433         PetscCall(VecGetArray(Dall,&array));
1434         PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D));
1435         PetscCall(VecRestoreArray(Dall,&array));
1436         PetscCall(VecShift(D,-1.));
1437         PetscCall(MatDiagonalScale(SEj,D,D));
1438         PetscCall(MatDestroy(&SEj));
1439         PetscCall(VecDestroy(&D));
1440         PetscCall(PetscArraycpy(SEj_arr,work,subset_size*subset_size));
1441       }
1442       cum += subset_size;
1443       cum2 += subset_size*(size_schur + 1);
1444       SEj_arr += subset_size*subset_size;
1445       if (SEjinv_arr) SEjinv_arr += subset_size*subset_size;
1446     }
1447     PetscCall(MatDenseRestoreArrayRead(S_all,&rS_data));
1448     PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr));
1449     if (compute_Stilda) {
1450       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr));
1451     }
1452     if (solver_S) {
1453       PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1454     }
1455 
1456     /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1457        however, preliminary tests indicate using GPUs is still faster in the solve phase */
1458 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1459     if (reuse_solvers) {
1460       Mat                  St;
1461       MatFactorSchurStatus st;
1462 
1463       flg  = PETSC_FALSE;
1464       PetscCall(PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL));
1465       PetscCall(MatFactorGetSchurComplement(F,&St,&st));
1466       PetscCall(MatBindToCPU(St,flg));
1467       PetscCall(MatFactorRestoreSchurComplement(F,&St,st));
1468     }
1469 #endif
1470 
1471     schur_factor = NULL;
1472     if (compute_Stilda && size_active_schur) {
1473 
1474       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1475       if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1476         PetscCall(PetscArraycpy(SEjinv_arr,work,size_schur*size_schur));
1477       } else {
1478         Mat S_all_inv=NULL;
1479 
1480         if (solver_S) {
1481           /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1482              The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1483           if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1484             PetscScalar *data;
1485             PetscInt     nd = 0;
1486 
1487             if (!use_potr) {
1488               SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1489             }
1490             PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1491             PetscCall(MatDenseGetArray(S_all_inv,&data));
1492             if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1493               PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1494             }
1495 
1496             /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1497             if (schur_has_vertices) {
1498               Mat          M;
1499               PetscScalar *tdata;
1500               PetscInt     nv = 0, news;
1501 
1502               PetscCall(ISGetLocalSize(sub_schurs->is_vertices,&nv));
1503               news = size_active_schur + nv;
1504               PetscCall(PetscCalloc1(news*news,&tdata));
1505               for (i=0;i<size_active_schur;i++) {
1506                 PetscCall(PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i));
1507                 PetscCall(PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv));
1508               }
1509               for (i=0;i<nv;i++) {
1510                 PetscInt k = i+size_active_schur;
1511                 PetscCall(PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i));
1512               }
1513 
1514               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M));
1515               PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1516               PetscCall(MatCholeskyFactor(M,NULL,NULL));
1517               /* save the factors */
1518               cum = 0;
1519               PetscCall(PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor));
1520               for (i=0;i<size_active_schur;i++) {
1521                 PetscCall(PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i));
1522                 cum += size_active_schur - i;
1523               }
1524               for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)]));
1525               PetscCall(MatSeqDenseInvertFactors_Private(M));
1526               /* move back just the active dofs to the Schur complement */
1527               for (i=0;i<size_active_schur;i++) {
1528                 PetscCall(PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur));
1529               }
1530               PetscCall(PetscFree(tdata));
1531               PetscCall(MatDestroy(&M));
1532             } else { /* we can factorize and invert just the activedofs */
1533               Mat         M;
1534               PetscScalar *aux;
1535 
1536               PetscCall(PetscMalloc1(nd,&aux));
1537               for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)];
1538               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M));
1539               PetscCall(MatDenseSetLDA(M,size_schur));
1540               PetscCall(MatSetOption(M,MAT_SPD,PETSC_TRUE));
1541               PetscCall(MatCholeskyFactor(M,NULL,NULL));
1542               PetscCall(MatSeqDenseInvertFactors_Private(M));
1543               PetscCall(MatDestroy(&M));
1544               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M));
1545               PetscCall(MatZeroEntries(M));
1546               PetscCall(MatDestroy(&M));
1547               PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M));
1548               PetscCall(MatDenseSetLDA(M,size_schur));
1549               PetscCall(MatZeroEntries(M));
1550               PetscCall(MatDestroy(&M));
1551               for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i];
1552               PetscCall(PetscFree(aux));
1553             }
1554             PetscCall(MatDenseRestoreArray(S_all_inv,&data));
1555           } else { /* use MatFactor calls to invert S */
1556             PetscCall(MatFactorInvertSchurComplement(F));
1557             PetscCall(MatFactorGetSchurComplement(F,&S_all_inv,NULL));
1558           }
1559         } else { /* we need to invert explicitly since we are not using MatFactor for S */
1560           PetscCall(PetscObjectReference((PetscObject)S_all));
1561           S_all_inv = S_all;
1562           PetscCall(MatDenseGetArray(S_all_inv,&S_data));
1563           PetscCall(PetscBLASIntCast(size_schur,&B_N));
1564           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1565           if (use_potr) {
1566             PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
1567             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1568             PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
1569             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1570           } else if (use_sytr) {
1571             PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1572             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1573             PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr));
1574             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1575           } else {
1576             PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
1577             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1578             PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1579             PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1580           }
1581           PetscCall(PetscLogFlops(1.0*size_schur*size_schur*size_schur));
1582           PetscCall(PetscFPTrapPop());
1583           PetscCall(MatDenseRestoreArray(S_all_inv,&S_data));
1584         }
1585         /* S_Ej_tilda_all */
1586         cum = cum2 = 0;
1587         PetscCall(MatDenseGetArrayRead(S_all_inv,&rS_data));
1588         for (i=0;i<sub_schurs->n_subs;i++) {
1589           PetscInt j;
1590 
1591           PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1592           /* get (St^-1)_E */
1593           /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1594              will be properly accessed later during adaptive selection */
1595           if (S_lower_triangular) {
1596             PetscInt k;
1597             if (sub_schurs->change) {
1598               for (k=0;k<subset_size;k++) {
1599                 for (j=k;j<subset_size;j++) {
1600                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1601                   work[j*subset_size+k] = work[k*subset_size+j];
1602                 }
1603               }
1604             } else {
1605               for (k=0;k<subset_size;k++) {
1606                 for (j=k;j<subset_size;j++) {
1607                   work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1608                 }
1609               }
1610             }
1611           } else {
1612             PetscInt k;
1613             for (k=0;k<subset_size;k++) {
1614               for (j=0;j<subset_size;j++) {
1615                 work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1616               }
1617             }
1618           }
1619           if (sub_schurs->change) {
1620             Mat change_sub,SEj,T;
1621 
1622             /* change basis */
1623             PetscCall(KSPGetOperators(sub_schurs->change[i],&change_sub,NULL));
1624             PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj));
1625             if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1626               Mat T2;
1627               PetscCall(MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2));
1628               PetscCall(MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1629               PetscCall(MatDestroy(&T2));
1630               PetscCall(MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T));
1631             } else {
1632               PetscCall(MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T));
1633             }
1634             PetscCall(MatCopy(T,SEj,SAME_NONZERO_PATTERN));
1635             PetscCall(MatDestroy(&T));
1636             /* set diagonal entry to a very large value to pick the basis we are eliminating as the first eigenvectors with adaptive selection */
1637             PetscCall(MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1./PETSC_SMALL,NULL,NULL));
1638             PetscCall(MatDestroy(&SEj));
1639           }
1640           PetscCall(PetscArraycpy(SEjinv_arr,work,subset_size*subset_size));
1641           cum += subset_size;
1642           cum2 += subset_size*(size_schur + 1);
1643           SEjinv_arr += subset_size*subset_size;
1644         }
1645         PetscCall(MatDenseRestoreArrayRead(S_all_inv,&rS_data));
1646         if (solver_S) {
1647           if (schur_has_vertices) {
1648             PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED));
1649           } else {
1650             PetscCall(MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED));
1651           }
1652         }
1653         PetscCall(MatDestroy(&S_all_inv));
1654       }
1655       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr));
1656 
1657       /* move back factors if needed */
1658       if (schur_has_vertices) {
1659         Mat      S_tmp;
1660         PetscInt nd = 0;
1661 
1662         PetscCheck(solver_S,PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
1663         PetscCall(MatFactorGetSchurComplement(F,&S_tmp,NULL));
1664         if (use_potr) {
1665           PetscScalar *data;
1666 
1667           PetscCall(MatDenseGetArray(S_tmp,&data));
1668           PetscCall(PetscArrayzero(data,size_schur*size_schur));
1669 
1670           if (S_lower_triangular) {
1671             cum = 0;
1672             for (i=0;i<size_active_schur;i++) {
1673               PetscCall(PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i));
1674               cum += size_active_schur-i;
1675             }
1676           } else {
1677             PetscCall(PetscArraycpy(data,schur_factor,size_schur*size_schur));
1678           }
1679           if (sub_schurs->is_dir) {
1680             PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1681             for (i=0;i<nd;i++) {
1682               data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1683             }
1684           }
1685           /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1686              set the diagonal entry of the Schur factor to a very large value */
1687           for (i=size_active_schur+nd;i<size_schur;i++) {
1688             data[i*(size_schur+1)] = infty;
1689           }
1690           PetscCall(MatDenseRestoreArray(S_tmp,&data));
1691         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1692         PetscCall(MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED));
1693       }
1694     } else if (factor_workaround) { /* we need to eliminate any unneeded coupling */
1695       PetscScalar *data;
1696       PetscInt    nd = 0;
1697 
1698       if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1699         PetscCall(ISGetLocalSize(sub_schurs->is_dir,&nd));
1700       }
1701       PetscCall(MatFactorGetSchurComplement(F,&S_all,NULL));
1702       PetscCall(MatDenseGetArray(S_all,&data));
1703       for (i=0;i<size_active_schur;i++) {
1704         PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
1705       }
1706       for (i=size_active_schur+nd;i<size_schur;i++) {
1707         PetscCall(PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur));
1708         data[i*(size_schur+1)] = infty;
1709       }
1710       PetscCall(MatDenseRestoreArray(S_all,&data));
1711       PetscCall(MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED));
1712     }
1713     PetscCall(PetscFree(work));
1714     PetscCall(PetscFree(schur_factor));
1715     PetscCall(VecDestroy(&Dall));
1716   }
1717   PetscCall(ISDestroy(&is_I_layer));
1718   PetscCall(MatDestroy(&S_all));
1719   PetscCall(MatDestroy(&A_BB));
1720   PetscCall(MatDestroy(&A_IB));
1721   PetscCall(MatDestroy(&A_BI));
1722   PetscCall(MatDestroy(&F));
1723 
1724   PetscCall(PetscMalloc1(sub_schurs->n_subs,&nnz));
1725   for (i=0;i<sub_schurs->n_subs;i++) {
1726     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i]));
1727   }
1728   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer));
1729   PetscCall(MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz));
1730   PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
1731   PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY));
1732   if (compute_Stilda) {
1733     PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz));
1734     PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
1735     PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY));
1736     if (deluxe) {
1737       PetscCall(MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz));
1738       PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
1739       PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY));
1740     }
1741   }
1742   PetscCall(ISDestroy(&is_I_layer));
1743 
1744   /* Get local part of (\sum_j S_Ej) */
1745   if (!sub_schurs->sum_S_Ej_all) {
1746     PetscCall(MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all));
1747   }
1748   PetscCall(VecSet(gstash,0.0));
1749   PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray));
1750   PetscCall(VecPlaceArray(lstash,stasharray));
1751   PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1752   PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1753   PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray));
1754   PetscCall(VecResetArray(lstash));
1755   PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray));
1756   PetscCall(VecPlaceArray(lstash,stasharray));
1757   PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1758   PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1759   PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray));
1760   PetscCall(VecResetArray(lstash));
1761 
1762   /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1763   if (compute_Stilda) {
1764     PetscCall(VecSet(gstash,0.0));
1765     PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1766     PetscCall(VecPlaceArray(lstash,stasharray));
1767     PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1768     PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1769     PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1770     PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1771     PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray));
1772     PetscCall(VecResetArray(lstash));
1773     if (deluxe) {
1774       PetscCall(VecSet(gstash,0.0));
1775       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1776       PetscCall(VecPlaceArray(lstash,stasharray));
1777       PetscCall(VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1778       PetscCall(VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD));
1779       PetscCall(VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1780       PetscCall(VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE));
1781       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray));
1782       PetscCall(VecResetArray(lstash));
1783     } else {
1784       PetscScalar *array;
1785       PetscInt    cum;
1786 
1787       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array));
1788       cum = 0;
1789       for (i=0;i<sub_schurs->n_subs;i++) {
1790         PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1791         PetscCall(PetscBLASIntCast(subset_size,&B_N));
1792         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1793         if (use_potr) {
1794           PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
1795           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRF Lapack routine %d",(int)B_ierr);
1796           PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
1797           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in POTRI Lapack routine %d",(int)B_ierr);
1798         } else if (use_sytr) {
1799           PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1800           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRF Lapack routine %d",(int)B_ierr);
1801           PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr));
1802           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYTRI Lapack routine %d",(int)B_ierr);
1803         } else {
1804           PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
1805           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRF Lapack routine %d",(int)B_ierr);
1806           PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1807           PetscCheck(!B_ierr,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GETRI Lapack routine %d",(int)B_ierr);
1808         }
1809         PetscCall(PetscLogFlops(1.0*subset_size*subset_size*subset_size));
1810         PetscCall(PetscFPTrapPop());
1811         cum += subset_size*subset_size;
1812       }
1813       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array));
1814       PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
1815       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1816       sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1817     }
1818   }
1819   PetscCall(VecDestroy(&lstash));
1820   PetscCall(VecDestroy(&gstash));
1821   PetscCall(VecScatterDestroy(&sstash));
1822 
1823   if (matl_dbg_viewer) {
1824     PetscInt cum;
1825 
1826     if (sub_schurs->S_Ej_all) {
1827       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE"));
1828       PetscCall(MatView(sub_schurs->S_Ej_all,matl_dbg_viewer));
1829     }
1830     if (sub_schurs->sum_S_Ej_all) {
1831       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE"));
1832       PetscCall(MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer));
1833     }
1834     if (sub_schurs->sum_S_Ej_inv_all) {
1835       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm"));
1836       PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer));
1837     }
1838     if (sub_schurs->sum_S_Ej_tilda_all) {
1839       PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt"));
1840       PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer));
1841     }
1842     for (i=0,cum=0;i<sub_schurs->n_subs;i++) {
1843       IS   is;
1844       char name[16];
1845 
1846       PetscCall(PetscSNPrintf(name,sizeof(name),"IE%D",i));
1847       PetscCall(ISGetLocalSize(sub_schurs->is_subs[i],&subset_size));
1848       PetscCall(ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is));
1849       PetscCall(PetscObjectSetName((PetscObject)is,name));
1850       PetscCall(ISView(is,matl_dbg_viewer));
1851       PetscCall(ISDestroy(&is));
1852       cum += subset_size;
1853     }
1854   }
1855 
1856   /* free workspace */
1857   if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
1858   if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
1859   PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
1860   PetscCall(PetscFree2(Bwork,pivots));
1861   PetscCall(PetscCommDestroy(&comm_n));
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc)
1866 {
1867   IS              *faces,*edges,*all_cc,vertices;
1868   PetscInt        i,n_faces,n_edges,n_all_cc;
1869   PetscBool       is_sorted,ispardiso,ismumps;
1870   PetscErrorCode  ierr;
1871 
1872   PetscFunctionBegin;
1873   PetscCall(ISSorted(is_I,&is_sorted));
1874   PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_I),PETSC_ERR_PLIB,"IS for I dofs should be shorted");
1875   PetscCall(ISSorted(is_B,&is_sorted));
1876   PetscCheck(is_sorted,PetscObjectComm((PetscObject)is_B),PETSC_ERR_PLIB,"IS for B dofs should be shorted");
1877 
1878   /* reset any previous data */
1879   PetscCall(PCBDDCSubSchursReset(sub_schurs));
1880 
1881   /* get index sets for faces and edges (already sorted by global ordering) */
1882   PetscCall(PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
1883   n_all_cc = n_faces+n_edges;
1884   PetscCall(PetscBTCreate(n_all_cc,&sub_schurs->is_edge));
1885   PetscCall(PetscMalloc1(n_all_cc,&all_cc));
1886   for (i=0;i<n_faces;i++) {
1887     if (copycc) {
1888       PetscCall(ISDuplicate(faces[i],&all_cc[i]));
1889     } else {
1890       PetscCall(PetscObjectReference((PetscObject)faces[i]));
1891       all_cc[i] = faces[i];
1892     }
1893   }
1894   for (i=0;i<n_edges;i++) {
1895     if (copycc) {
1896       PetscCall(ISDuplicate(edges[i],&all_cc[n_faces+i]));
1897     } else {
1898       PetscCall(PetscObjectReference((PetscObject)edges[i]));
1899       all_cc[n_faces+i] = edges[i];
1900     }
1901     PetscCall(PetscBTSet(sub_schurs->is_edge,n_faces+i));
1902   }
1903   PetscCall(PetscObjectReference((PetscObject)vertices));
1904   sub_schurs->is_vertices = vertices;
1905   PetscCall(PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices));
1906   sub_schurs->is_dir = NULL;
1907   PetscCall(PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir));
1908 
1909   /* Determine if MatFactor can be used */
1910   PetscCall(PetscStrallocpy(prefix,&sub_schurs->prefix));
1911 #if defined(PETSC_HAVE_MUMPS)
1912   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type)));
1913 #elif defined(PETSC_HAVE_MKL_PARDISO)
1914   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type)));
1915 #else
1916   PetscCall(PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type)));
1917 #endif
1918 #if defined(PETSC_USE_COMPLEX)
1919   sub_schurs->is_hermitian  = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1920 #else
1921   sub_schurs->is_hermitian  = PETSC_TRUE;
1922 #endif
1923   sub_schurs->is_posdef     = PETSC_TRUE;
1924   sub_schurs->is_symmetric  = PETSC_TRUE;
1925   sub_schurs->debug         = PETSC_FALSE;
1926   sub_schurs->restrict_comm = PETSC_FALSE;
1927   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");PetscCall(ierr);
1928   PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL));
1929   PetscCall(PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL));
1930   PetscCall(PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL));
1931   PetscCall(PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL));
1932   PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL));
1933   PetscCall(PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL));
1934   ierr = PetscOptionsEnd();PetscCall(ierr);
1935   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps));
1936   PetscCall(PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso));
1937   sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
1938 
1939   /* for reals, symmetric and hermitian are synonims */
1940 #if !defined(PETSC_USE_COMPLEX)
1941   sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1942   sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1943 #endif
1944 
1945   PetscCall(PetscObjectReference((PetscObject)is_I));
1946   sub_schurs->is_I = is_I;
1947   PetscCall(PetscObjectReference((PetscObject)is_B));
1948   sub_schurs->is_B = is_B;
1949   PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
1950   sub_schurs->l2gmap = graph->l2gmap;
1951   PetscCall(PetscObjectReference((PetscObject)BtoNmap));
1952   sub_schurs->BtoNmap = BtoNmap;
1953   sub_schurs->n_subs = n_all_cc;
1954   sub_schurs->is_subs = all_cc;
1955   sub_schurs->S_Ej_all = NULL;
1956   sub_schurs->sum_S_Ej_all = NULL;
1957   sub_schurs->sum_S_Ej_inv_all = NULL;
1958   sub_schurs->sum_S_Ej_tilda_all = NULL;
1959   sub_schurs->is_Ej_all = NULL;
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1964 {
1965   PCBDDCSubSchurs schurs_ctx;
1966 
1967   PetscFunctionBegin;
1968   PetscCall(PetscNew(&schurs_ctx));
1969   schurs_ctx->n_subs = 0;
1970   *sub_schurs = schurs_ctx;
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1975 {
1976   PetscInt       i;
1977 
1978   PetscFunctionBegin;
1979   if (!sub_schurs) PetscFunctionReturn(0);
1980   PetscCall(PetscFree(sub_schurs->prefix));
1981   PetscCall(MatDestroy(&sub_schurs->A));
1982   PetscCall(MatDestroy(&sub_schurs->S));
1983   PetscCall(ISDestroy(&sub_schurs->is_I));
1984   PetscCall(ISDestroy(&sub_schurs->is_B));
1985   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
1986   PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
1987   PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
1988   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
1989   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
1990   PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
1991   PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
1992   PetscCall(ISDestroy(&sub_schurs->is_vertices));
1993   PetscCall(ISDestroy(&sub_schurs->is_dir));
1994   PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
1995   for (i=0;i<sub_schurs->n_subs;i++) {
1996     PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
1997   }
1998   if (sub_schurs->n_subs) {
1999     PetscCall(PetscFree(sub_schurs->is_subs));
2000   }
2001   if (sub_schurs->reuse_solver) {
2002     PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
2003   }
2004   PetscCall(PetscFree(sub_schurs->reuse_solver));
2005   if (sub_schurs->change) {
2006     for (i=0;i<sub_schurs->n_subs;i++) {
2007       PetscCall(KSPDestroy(&sub_schurs->change[i]));
2008       PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
2009     }
2010   }
2011   PetscCall(PetscFree(sub_schurs->change));
2012   PetscCall(PetscFree(sub_schurs->change_primal_sub));
2013   sub_schurs->n_subs = 0;
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs)
2018 {
2019   PetscFunctionBegin;
2020   PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2021   PetscCall(PetscFree(*sub_schurs));
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
2026 {
2027   PetscInt       i,j,n;
2028 
2029   PetscFunctionBegin;
2030   n = 0;
2031   for (i=-n_prev;i<0;i++) {
2032     PetscInt start_dof = queue_tip[i];
2033     for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
2034       PetscInt dof = adjncy[j];
2035       if (!PetscBTLookup(touched,dof)) {
2036         PetscCall(PetscBTSet(touched,dof));
2037         queue_tip[n] = dof;
2038         n++;
2039       }
2040     }
2041   }
2042   *n_added = n;
2043   PetscFunctionReturn(0);
2044 }
2045