xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 688c8ee7bc20d3b60efe08031be4f1f63ae1e457)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <../src/mat/impls/dense/seq/dense.h>
6 #include <petscdmplex.h>
7 #include <petscblaslapack.h>
8 #include <petsc/private/sfimpl.h>
9 #include <petsc/private/dmpleximpl.h>
10 #include <petscdmda.h>
11 
12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13 
14 /* if range is true,  it returns B s.t. span{B} = range(A)
15    if range is false, it returns B s.t. range(B) _|_ range(A) */
16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17 {
18   PetscScalar *uwork, *data, *U, ds = 0.;
19   PetscReal   *sing;
20   PetscBLASInt bM, bN, lwork, lierr, di = 1;
21   PetscInt     ulw, i, nr, nc, n;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal *rwork2;
24 #endif
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &nr, &nc));
28   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29 
30   /* workspace */
31   if (!work) {
32     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33     PetscCall(PetscMalloc1(ulw, &uwork));
34   } else {
35     ulw   = lw;
36     uwork = work;
37   }
38   n = PetscMin(nr, nc);
39   if (!rwork) {
40     PetscCall(PetscMalloc1(n, &sing));
41   } else {
42     sing = rwork;
43   }
44 
45   /* SVD */
46   PetscCall(PetscMalloc1(nr * nr, &U));
47   PetscCall(PetscBLASIntCast(nr, &bM));
48   PetscCall(PetscBLASIntCast(nc, &bN));
49   PetscCall(PetscBLASIntCast(ulw, &lwork));
50   PetscCall(MatDenseGetArray(A, &data));
51   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52 #if !defined(PETSC_USE_COMPLEX)
53   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54 #else
55   PetscCall(PetscMalloc1(5 * n, &rwork2));
56   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57   PetscCall(PetscFree(rwork2));
58 #endif
59   PetscCall(PetscFPTrapPop());
60   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
61   PetscCall(MatDenseRestoreArray(A, &data));
62   for (i = 0; i < n; i++)
63     if (sing[i] < PETSC_SMALL) break;
64   if (!rwork) PetscCall(PetscFree(sing));
65   if (!work) PetscCall(PetscFree(uwork));
66   /* create B */
67   if (!range) {
68     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69     PetscCall(MatDenseGetArray(*B, &data));
70     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71   } else {
72     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73     PetscCall(MatDenseGetArray(*B, &data));
74     PetscCall(PetscArraycpy(data, U, i * nr));
75   }
76   PetscCall(MatDenseRestoreArray(*B, &data));
77   PetscCall(PetscFree(U));
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* TODO REMOVE */
82 #if defined(PRINT_GDET)
83 static int inc = 0;
84 static int lev = 0;
85 #endif
86 
87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88 {
89   Mat          GE, GEd;
90   PetscInt     rsize, csize, esize;
91   PetscScalar *ptr;
92 
93   PetscFunctionBegin;
94   PetscCall(ISGetSize(edge, &esize));
95   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96   PetscCall(ISGetSize(extrow, &rsize));
97   PetscCall(ISGetSize(extcol, &csize));
98 
99   /* gradients */
100   ptr = work + 5 * esize;
101   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104   PetscCall(MatDestroy(&GE));
105 
106   /* constants */
107   ptr += rsize * csize;
108   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111   PetscCall(MatDestroy(&GE));
112   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113   PetscCall(MatDestroy(&GEd));
114 
115   if (corners) {
116     Mat                GEc;
117     const PetscScalar *vals;
118     PetscScalar        v;
119 
120     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122     PetscCall(MatDenseGetArrayRead(GEd, &vals));
123     /* v       = PetscAbsScalar(vals[0]); */
124     v        = 1.;
125     cvals[0] = vals[0] / v;
126     cvals[1] = vals[1] / v;
127     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128     PetscCall(MatScale(*GKins, 1. / v));
129 #if defined(PRINT_GDET)
130     {
131       PetscViewer viewer;
132       char        filename[256];
133       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137       PetscCall(MatView(GEc, viewer));
138       PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139       PetscCall(MatView(*GKins, viewer));
140       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141       PetscCall(MatView(GEd, viewer));
142       PetscCall(PetscViewerDestroy(&viewer));
143     }
144 #endif
145     PetscCall(MatDestroy(&GEd));
146     PetscCall(MatDestroy(&GEc));
147   }
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
152 
153 PetscErrorCode PCBDDCNedelecSupport(PC pc)
154 {
155   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
156   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
157   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158   PetscSF                sfv;
159   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160   MPI_Comm               comm;
161   IS                     lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162   IS                    *eedges, *extrows, *extcols, *alleedges;
163   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164   PetscScalar           *vals, *work;
165   PetscReal             *rwork;
166   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
167   PetscInt               ne, nv, Lv, order, n, field;
168   PetscInt               i, j, extmem, cum, maxsize, nee;
169   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170   PetscInt              *sfvleaves, *sfvroots;
171   PetscInt              *corners, *cedges;
172   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173   PetscInt              *emarks;
174   PetscBool              print, eerr, done, lrc[2], conforming, global, setprimal;
175 
176   PetscFunctionBegin;
177   /* If the discrete gradient is defined for a subset of dofs and global is true,
178      it assumes G is given in global ordering for all the dofs.
179      Otherwise, the ordering is global for the Nedelec field */
180   order      = pcbddc->nedorder;
181   conforming = pcbddc->conforming;
182   field      = pcbddc->nedfield;
183   global     = pcbddc->nedglobal;
184   setprimal  = PETSC_FALSE;
185   print      = PETSC_FALSE;
186 
187   /* Command line customization */
188   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190   /* print debug info and adaptive order TODO: to be removed */
191   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193   PetscOptionsEnd();
194 
195   /* Return if there are no edges in the decomposition */
196   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199   PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200   lrc[0] = PETSC_FALSE;
201   for (i = 0; i < n; i++) {
202     if (PetscRealPart(vals[i]) > 2.) {
203       lrc[0] = PETSC_TRUE;
204       break;
205     }
206   }
207   PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208   PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
209   if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
210 
211   /* Get Nedelec field */
212   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213   if (pcbddc->n_ISForDofsLocal && field >= 0) {
214     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215     nedfieldlocal = pcbddc->ISForDofsLocal[field];
216     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218     ne            = n;
219     nedfieldlocal = NULL;
220     global        = PETSC_TRUE;
221   } else if (field == PETSC_DECIDE) {
222     PetscInt rst, ren, *idx;
223 
224     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227     for (i = rst; i < ren; i++) {
228       PetscInt nc;
229 
230       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233     }
234     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236     PetscCall(PetscMalloc1(n, &idx));
237     for (i = 0, ne = 0; i < n; i++)
238       if (matis->sf_leafdata[i]) idx[ne++] = i;
239     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240   } else {
241     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242   }
243 
244   /* Sanity checks */
245   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
248 
249   /* Just set primal dofs and return */
250   if (setprimal) {
251     IS        enedfieldlocal;
252     PetscInt *eidxs;
253 
254     PetscCall(PetscMalloc1(ne, &eidxs));
255     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256     if (nedfieldlocal) {
257       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258       for (i = 0, cum = 0; i < ne; i++) {
259         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260       }
261       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262     } else {
263       for (i = 0, cum = 0; i < ne; i++) {
264         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265       }
266     }
267     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270     PetscCall(PetscFree(eidxs));
271     PetscCall(ISDestroy(&nedfieldlocal));
272     PetscCall(ISDestroy(&enedfieldlocal));
273     PetscFunctionReturn(PETSC_SUCCESS);
274   }
275 
276   /* Compute some l2g maps */
277   if (nedfieldlocal) {
278     IS is;
279 
280     /* need to map from the local Nedelec field to local numbering */
281     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286     if (global) {
287       PetscCall(PetscObjectReference((PetscObject)al2g));
288       el2g = al2g;
289     } else {
290       IS gis;
291 
292       PetscCall(ISRenumber(is, NULL, NULL, &gis));
293       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294       PetscCall(ISDestroy(&gis));
295     }
296     PetscCall(ISDestroy(&is));
297   } else {
298     /* one ref for the destruction of al2g, one for el2g */
299     PetscCall(PetscObjectReference((PetscObject)al2g));
300     PetscCall(PetscObjectReference((PetscObject)al2g));
301     el2g = al2g;
302     fl2g = NULL;
303   }
304 
305   /* Start communication to drop connections for interior edges (for cc analysis only) */
306   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308   if (nedfieldlocal) {
309     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312   } else {
313     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314   }
315   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
317 
318   /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319      Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320   if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
321 
322   /* drop connections with interior edges to avoid unneeded communications and memory movements */
323   PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324   PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325   PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326   if (global) {
327     PetscInt rst;
328 
329     PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331       if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332     }
333     PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334     PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335   } else {
336     PetscInt *tbz;
337 
338     PetscCall(PetscMalloc1(ne, &tbz));
339     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342     for (i = 0, cum = 0; i < ne; i++)
343       if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345     PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346     PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347     PetscCall(PetscFree(tbz));
348   }
349 
350   /* Extract subdomain relevant rows of G  */
351   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353   PetscCall(MatAIJExtractRows(G, lned, &lGall));
354   /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356   PetscCall(ISDestroy(&lned));
357   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358   PetscCall(MatDestroy(&lGall));
359   PetscCall(MatISGetLocalMat(lGis, &lG));
360   if (matis->allow_repeated) { /* multi-element support */
361     Mat                   *lGn, B;
362     IS                    *is_rows, *tcols, tmap, nmap;
363     PetscInt               subnv;
364     const PetscInt        *subvidxs;
365     ISLocalToGlobalMapping mapn;
366 
367     PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371       if (fl2g) {
372         PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373       } else {
374         PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375         is_rows[i] = pcbddc->local_subs[i];
376       }
377       PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379       PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380       PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382       PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383       PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384     }
385 
386     /* Create new MATIS with repeated vertices */
387     PetscCall(MatCreate(comm, &B));
388     PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389     PetscCall(MatSetType(B, MATIS));
390     PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391     PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392     PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393     PetscCall(ISDestroy(&tmap));
394     PetscCall(ISGetLocalSize(nmap, &subnv));
395     PetscCall(ISGetIndices(nmap, &subvidxs));
396     PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397     PetscCall(ISRestoreIndices(nmap, &subvidxs));
398     PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399     PetscCall(ISDestroy(&tmap));
400     PetscCall(ISDestroy(&nmap));
401     PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402     PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403     PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405       PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406       PetscCall(ISDestroy(&is_rows[i]));
407       PetscCall(ISDestroy(&tcols[i]));
408     }
409     PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410     PetscCall(PetscFree(lGn));
411     PetscCall(PetscFree(is_rows));
412     PetscCall(PetscFree(tcols));
413     PetscCall(MatISSetLocalMat(B, lG));
414     PetscCall(MatDestroy(&lG));
415 
416     PetscCall(MatDestroy(&lGis));
417     lGis = B;
418 
419     lGis->assembled = PETSC_TRUE;
420   }
421   PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
422 
423   /* SF for nodal dofs communications */
424   PetscCall(MatGetLocalSize(G, NULL, &Lv));
425   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426   PetscCall(PetscObjectReference((PetscObject)vl2g));
427   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428   PetscCall(PetscSFCreate(comm, &sfv));
429   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
432 
433   if (elements_corners) {
434     IS      tmp;
435     Vec     global, local;
436     Mat_IS *tGis = (Mat_IS *)lGis->data;
437 
438     PetscCall(MatCreateVecs(lGis, &global, NULL));
439     PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440     PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441     PetscCall(VecDestroy(&global));
442     PetscCall(VecDestroy(&local));
443     elements_corners = tmp;
444   }
445 
446   /* Destroy temporary G */
447   PetscCall(MatISGetLocalMat(lGis, &lG));
448   PetscCall(PetscObjectReference((PetscObject)lG));
449   PetscCall(MatDestroy(&G));
450   PetscCall(MatDestroy(&lGis));
451 
452   if (print) {
453     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454     PetscCall(MatView(lG, NULL));
455   }
456 
457   /* Save lG for values insertion in change of basis */
458   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
459 
460   /* Analyze the edge-nodes connections (duplicate lG) */
461   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463   PetscCall(PetscBTCreate(nv, &btv));
464   PetscCall(PetscBTCreate(ne, &bte));
465   PetscCall(PetscBTCreate(ne, &btb));
466   PetscCall(PetscBTCreate(ne, &btbd));
467   /* need to import the boundary specification to ensure the
468      proper detection of coarse edges' endpoints */
469   if (pcbddc->DirichletBoundariesLocal) {
470     IS is;
471 
472     if (fl2g) {
473       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474     } else {
475       is = pcbddc->DirichletBoundariesLocal;
476     }
477     PetscCall(ISGetLocalSize(is, &cum));
478     PetscCall(ISGetIndices(is, &idxs));
479     for (i = 0; i < cum; i++) {
480       if (idxs[i] >= 0 && idxs[i] < ne) {
481         PetscCall(PetscBTSet(btb, idxs[i]));
482         PetscCall(PetscBTSet(btbd, idxs[i]));
483       }
484     }
485     PetscCall(ISRestoreIndices(is, &idxs));
486     if (fl2g) PetscCall(ISDestroy(&is));
487   }
488   if (pcbddc->NeumannBoundariesLocal) {
489     IS is;
490 
491     if (fl2g) {
492       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493     } else {
494       is = pcbddc->NeumannBoundariesLocal;
495     }
496     PetscCall(ISGetLocalSize(is, &cum));
497     PetscCall(ISGetIndices(is, &idxs));
498     for (i = 0; i < cum; i++) {
499       if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500     }
501     PetscCall(ISRestoreIndices(is, &idxs));
502     if (fl2g) PetscCall(ISDestroy(&is));
503   }
504 
505   /* Count neighs per dof */
506   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
508 
509   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510      for proper detection of coarse edges' endpoints */
511   PetscCall(PetscBTCreate(ne, &btee));
512   for (i = 0; i < ne; i++) {
513     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514   }
515   PetscCall(PetscMalloc1(ne, &marks));
516   if (!conforming) {
517     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519   }
520   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521   PetscCall(MatSeqAIJGetArray(lGe, &vals));
522   cum = 0;
523   for (i = 0; i < ne; i++) {
524     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525     if (!PetscBTLookup(btee, i)) {
526       marks[cum++] = i;
527       continue;
528     }
529     /* set badly connected edge dofs as primal */
530     if (!conforming) {
531       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532         marks[cum++] = i;
533         PetscCall(PetscBTSet(bte, i));
534         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535       } else {
536         /* every edge dofs should be connected through a certain number of nodal dofs
537            to other edge dofs belonging to coarse edges
538            - at most 2 endpoints
539            - order-1 interior nodal dofs
540            - no undefined nodal dofs (nconn < order)
541         */
542         PetscInt ends = 0, ints = 0, undef = 0;
543         for (j = ii[i]; j < ii[i + 1]; j++) {
544           PetscInt v     = jj[j], k;
545           PetscInt nconn = iit[v + 1] - iit[v];
546           for (k = iit[v]; k < iit[v + 1]; k++)
547             if (!PetscBTLookup(btee, jjt[k])) nconn--;
548           if (nconn > order) ends++;
549           else if (nconn == order) ints++;
550           else undef++;
551         }
552         if (undef || ends > 2 || ints != order - 1) {
553           marks[cum++] = i;
554           PetscCall(PetscBTSet(bte, i));
555           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556         }
557       }
558     }
559     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560     if (!order && ii[i + 1] != ii[i]) {
561       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563     }
564   }
565   PetscCall(PetscBTDestroy(&btee));
566   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568   if (!conforming) {
569     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570     PetscCall(MatDestroy(&lGt));
571   }
572   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
573 
574   /* identify splitpoints and corner candidates */
575   PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576   PetscCall(PetscBTCreate(nv, &btvcand));
577   if (elements_corners) {
578     PetscCall(ISGetLocalSize(elements_corners, &cum));
579     PetscCall(ISGetIndices(elements_corners, &idxs));
580     for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581     PetscCall(ISRestoreIndices(elements_corners, &idxs));
582   }
583 
584   if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585     PetscSF   emlsf, vmlsf;
586     PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587     PetscInt  cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
588 
589     PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591     PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
593 
594     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
596 
597     PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598     for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599     PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600     PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
601 
602     PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603     for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604     PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605     PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
606 
607     PetscCall(PetscMalloc1(ne, &eleaves));
608     PetscCall(PetscMalloc1(nv, &vleaves));
609     for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610     for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611     PetscCall(PetscMalloc1(emnl, &meleaves));
612     PetscCall(PetscMalloc1(vmnl, &mvleaves));
613 
614     PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615     PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616     for (i = 0; i < n_subs; i++) {
617       const PetscInt *idxs;
618       const PetscInt  subid = cum_subs + i;
619       PetscInt        ns;
620 
621       PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622       PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623       for (j = 0; j < ns; j++) {
624         const PetscInt e = idxs[j];
625 
626         eleaves[e] = subid;
627         for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628       }
629       PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630     }
631     PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632     PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633     PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634     PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635     PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636     PetscCall(PetscFree(eleaves));
637     PetscCall(PetscFree(vleaves));
638 
639     PetscCall(PetscMalloc1(ne + 1, &eneighs));
640     eneighs[0] = meleaves;
641     for (i = 0; i < ne; i++) {
642       PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643       eneighs[i + 1] = eneighs[i] + ecount[i];
644     }
645     PetscCall(PetscMalloc1(nv + 1, &vneighs));
646     vneighs[0] = mvleaves;
647     for (i = 0; i < nv; i++) {
648       PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649       vneighs[i + 1] = vneighs[i] + vcount[i];
650     }
651   } else {
652     PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653     PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654   }
655 
656   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657   if (print) {
658     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659     PetscCall(MatView(lGe, NULL));
660     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661     PetscCall(MatView(lGt, NULL));
662   }
663   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664   PetscCall(MatSeqAIJGetArray(lGt, &vals));
665   for (i = 0; i < nv; i++) {
666     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668     if (!order) { /* variable order */
669       PetscReal vorder = 0.;
670 
671       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674       ord = 1;
675     }
676     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677       const PetscInt e = jj[j];
678 
679       if (PetscBTLookup(btbd, e)) {
680         bdir = PETSC_TRUE;
681         break;
682       }
683       if (vc != ecount[e]) {
684         sneighs = PETSC_FALSE;
685       } else {
686         const PetscInt *vn = vneighs[i], *en = eneighs[e];
687 
688         for (PetscInt k = 0; k < vc; k++) {
689           if (vn[k] != en[k]) {
690             sneighs = PETSC_FALSE;
691             break;
692           }
693         }
694       }
695     }
696     if (elements_corners) test = 0;
697     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699       PetscCall(PetscBTSet(btv, i));
700     } else if (test == ord) {
701       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703         PetscCall(PetscBTSet(btv, i));
704       } else if (!elements_corners) {
705         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706         PetscCall(PetscBTSet(btvcand, i));
707       }
708     }
709   }
710   PetscCall(PetscBTDestroy(&btbd));
711 
712   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713   if (order != 1) {
714     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716     for (i = 0; i < nv; i++) {
717       if (PetscBTLookup(btvcand, i)) {
718         PetscBool found = PETSC_FALSE;
719         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720           PetscInt k, e = jj[j];
721           if (PetscBTLookup(bte, e)) continue;
722           for (k = iit[e]; k < iit[e + 1]; k++) {
723             PetscInt v = jjt[k];
724             if (v != i && PetscBTLookup(btvcand, v)) {
725               found = PETSC_TRUE;
726               break;
727             }
728           }
729         }
730         if (!found) {
731           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732           PetscCall(PetscBTClear(btvcand, i));
733         } else {
734           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735         }
736       }
737     }
738     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739   }
740   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742   PetscCall(MatDestroy(&lGe));
743 
744   /* Get the local G^T explicitly */
745   PetscCall(MatDestroy(&lGt));
746   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
748 
749   /* Mark shared nodal dofs */
750   PetscCall(PetscBTCreate(nv, &btvi));
751   for (i = 0; i < nv; i++) {
752     if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753   }
754 
755   if (matis->allow_repeated) {
756     PetscCall(PetscFree(eneighs[0]));
757     PetscCall(PetscFree(vneighs[0]));
758     PetscCall(PetscFree(eneighs));
759     PetscCall(PetscFree(vneighs));
760   }
761   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
763 
764   /* communicate corners and splitpoints */
765   PetscCall(PetscMalloc1(nv, &vmarks));
766   PetscCall(PetscArrayzero(sfvleaves, nv));
767   PetscCall(PetscArrayzero(sfvroots, Lv));
768   for (i = 0; i < nv; i++)
769     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
770 
771   if (print) {
772     IS tbz;
773 
774     cum = 0;
775     for (i = 0; i < nv; i++)
776       if (sfvleaves[i]) vmarks[cum++] = i;
777 
778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780     PetscCall(ISView(tbz, NULL));
781     PetscCall(ISDestroy(&tbz));
782   }
783 
784   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
788 
789   /* Zero rows of lGt corresponding to identified corners
790      and interior nodal dofs */
791   cum = 0;
792   for (i = 0; i < nv; i++) {
793     if (sfvleaves[i]) {
794       vmarks[cum++] = i;
795       PetscCall(PetscBTSet(btv, i));
796     } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797   }
798   PetscCall(PetscBTDestroy(&btvi));
799   if (print) {
800     IS tbz;
801 
802     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804     PetscCall(ISView(tbz, NULL));
805     PetscCall(ISDestroy(&tbz));
806   }
807   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808   PetscCall(PetscFree(vmarks));
809   PetscCall(PetscSFDestroy(&sfv));
810   PetscCall(PetscFree2(sfvleaves, sfvroots));
811 
812   /* Recompute G */
813   PetscCall(MatDestroy(&lG));
814   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815   if (print) {
816     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817     PetscCall(MatView(lG, NULL));
818     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819     PetscCall(MatView(lGt, NULL));
820   }
821 
822   /* Get primal dofs (if any) */
823   cum = 0;
824   for (i = 0; i < ne; i++) {
825     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826   }
827   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829   if (print) {
830     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831     PetscCall(ISView(primals, NULL));
832   }
833   PetscCall(PetscBTDestroy(&bte));
834   /* TODO: what if the user passed in some of them ?  */
835   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836   PetscCall(ISDestroy(&primals));
837 
838   /* Compute edge connectivity */
839   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
840 
841   /* Symbolic conn = lG*lGt */
842   if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843     PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844     PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845     PetscCall(MatProductSetAlgorithm(conn, "default"));
846     PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848     PetscCall(MatProductSetFromOptions(conn));
849     PetscCall(MatProductSymbolic(conn));
850     PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851     if (fl2g) {
852       PetscBT   btf;
853       PetscInt *iia, *jja, *iiu, *jju;
854       PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
855 
856       /* create CSR for all local dofs */
857       PetscCall(PetscMalloc1(n + 1, &iia));
858       if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859         PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860         iiu = pcbddc->mat_graph->xadj;
861         jju = pcbddc->mat_graph->adjncy;
862       } else if (pcbddc->use_local_adj) {
863         rest = PETSC_TRUE;
864         PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865       } else {
866         free = PETSC_TRUE;
867         PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868         iiu[0] = 0;
869         for (i = 0; i < n; i++) {
870           iiu[i + 1] = i + 1;
871           jju[i]     = -1;
872         }
873       }
874 
875       /* import sizes of CSR */
876       iia[0] = 0;
877       for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
878 
879       /* overwrite entries corresponding to the Nedelec field */
880       PetscCall(PetscBTCreate(n, &btf));
881       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882       for (i = 0; i < ne; i++) {
883         PetscCall(PetscBTSet(btf, idxs[i]));
884         iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885       }
886 
887       /* iia in CSR */
888       for (i = 0; i < n; i++) iia[i + 1] += iia[i];
889 
890       /* jja in CSR */
891       PetscCall(PetscMalloc1(iia[n], &jja));
892       for (i = 0; i < n; i++)
893         if (!PetscBTLookup(btf, i))
894           for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
895 
896       /* map edge dofs connectivity */
897       if (jj) {
898         PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899         for (i = 0; i < ne; i++) {
900           PetscInt e = idxs[i];
901           for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902         }
903       }
904       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906       if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907       if (free) PetscCall(PetscFree2(iiu, jju));
908       PetscCall(PetscBTDestroy(&btf));
909     } else {
910       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911     }
912     PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913     PetscCall(MatDestroy(&conn));
914   }
915 
916   /* Analyze interface for edge dofs */
917   PetscCall(PCBDDCAnalyzeInterface(pc));
918   pcbddc->mat_graph->twodim = PETSC_FALSE;
919 
920   /* Get coarse edges in the edge space */
921   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
922 
923   if (fl2g) {
924     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925     PetscCall(PetscMalloc1(nee, &eedges));
926     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927   } else {
928     eedges  = alleedges;
929     primals = allprimals;
930   }
931 
932   /* Mark fine edge dofs with their coarse edge id */
933   PetscCall(PetscArrayzero(marks, ne));
934   PetscCall(ISGetLocalSize(primals, &cum));
935   PetscCall(ISGetIndices(primals, &idxs));
936   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937   PetscCall(ISRestoreIndices(primals, &idxs));
938   if (print) {
939     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940     PetscCall(ISView(primals, NULL));
941   }
942 
943   maxsize = 0;
944   for (i = 0; i < nee; i++) {
945     PetscInt size, mark = i + 1;
946 
947     PetscCall(ISGetLocalSize(eedges[i], &size));
948     PetscCall(ISGetIndices(eedges[i], &idxs));
949     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950     PetscCall(ISRestoreIndices(eedges[i], &idxs));
951     maxsize = PetscMax(maxsize, size);
952   }
953 
954   /* Find coarse edge endpoints */
955   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957   for (i = 0; i < nee; i++) {
958     PetscInt mark = i + 1, size;
959 
960     PetscCall(ISGetLocalSize(eedges[i], &size));
961     if (!size && nedfieldlocal) continue;
962     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963     PetscCall(ISGetIndices(eedges[i], &idxs));
964     if (print) {
965       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966       PetscCall(ISView(eedges[i], NULL));
967     }
968     for (j = 0; j < size; j++) {
969       PetscInt k, ee = idxs[j];
970       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
971       for (k = ii[ee]; k < ii[ee + 1]; k++) {
972         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
973         if (PetscBTLookup(btv, jj[k])) {
974           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976           PetscInt  k2;
977           PetscBool corner = PETSC_FALSE;
978           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980             /* it's a corner if either is connected with an edge dof belonging to a different cc or
981                if the edge dof lie on the natural part of the boundary */
982             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983               corner = PETSC_TRUE;
984               break;
985             }
986           }
987           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
989             PetscCall(PetscBTSet(btv, jj[k]));
990           } else {
991             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
992           }
993         }
994       }
995     }
996     PetscCall(ISRestoreIndices(eedges[i], &idxs));
997   }
998   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000   PetscCall(PetscBTDestroy(&btb));
1001 
1002   /* Reset marked primal dofs */
1003   PetscCall(ISGetLocalSize(primals, &cum));
1004   PetscCall(ISGetIndices(primals, &idxs));
1005   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006   PetscCall(ISRestoreIndices(primals, &idxs));
1007 
1008   /* Now use the initial lG */
1009   PetscCall(MatDestroy(&lG));
1010   PetscCall(MatDestroy(&lGt));
1011   lG = lGinit;
1012   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1013 
1014   /* Compute extended cols indices */
1015   PetscCall(PetscBTCreate(nv, &btvc));
1016   PetscCall(PetscBTCreate(nee, &bter));
1017   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019   i *= maxsize;
1020   PetscCall(PetscCalloc1(nee, &extcols));
1021   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022   eerr = PETSC_FALSE;
1023   for (i = 0; i < nee; i++) {
1024     PetscInt size, found = 0;
1025 
1026     cum = 0;
1027     PetscCall(ISGetLocalSize(eedges[i], &size));
1028     if (!size && nedfieldlocal) continue;
1029     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030     PetscCall(ISGetIndices(eedges[i], &idxs));
1031     PetscCall(PetscBTMemzero(nv, btvc));
1032     for (j = 0; j < size; j++) {
1033       PetscInt k, ee = idxs[j];
1034       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035         PetscInt vv = jj[k];
1036         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037         else if (!PetscBTLookupSet(btvc, vv)) found++;
1038       }
1039     }
1040     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045     /* it may happen that endpoints are not defined at this point
1046        if it is the case, mark this edge for a second pass */
1047     if (cum != size - 1 || found != 2) {
1048       PetscCall(PetscBTSet(bter, i));
1049       if (print) {
1050         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051         PetscCall(ISView(eedges[i], NULL));
1052         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053         PetscCall(ISView(extcols[i], NULL));
1054       }
1055       eerr = PETSC_TRUE;
1056     }
1057   }
1058   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059   PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
1060   if (done) {
1061     PetscInt *newprimals;
1062 
1063     PetscCall(PetscMalloc1(ne, &newprimals));
1064     PetscCall(ISGetLocalSize(primals, &cum));
1065     PetscCall(ISGetIndices(primals, &idxs));
1066     PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067     PetscCall(ISRestoreIndices(primals, &idxs));
1068     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070     for (i = 0; i < nee; i++) {
1071       PetscBool has_candidates = PETSC_FALSE;
1072       if (PetscBTLookup(bter, i)) {
1073         PetscInt size, mark = i + 1;
1074 
1075         PetscCall(ISGetLocalSize(eedges[i], &size));
1076         PetscCall(ISGetIndices(eedges[i], &idxs));
1077         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078         for (j = 0; j < size; j++) {
1079           PetscInt k, ee = idxs[j];
1080           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082             /* set all candidates located on the edge as corners */
1083             if (PetscBTLookup(btvcand, jj[k])) {
1084               PetscInt k2, vv = jj[k];
1085               has_candidates = PETSC_TRUE;
1086               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087               PetscCall(PetscBTSet(btv, vv));
1088               /* set all edge dofs connected to candidate as primals */
1089               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090                 if (marks[jjt[k2]] == mark) {
1091                   PetscInt k3, ee2 = jjt[k2];
1092                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093                   newprimals[cum++] = ee2;
1094                   /* finally set the new corners */
1095                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097                     PetscCall(PetscBTSet(btv, jj[k3]));
1098                   }
1099                 }
1100               }
1101             } else {
1102               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103             }
1104           }
1105         }
1106         if (!has_candidates) { /* circular edge */
1107           PetscInt k, ee = idxs[0], *tmarks;
1108 
1109           PetscCall(PetscCalloc1(ne, &tmarks));
1110           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
1111           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112             PetscInt k2;
1113             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
1114             PetscCall(PetscBTSet(btv, jj[k]));
1115             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116           }
1117           for (j = 0; j < size; j++) {
1118             if (tmarks[idxs[j]] > 1) {
1119               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120               newprimals[cum++] = idxs[j];
1121             }
1122           }
1123           PetscCall(PetscFree(tmarks));
1124         }
1125         PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126       }
1127       PetscCall(ISDestroy(&extcols[i]));
1128     }
1129     PetscCall(PetscFree(extcols));
1130     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132     if (fl2g) {
1133       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134       PetscCall(ISDestroy(&primals));
1135       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136       PetscCall(PetscFree(eedges));
1137     }
1138     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140     PetscCall(PetscFree(newprimals));
1141     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142     PetscCall(ISDestroy(&primals));
1143     PetscCall(PCBDDCAnalyzeInterface(pc));
1144     pcbddc->mat_graph->twodim = PETSC_FALSE;
1145     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146     if (fl2g) {
1147       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148       PetscCall(PetscMalloc1(nee, &eedges));
1149       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150     } else {
1151       eedges  = alleedges;
1152       primals = allprimals;
1153     }
1154     PetscCall(PetscCalloc1(nee, &extcols));
1155 
1156     /* Mark again */
1157     PetscCall(PetscArrayzero(marks, ne));
1158     for (i = 0; i < nee; i++) {
1159       PetscInt size, mark = i + 1;
1160 
1161       PetscCall(ISGetLocalSize(eedges[i], &size));
1162       PetscCall(ISGetIndices(eedges[i], &idxs));
1163       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165     }
1166     if (print) {
1167       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168       PetscCall(ISView(primals, NULL));
1169     }
1170 
1171     /* Recompute extended cols */
1172     eerr = PETSC_FALSE;
1173     for (i = 0; i < nee; i++) {
1174       PetscInt size;
1175 
1176       cum = 0;
1177       PetscCall(ISGetLocalSize(eedges[i], &size));
1178       if (!size && nedfieldlocal) continue;
1179       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180       PetscCall(ISGetIndices(eedges[i], &idxs));
1181       for (j = 0; j < size; j++) {
1182         PetscInt k, ee = idxs[j];
1183         for (k = ii[ee]; k < ii[ee + 1]; k++)
1184           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185       }
1186       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191       if (cum != size - 1) {
1192         if (print) {
1193           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194           PetscCall(ISView(eedges[i], NULL));
1195           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196           PetscCall(ISView(extcols[i], NULL));
1197         }
1198         eerr = PETSC_TRUE;
1199       }
1200     }
1201   }
1202   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203   PetscCall(PetscFree2(extrow, gidxs));
1204   PetscCall(PetscBTDestroy(&bter));
1205   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206   /* an error should not occur at this point */
1207   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1208 
1209   /* Check the number of endpoints */
1210   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211   PetscCall(PetscMalloc1(2 * nee, &corners));
1212   PetscCall(PetscMalloc1(nee, &cedges));
1213   for (i = 0; i < nee; i++) {
1214     PetscInt size, found = 0, gc[2];
1215 
1216     /* init with defaults */
1217     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218     PetscCall(ISGetLocalSize(eedges[i], &size));
1219     if (!size && nedfieldlocal) continue;
1220     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221     PetscCall(ISGetIndices(eedges[i], &idxs));
1222     PetscCall(PetscBTMemzero(nv, btvc));
1223     for (j = 0; j < size; j++) {
1224       PetscInt k, ee = idxs[j];
1225       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226         PetscInt vv = jj[k];
1227         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229           corners[i * 2 + found++] = vv;
1230         }
1231       }
1232     }
1233     if (found != 2) {
1234       PetscInt e;
1235       if (fl2g) {
1236         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237       } else {
1238         e = idxs[0];
1239       }
1240       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241     }
1242 
1243     /* get primal dof index on this coarse edge */
1244     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245     if (gc[0] > gc[1]) {
1246       PetscInt swap      = corners[2 * i];
1247       corners[2 * i]     = corners[2 * i + 1];
1248       corners[2 * i + 1] = swap;
1249     }
1250     cedges[i] = idxs[size - 1];
1251     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253   }
1254   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255   PetscCall(PetscBTDestroy(&btvc));
1256 
1257   if (PetscDefined(USE_DEBUG)) {
1258     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259      not interfere with neighbouring coarse edges */
1260     PetscCall(PetscMalloc1(nee + 1, &emarks));
1261     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262     for (i = 0; i < nv; i++) {
1263       PetscInt emax = 0, eemax = 0;
1264 
1265       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266       PetscCall(PetscArrayzero(emarks, nee + 1));
1267       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268       for (j = 1; j < nee + 1; j++) {
1269         if (emax < emarks[j]) {
1270           emax  = emarks[j];
1271           eemax = j;
1272         }
1273       }
1274       /* not relevant for edges */
1275       if (!eemax) continue;
1276 
1277       for (j = ii[i]; j < ii[i + 1]; j++) {
1278         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279       }
1280     }
1281     PetscCall(PetscFree(emarks));
1282     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283   }
1284 
1285   /* Compute extended rows indices for edge blocks of the change of basis */
1286   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288   extmem *= maxsize;
1289   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290   PetscCall(PetscMalloc1(nee, &extrows));
1291   PetscCall(PetscCalloc1(nee, &extrowcum));
1292   for (i = 0; i < nv; i++) {
1293     PetscInt mark = 0, size, start;
1294 
1295     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296     for (j = ii[i]; j < ii[i + 1]; j++)
1297       if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1298 
1299     /* not relevant */
1300     if (!mark) continue;
1301 
1302     /* import extended row */
1303     mark--;
1304     start = mark * extmem + extrowcum[mark];
1305     size  = ii[i + 1] - ii[i];
1306     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308     extrowcum[mark] += size;
1309   }
1310   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311   PetscCall(MatDestroy(&lGt));
1312   PetscCall(PetscFree(marks));
1313 
1314   /* Compress extrows */
1315   cum = 0;
1316   for (i = 0; i < nee; i++) {
1317     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318     PetscCall(PetscSortRemoveDupsInt(&size, start));
1319     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320     cum = PetscMax(cum, size);
1321   }
1322   PetscCall(PetscFree(extrowcum));
1323   PetscCall(PetscBTDestroy(&btv));
1324   PetscCall(PetscBTDestroy(&btvcand));
1325 
1326   /* Workspace for lapack inner calls and VecSetValues */
1327   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1328 
1329   /* Create change of basis matrix (preallocation can be improved) */
1330   PetscCall(MatCreate(comm, &T));
1331   PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332   PetscCall(MatSetType(T, MATAIJ));
1333   PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1334   PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1335   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1336   PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1337   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1338   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1339 
1340   /* Defaults to identity */
1341   for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1342 
1343   /* Create discrete gradient for the coarser level if needed */
1344   PetscCall(MatDestroy(&pcbddc->nedcG));
1345   PetscCall(ISDestroy(&pcbddc->nedclocal));
1346   if (pcbddc->current_level < pcbddc->max_levels) {
1347     ISLocalToGlobalMapping cel2g, cvl2g;
1348     IS                     wis, gwis;
1349     PetscInt               cnv, cne;
1350 
1351     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1352     if (fl2g) {
1353       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1354     } else {
1355       PetscCall(PetscObjectReference((PetscObject)wis));
1356       pcbddc->nedclocal = wis;
1357     }
1358     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1359     PetscCall(ISDestroy(&wis));
1360     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1361     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1362     PetscCall(ISDestroy(&wis));
1363     PetscCall(ISDestroy(&gwis));
1364 
1365     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1366     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1367     PetscCall(ISDestroy(&wis));
1368     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1369     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1370     PetscCall(ISDestroy(&wis));
1371     PetscCall(ISDestroy(&gwis));
1372 
1373     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1374     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1375     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1376     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1377     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1378     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1379     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1380     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1381   }
1382   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1383 
1384 #if defined(PRINT_GDET)
1385   inc = 0;
1386   lev = pcbddc->current_level;
1387 #endif
1388 
1389   /* Insert values in the change of basis matrix */
1390   for (i = 0; i < nee; i++) {
1391     Mat         Gins = NULL, GKins = NULL;
1392     IS          cornersis = NULL;
1393     PetscScalar cvals[2];
1394 
1395     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1396     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1397     if (Gins && GKins) {
1398       const PetscScalar *data;
1399       const PetscInt    *rows, *cols;
1400       PetscInt           nrh, nch, nrc, ncc;
1401 
1402       PetscCall(ISGetIndices(eedges[i], &cols));
1403       /* H1 */
1404       PetscCall(ISGetIndices(extrows[i], &rows));
1405       PetscCall(MatGetSize(Gins, &nrh, &nch));
1406       PetscCall(MatDenseGetArrayRead(Gins, &data));
1407       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1408       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1409       PetscCall(ISRestoreIndices(extrows[i], &rows));
1410       /* complement */
1411       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1412       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1413       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1414       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1415       PetscCall(MatDenseGetArrayRead(GKins, &data));
1416       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1417       PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1418 
1419       /* coarse discrete gradient */
1420       if (pcbddc->nedcG) {
1421         PetscInt cols[2];
1422 
1423         cols[0] = 2 * i;
1424         cols[1] = 2 * i + 1;
1425         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1426       }
1427       PetscCall(ISRestoreIndices(eedges[i], &cols));
1428     }
1429     PetscCall(ISDestroy(&extrows[i]));
1430     PetscCall(ISDestroy(&extcols[i]));
1431     PetscCall(ISDestroy(&cornersis));
1432     PetscCall(MatDestroy(&Gins));
1433     PetscCall(MatDestroy(&GKins));
1434   }
1435 
1436   /* for FDM element-by-element: first dof on the edge only constraint. Why? */
1437   if (elements_corners && pcbddc->mat_graph->multi_element) {
1438     ISLocalToGlobalMapping map;
1439     MatNullSpace           nnsp;
1440     Vec                    quad_vec;
1441 
1442     PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL));
1443     PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp));
1444     PetscCall(VecLockReadPop(quad_vec));
1445     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
1446     PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1447     for (i = 0; i < nee; i++) {
1448       const PetscInt *idxs;
1449       PetscScalar     one = 1.0;
1450 
1451       PetscCall(ISGetLocalSize(alleedges[i], &cum));
1452       if (!cum) continue;
1453       PetscCall(ISGetIndices(alleedges[i], &idxs));
1454       PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES));
1455       PetscCall(ISRestoreIndices(alleedges[i], &idxs));
1456     }
1457     PetscCall(VecLockReadPush(quad_vec));
1458     PetscCall(VecDestroy(&quad_vec));
1459     PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1460     PetscCall(MatNullSpaceDestroy(&nnsp));
1461   }
1462   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1463 
1464   /* Start assembling */
1465   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1466   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1467 
1468   /* Free */
1469   if (fl2g) {
1470     PetscCall(ISDestroy(&primals));
1471     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1472     PetscCall(PetscFree(eedges));
1473   }
1474 
1475   /* hack mat_graph with primal dofs on the coarse edges */
1476   {
1477     PCBDDCGraph graph  = pcbddc->mat_graph;
1478     PetscInt   *oqueue = graph->queue;
1479     PetscInt   *ocptr  = graph->cptr;
1480     PetscInt    ncc, *idxs;
1481 
1482     /* find first primal edge */
1483     if (pcbddc->nedclocal) {
1484       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1485     } else {
1486       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1487       idxs = cedges;
1488     }
1489     cum = 0;
1490     while (cum < nee && cedges[cum] < 0) cum++;
1491 
1492     /* adapt connected components */
1493     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1494     graph->cptr[0] = 0;
1495     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1496       PetscInt lc = ocptr[i + 1] - ocptr[i];
1497       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1498         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1499         graph->queue[graph->cptr[ncc]] = cedges[cum];
1500         ncc++;
1501         lc--;
1502         cum++;
1503         while (cum < nee && cedges[cum] < 0) cum++;
1504       }
1505       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1506       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1507       ncc++;
1508     }
1509     graph->ncc = ncc;
1510     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1511     PetscCall(PetscFree2(ocptr, oqueue));
1512   }
1513   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1514   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1515   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1516 
1517   PetscCall(ISDestroy(&nedfieldlocal));
1518   PetscCall(PetscFree(extrow));
1519   PetscCall(PetscFree2(work, rwork));
1520   PetscCall(PetscFree(corners));
1521   PetscCall(PetscFree(cedges));
1522   PetscCall(PetscFree(extrows));
1523   PetscCall(PetscFree(extcols));
1524   PetscCall(MatDestroy(&lG));
1525 
1526   /* Complete assembling */
1527   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1528   PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1529   if (pcbddc->nedcG) {
1530     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1531     PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1532   }
1533 
1534   PetscCall(ISDestroy(&elements_corners));
1535 
1536   /* set change of basis */
1537   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1538   PetscCall(MatDestroy(&T));
1539   PetscFunctionReturn(PETSC_SUCCESS);
1540 }
1541 
1542 /* the near-null space of BDDC carries information on quadrature weights,
1543    and these can be collinear -> so cheat with MatNullSpaceCreate
1544    and create a suitable set of basis vectors first */
1545 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1546 {
1547   PetscInt i;
1548 
1549   PetscFunctionBegin;
1550   for (i = 0; i < nvecs; i++) {
1551     PetscInt first, last;
1552 
1553     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1554     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1555     if (i >= first && i < last) {
1556       PetscScalar *data;
1557       PetscCall(VecGetArray(quad_vecs[i], &data));
1558       if (!has_const) {
1559         data[i - first] = 1.;
1560       } else {
1561         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1562         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1563       }
1564       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1565     }
1566     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1567   }
1568   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1569   for (i = 0; i < nvecs; i++) { /* reset vectors */
1570     PetscInt first, last;
1571     PetscCall(VecLockReadPop(quad_vecs[i]));
1572     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1573     if (i >= first && i < last) {
1574       PetscScalar *data;
1575       PetscCall(VecGetArray(quad_vecs[i], &data));
1576       if (!has_const) {
1577         data[i - first] = 0.;
1578       } else {
1579         data[2 * i - first]     = 0.;
1580         data[2 * i - first + 1] = 0.;
1581       }
1582       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1583     }
1584     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1585     PetscCall(VecLockReadPush(quad_vecs[i]));
1586   }
1587   PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589 
1590 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1591 {
1592   Mat                    loc_divudotp;
1593   Vec                    p, v, quad_vec;
1594   ISLocalToGlobalMapping map;
1595   PetscScalar           *array;
1596 
1597   PetscFunctionBegin;
1598   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1599   if (!transpose) {
1600     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1601   } else {
1602     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1603   }
1604   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1605   PetscCall(VecLockReadPop(quad_vec));
1606   PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1607 
1608   /* compute local quad vec */
1609   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1610   if (!transpose) {
1611     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1612   } else {
1613     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1614   }
1615   /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1616   PetscCall(VecSet(p, 1.));
1617   if (!transpose) {
1618     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1619   } else {
1620     PetscCall(MatMult(loc_divudotp, p, v));
1621   }
1622   PetscCall(VecDestroy(&p));
1623   if (vl2l) {
1624     Mat        lA;
1625     VecScatter sc;
1626     Vec        vins;
1627 
1628     PetscCall(MatISGetLocalMat(A, &lA));
1629     PetscCall(MatCreateVecs(lA, &vins, NULL));
1630     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1631     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1632     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1633     PetscCall(VecScatterDestroy(&sc));
1634     PetscCall(VecDestroy(&v));
1635     v = vins;
1636   }
1637 
1638   /* mask summation of interface values */
1639   PetscInt        n, *mmask, *mask, *idxs, nmr, nr;
1640   const PetscInt *degree;
1641   PetscSF         msf;
1642 
1643   PetscCall(VecGetLocalSize(v, &n));
1644   PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1645   PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1646   PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1647   PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1648   PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, &degree));
1649   PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, &degree));
1650   for (PetscInt i = 0, c = 0; i < nr; i++) {
1651     mmask[c] = 1;
1652     c += degree[i];
1653   }
1654   PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1655   PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1656   PetscCall(VecGetArray(v, &array));
1657   for (PetscInt i = 0; i < n; i++) {
1658     array[i] *= mask[i];
1659     idxs[i] = i;
1660   }
1661   PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1662   PetscCall(VecRestoreArray(v, &array));
1663   PetscCall(PetscFree3(mmask, mask, idxs));
1664   PetscCall(VecDestroy(&v));
1665   PetscCall(VecAssemblyBegin(quad_vec));
1666   PetscCall(VecAssemblyEnd(quad_vec));
1667   PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1668   PetscCall(VecLockReadPush(quad_vec));
1669   PetscCall(VecDestroy(&quad_vec));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1674 {
1675   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1676 
1677   PetscFunctionBegin;
1678   if (primalv) {
1679     if (pcbddc->user_primal_vertices_local) {
1680       IS list[2], newp;
1681 
1682       list[0] = primalv;
1683       list[1] = pcbddc->user_primal_vertices_local;
1684       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1685       PetscCall(ISSortRemoveDups(newp));
1686       PetscCall(ISDestroy(&list[1]));
1687       pcbddc->user_primal_vertices_local = newp;
1688     } else {
1689       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1690     }
1691   }
1692   PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694 
1695 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1696 {
1697   PetscInt f, *comp = (PetscInt *)ctx;
1698 
1699   PetscFunctionBegin;
1700   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1701   PetscFunctionReturn(PETSC_SUCCESS);
1702 }
1703 
1704 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1705 {
1706   Vec       local, global;
1707   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1708   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1709   PetscBool monolithic = PETSC_FALSE;
1710 
1711   PetscFunctionBegin;
1712   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1713   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1714   PetscOptionsEnd();
1715   /* need to convert from global to local topology information and remove references to information in global ordering */
1716   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1717   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1718   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1719   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1720   if (monolithic) { /* just get block size to properly compute vertices */
1721     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1722     goto boundary;
1723   }
1724 
1725   if (pcbddc->user_provided_isfordofs) {
1726     if (pcbddc->n_ISForDofs) {
1727       PetscInt i;
1728 
1729       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1730       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1731         PetscInt bs;
1732 
1733         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1734         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1735         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1736         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1737       }
1738       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1739       pcbddc->n_ISForDofs      = 0;
1740       PetscCall(PetscFree(pcbddc->ISForDofs));
1741     }
1742   } else {
1743     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1744       DM dm;
1745 
1746       PetscCall(MatGetDM(pc->pmat, &dm));
1747       if (!dm) PetscCall(PCGetDM(pc, &dm));
1748       if (dm) {
1749         IS      *fields;
1750         PetscInt nf, i;
1751 
1752         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1753         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1754         for (i = 0; i < nf; i++) {
1755           PetscInt bs;
1756 
1757           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1758           PetscCall(ISGetBlockSize(fields[i], &bs));
1759           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1760           PetscCall(ISDestroy(&fields[i]));
1761         }
1762         PetscCall(PetscFree(fields));
1763         pcbddc->n_ISForDofsLocal = nf;
1764       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1765         PetscContainer c;
1766 
1767         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1768         if (c) {
1769           MatISLocalFields lf;
1770           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1771           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1772         } else { /* fallback, create the default fields if bs > 1 */
1773           PetscInt i, n = matis->A->rmap->n;
1774           PetscCall(MatGetBlockSize(pc->pmat, &i));
1775           if (i > 1) {
1776             pcbddc->n_ISForDofsLocal = i;
1777             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1778             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1779           }
1780         }
1781       }
1782     } else {
1783       PetscInt i;
1784       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1785     }
1786   }
1787 
1788 boundary:
1789   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1790     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1791   } else if (pcbddc->DirichletBoundariesLocal) {
1792     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1793   }
1794   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1795     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1796   } else if (pcbddc->NeumannBoundariesLocal) {
1797     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1798   }
1799   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1800   PetscCall(VecDestroy(&global));
1801   PetscCall(VecDestroy(&local));
1802   /* detect local disconnected subdomains if requested or needed */
1803   if (pcbddc->detect_disconnected || matis->allow_repeated) {
1804     IS        primalv = NULL;
1805     PetscInt  nel;
1806     PetscBool filter = pcbddc->detect_disconnected_filter;
1807 
1808     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1809     PetscCall(PetscFree(pcbddc->local_subs));
1810     PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1811     if (matis->allow_repeated && nel) {
1812       const PetscInt *elsizes;
1813 
1814       pcbddc->n_local_subs = nel;
1815       PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1816       PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1817       for (PetscInt i = 0, c = 0; i < nel; i++) {
1818         PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1819         c += elsizes[i];
1820       }
1821     } else {
1822       PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1823     }
1824     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1825     PetscCall(ISDestroy(&primalv));
1826   }
1827   /* early stage corner detection */
1828   {
1829     DM dm;
1830 
1831     PetscCall(MatGetDM(pc->pmat, &dm));
1832     if (!dm) PetscCall(PCGetDM(pc, &dm));
1833     if (dm) {
1834       PetscBool isda;
1835 
1836       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1837       if (isda) {
1838         ISLocalToGlobalMapping l2l;
1839         IS                     corners;
1840         Mat                    lA;
1841         PetscBool              gl, lo;
1842 
1843         {
1844           Vec                cvec;
1845           const PetscScalar *coords;
1846           PetscInt           dof, n, cdim;
1847           PetscBool          memc = PETSC_TRUE;
1848 
1849           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1850           PetscCall(DMGetCoordinates(dm, &cvec));
1851           PetscCall(VecGetLocalSize(cvec, &n));
1852           PetscCall(VecGetBlockSize(cvec, &cdim));
1853           n /= cdim;
1854           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1855           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1856           PetscCall(VecGetArrayRead(cvec, &coords));
1857 #if defined(PETSC_USE_COMPLEX)
1858           memc = PETSC_FALSE;
1859 #endif
1860           if (dof != 1) memc = PETSC_FALSE;
1861           if (memc) {
1862             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1863           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1864             PetscReal *bcoords = pcbddc->mat_graph->coords;
1865             PetscInt   i, b, d;
1866 
1867             for (i = 0; i < n; i++) {
1868               for (b = 0; b < dof; b++) {
1869                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1870               }
1871             }
1872           }
1873           PetscCall(VecRestoreArrayRead(cvec, &coords));
1874           pcbddc->mat_graph->cdim  = cdim;
1875           pcbddc->mat_graph->cnloc = dof * n;
1876           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1877         }
1878         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1879         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1880         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1881         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1882         lo = (PetscBool)(l2l && corners);
1883         PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1884         if (gl) { /* From PETSc's DMDA */
1885           const PetscInt *idx;
1886           PetscInt        dof, bs, *idxout, n;
1887 
1888           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1889           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1890           PetscCall(ISGetLocalSize(corners, &n));
1891           PetscCall(ISGetIndices(corners, &idx));
1892           if (bs == dof) {
1893             PetscCall(PetscMalloc1(n, &idxout));
1894             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1895           } else { /* the original DMDA local-to-local map have been modified */
1896             PetscInt i, d;
1897 
1898             PetscCall(PetscMalloc1(dof * n, &idxout));
1899             for (i = 0; i < n; i++)
1900               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1901             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
1902 
1903             bs = 1;
1904             n *= dof;
1905           }
1906           PetscCall(ISRestoreIndices(corners, &idx));
1907           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1908           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1909           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1910           PetscCall(ISDestroy(&corners));
1911           pcbddc->corner_selected  = PETSC_TRUE;
1912           pcbddc->corner_selection = PETSC_TRUE;
1913         }
1914         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1915       }
1916     }
1917   }
1918   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1919     DM dm;
1920 
1921     PetscCall(MatGetDM(pc->pmat, &dm));
1922     if (!dm) PetscCall(PCGetDM(pc, &dm));
1923     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1924       Vec          vcoords;
1925       PetscSection section;
1926       PetscReal   *coords;
1927       PetscInt     d, cdim, nl, nf, **ctxs;
1928       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1929       /* debug coordinates */
1930       PetscViewer       viewer;
1931       PetscBool         flg;
1932       PetscViewerFormat format;
1933       const char       *prefix;
1934 
1935       PetscCall(DMGetCoordinateDim(dm, &cdim));
1936       PetscCall(DMGetLocalSection(dm, &section));
1937       PetscCall(PetscSectionGetNumFields(section, &nf));
1938       PetscCall(DMCreateGlobalVector(dm, &vcoords));
1939       PetscCall(VecGetLocalSize(vcoords, &nl));
1940       PetscCall(PetscMalloc1(nl * cdim, &coords));
1941       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1942       PetscCall(PetscMalloc1(nf, &ctxs[0]));
1943       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1944       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1945 
1946       /* debug coordinates */
1947       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1948       PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1949       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1950       for (d = 0; d < cdim; d++) {
1951         PetscInt           i;
1952         const PetscScalar *v;
1953         char               name[16];
1954 
1955         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1956         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d));
1957         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1958         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1959         if (flg) PetscCall(VecView(vcoords, viewer));
1960         PetscCall(VecGetArrayRead(vcoords, &v));
1961         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1962         PetscCall(VecRestoreArrayRead(vcoords, &v));
1963       }
1964       PetscCall(VecDestroy(&vcoords));
1965       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1966       PetscCall(PetscFree(coords));
1967       PetscCall(PetscFree(ctxs[0]));
1968       PetscCall(PetscFree2(funcs, ctxs));
1969       if (flg) {
1970         PetscCall(PetscViewerPopFormat(viewer));
1971         PetscCall(PetscViewerDestroy(&viewer));
1972       }
1973     }
1974   }
1975   PetscFunctionReturn(PETSC_SUCCESS);
1976 }
1977 
1978 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1979 {
1980   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
1981   IS              nis;
1982   const PetscInt *idxs;
1983   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;
1984 
1985   PetscFunctionBegin;
1986   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
1987   if (mop == MPI_LAND) {
1988     /* init rootdata with true */
1989     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1990   } else {
1991     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
1992   }
1993   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
1994   PetscCall(ISGetLocalSize(*is, &nd));
1995   PetscCall(ISGetIndices(*is, &idxs));
1996   for (i = 0; i < nd; i++)
1997     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1998   PetscCall(ISRestoreIndices(*is, &idxs));
1999   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2000   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2001   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2002   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2003   if (mop == MPI_LAND) {
2004     PetscCall(PetscMalloc1(nd, &nidxs));
2005   } else {
2006     PetscCall(PetscMalloc1(n, &nidxs));
2007   }
2008   for (i = 0, nnd = 0; i < n; i++)
2009     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2010   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2011   PetscCall(ISDestroy(is));
2012   *is = nis;
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2017 {
2018   PC_IS   *pcis   = (PC_IS *)pc->data;
2019   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2020 
2021   PetscFunctionBegin;
2022   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2023   if (pcbddc->ChangeOfBasisMatrix) {
2024     Vec swap;
2025 
2026     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2027     swap                = pcbddc->work_change;
2028     pcbddc->work_change = r;
2029     r                   = swap;
2030   }
2031   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2032   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2033   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2034   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2035   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2036   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2037   PetscCall(VecSet(z, 0.));
2038   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2039   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2040   if (pcbddc->ChangeOfBasisMatrix) {
2041     pcbddc->work_change = r;
2042     PetscCall(VecCopy(z, pcbddc->work_change));
2043     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2044   }
2045   PetscFunctionReturn(PETSC_SUCCESS);
2046 }
2047 
2048 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2049 {
2050   PCBDDCBenignMatMult_ctx ctx;
2051   PetscBool               apply_right, apply_left, reset_x;
2052 
2053   PetscFunctionBegin;
2054   PetscCall(MatShellGetContext(A, &ctx));
2055   if (transpose) {
2056     apply_right = ctx->apply_left;
2057     apply_left  = ctx->apply_right;
2058   } else {
2059     apply_right = ctx->apply_right;
2060     apply_left  = ctx->apply_left;
2061   }
2062   reset_x = PETSC_FALSE;
2063   if (apply_right) {
2064     const PetscScalar *ax;
2065     PetscInt           nl, i;
2066 
2067     PetscCall(VecGetLocalSize(x, &nl));
2068     PetscCall(VecGetArrayRead(x, &ax));
2069     PetscCall(PetscArraycpy(ctx->work, ax, nl));
2070     PetscCall(VecRestoreArrayRead(x, &ax));
2071     for (i = 0; i < ctx->benign_n; i++) {
2072       PetscScalar     sum, val;
2073       const PetscInt *idxs;
2074       PetscInt        nz, j;
2075       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2076       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2077       sum = 0.;
2078       if (ctx->apply_p0) {
2079         val = ctx->work[idxs[nz - 1]];
2080         for (j = 0; j < nz - 1; j++) {
2081           sum += ctx->work[idxs[j]];
2082           ctx->work[idxs[j]] += val;
2083         }
2084       } else {
2085         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2086       }
2087       ctx->work[idxs[nz - 1]] -= sum;
2088       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2089     }
2090     PetscCall(VecPlaceArray(x, ctx->work));
2091     reset_x = PETSC_TRUE;
2092   }
2093   if (transpose) {
2094     PetscCall(MatMultTranspose(ctx->A, x, y));
2095   } else {
2096     PetscCall(MatMult(ctx->A, x, y));
2097   }
2098   if (reset_x) PetscCall(VecResetArray(x));
2099   if (apply_left) {
2100     PetscScalar *ay;
2101     PetscInt     i;
2102 
2103     PetscCall(VecGetArray(y, &ay));
2104     for (i = 0; i < ctx->benign_n; i++) {
2105       PetscScalar     sum, val;
2106       const PetscInt *idxs;
2107       PetscInt        nz, j;
2108       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2109       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2110       val = -ay[idxs[nz - 1]];
2111       if (ctx->apply_p0) {
2112         sum = 0.;
2113         for (j = 0; j < nz - 1; j++) {
2114           sum += ay[idxs[j]];
2115           ay[idxs[j]] += val;
2116         }
2117         ay[idxs[nz - 1]] += sum;
2118       } else {
2119         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2120         ay[idxs[nz - 1]] = 0.;
2121       }
2122       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2123     }
2124     PetscCall(VecRestoreArray(y, &ay));
2125   }
2126   PetscFunctionReturn(PETSC_SUCCESS);
2127 }
2128 
2129 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2130 {
2131   PetscFunctionBegin;
2132   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2133   PetscFunctionReturn(PETSC_SUCCESS);
2134 }
2135 
2136 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2137 {
2138   PetscFunctionBegin;
2139   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2140   PetscFunctionReturn(PETSC_SUCCESS);
2141 }
2142 
2143 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2144 {
2145   PC_IS                  *pcis   = (PC_IS *)pc->data;
2146   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
2147   PCBDDCBenignMatMult_ctx ctx;
2148 
2149   PetscFunctionBegin;
2150   if (!restore) {
2151     Mat                A_IB, A_BI;
2152     PetscScalar       *work;
2153     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2154 
2155     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2156     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2157     PetscCall(PetscMalloc1(pcis->n, &work));
2158     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2159     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2160     PetscCall(MatSetType(A_IB, MATSHELL));
2161     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
2162     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
2163     PetscCall(PetscNew(&ctx));
2164     PetscCall(MatShellSetContext(A_IB, ctx));
2165     ctx->apply_left  = PETSC_TRUE;
2166     ctx->apply_right = PETSC_FALSE;
2167     ctx->apply_p0    = PETSC_FALSE;
2168     ctx->benign_n    = pcbddc->benign_n;
2169     if (reuse) {
2170       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2171       ctx->free                 = PETSC_FALSE;
2172     } else { /* TODO: could be optimized for successive solves */
2173       ISLocalToGlobalMapping N_to_D;
2174       PetscInt               i;
2175 
2176       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2177       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2178       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2179       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2180       ctx->free = PETSC_TRUE;
2181     }
2182     ctx->A    = pcis->A_IB;
2183     ctx->work = work;
2184     PetscCall(MatSetUp(A_IB));
2185     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2186     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2187     pcis->A_IB = A_IB;
2188 
2189     /* A_BI as A_IB^T */
2190     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2191     pcbddc->benign_original_mat = pcis->A_BI;
2192     pcis->A_BI                  = A_BI;
2193   } else {
2194     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2195     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2196     PetscCall(MatDestroy(&pcis->A_IB));
2197     pcis->A_IB = ctx->A;
2198     ctx->A     = NULL;
2199     PetscCall(MatDestroy(&pcis->A_BI));
2200     pcis->A_BI                  = pcbddc->benign_original_mat;
2201     pcbddc->benign_original_mat = NULL;
2202     if (ctx->free) {
2203       PetscInt i;
2204       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2205       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2206     }
2207     PetscCall(PetscFree(ctx->work));
2208     PetscCall(PetscFree(ctx));
2209   }
2210   PetscFunctionReturn(PETSC_SUCCESS);
2211 }
2212 
2213 /* used just in bddc debug mode */
2214 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2215 {
2216   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2217   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2218   Mat      An;
2219 
2220   PetscFunctionBegin;
2221   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2222   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2223   if (is1) {
2224     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2225     PetscCall(MatDestroy(&An));
2226   } else {
2227     *B = An;
2228   }
2229   PetscFunctionReturn(PETSC_SUCCESS);
2230 }
2231 
2232 /* TODO: add reuse flag */
2233 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2234 {
2235   Mat             Bt;
2236   PetscScalar    *a, *bdata;
2237   const PetscInt *ii, *ij;
2238   PetscInt        m, n, i, nnz, *bii, *bij;
2239   PetscBool       flg_row;
2240 
2241   PetscFunctionBegin;
2242   PetscCall(MatGetSize(A, &n, &m));
2243   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2244   PetscCall(MatSeqAIJGetArray(A, &a));
2245   nnz = n;
2246   for (i = 0; i < ii[n]; i++) {
2247     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2248   }
2249   PetscCall(PetscMalloc1(n + 1, &bii));
2250   PetscCall(PetscMalloc1(nnz, &bij));
2251   PetscCall(PetscMalloc1(nnz, &bdata));
2252   nnz    = 0;
2253   bii[0] = 0;
2254   for (i = 0; i < n; i++) {
2255     PetscInt j;
2256     for (j = ii[i]; j < ii[i + 1]; j++) {
2257       PetscScalar entry = a[j];
2258       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2259         bij[nnz]   = ij[j];
2260         bdata[nnz] = entry;
2261         nnz++;
2262       }
2263     }
2264     bii[i + 1] = nnz;
2265   }
2266   PetscCall(MatSeqAIJRestoreArray(A, &a));
2267   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2268   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2269   {
2270     Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2271     b->free_a     = PETSC_TRUE;
2272     b->free_ij    = PETSC_TRUE;
2273   }
2274   if (*B == A) PetscCall(MatDestroy(&A));
2275   *B = Bt;
2276   PetscFunctionReturn(PETSC_SUCCESS);
2277 }
2278 
2279 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2280 {
2281   Mat                    B = NULL;
2282   DM                     dm;
2283   IS                     is_dummy, *cc_n;
2284   ISLocalToGlobalMapping l2gmap_dummy;
2285   PCBDDCGraph            graph;
2286   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2287   PetscInt               i, n;
2288   PetscInt              *xadj, *adjncy;
2289   PetscBool              isplex = PETSC_FALSE;
2290 
2291   PetscFunctionBegin;
2292   if (ncc) *ncc = 0;
2293   if (cc) *cc = NULL;
2294   if (primalv) *primalv = NULL;
2295   PetscCall(PCBDDCGraphCreate(&graph));
2296   PetscCall(MatGetDM(pc->pmat, &dm));
2297   if (!dm) PetscCall(PCGetDM(pc, &dm));
2298   if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2299   if (filter) isplex = PETSC_FALSE;
2300 
2301   if (isplex) { /* this code has been modified from plexpartition.c */
2302     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2303     PetscInt       *adj = NULL;
2304     IS              cellNumbering;
2305     const PetscInt *cellNum;
2306     PetscBool       useCone, useClosure;
2307     PetscSection    section;
2308     PetscSegBuffer  adjBuffer;
2309     PetscSF         sfPoint;
2310 
2311     PetscCall(DMConvert(dm, DMPLEX, &dm));
2312     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2313     PetscCall(DMGetPointSF(dm, &sfPoint));
2314     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2315     /* Build adjacency graph via a section/segbuffer */
2316     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2317     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2318     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2319     /* Always use FVM adjacency to create partitioner graph */
2320     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2321     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2322     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2323     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2324     for (n = 0, p = pStart; p < pEnd; p++) {
2325       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2326       if (nroots > 0) {
2327         if (cellNum[p] < 0) continue;
2328       }
2329       adjSize = PETSC_DETERMINE;
2330       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2331       for (a = 0; a < adjSize; ++a) {
2332         const PetscInt point = adj[a];
2333         if (pStart <= point && point < pEnd) {
2334           PetscInt *PETSC_RESTRICT pBuf;
2335           PetscCall(PetscSectionAddDof(section, p, 1));
2336           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2337           *pBuf = point;
2338         }
2339       }
2340       n++;
2341     }
2342     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2343     /* Derive CSR graph from section/segbuffer */
2344     PetscCall(PetscSectionSetUp(section));
2345     PetscCall(PetscSectionGetStorageSize(section, &size));
2346     PetscCall(PetscMalloc1(n + 1, &xadj));
2347     for (idx = 0, p = pStart; p < pEnd; p++) {
2348       if (nroots > 0) {
2349         if (cellNum[p] < 0) continue;
2350       }
2351       PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2352     }
2353     xadj[n] = size;
2354     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2355     /* Clean up */
2356     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2357     PetscCall(PetscSectionDestroy(&section));
2358     PetscCall(PetscFree(adj));
2359     graph->xadj   = xadj;
2360     graph->adjncy = adjncy;
2361   } else {
2362     Mat       A;
2363     PetscBool isseqaij, flg_row;
2364 
2365     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2366     if (!A->rmap->N || !A->cmap->N) {
2367       PetscCall(PCBDDCGraphDestroy(&graph));
2368       PetscFunctionReturn(PETSC_SUCCESS);
2369     }
2370     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2371     if (!isseqaij && filter) {
2372       PetscBool isseqdense;
2373 
2374       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2375       if (!isseqdense) {
2376         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2377       } else { /* TODO: rectangular case and LDA */
2378         PetscScalar *array;
2379         PetscReal    chop = 1.e-6;
2380 
2381         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2382         PetscCall(MatDenseGetArray(B, &array));
2383         PetscCall(MatGetSize(B, &n, NULL));
2384         for (i = 0; i < n; i++) {
2385           PetscInt j;
2386           for (j = i + 1; j < n; j++) {
2387             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2388             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2389             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2390           }
2391         }
2392         PetscCall(MatDenseRestoreArray(B, &array));
2393         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2394       }
2395     } else {
2396       PetscCall(PetscObjectReference((PetscObject)A));
2397       B = A;
2398     }
2399     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2400 
2401     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2402     if (filter) {
2403       PetscScalar *data;
2404       PetscInt     j, cum;
2405 
2406       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2407       PetscCall(MatSeqAIJGetArray(B, &data));
2408       cum = 0;
2409       for (i = 0; i < n; i++) {
2410         PetscInt t;
2411 
2412         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2413           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2414           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2415         }
2416         t                = xadj_filtered[i];
2417         xadj_filtered[i] = cum;
2418         cum += t;
2419       }
2420       PetscCall(MatSeqAIJRestoreArray(B, &data));
2421       graph->xadj   = xadj_filtered;
2422       graph->adjncy = adjncy_filtered;
2423     } else {
2424       graph->xadj   = xadj;
2425       graph->adjncy = adjncy;
2426     }
2427   }
2428   /* compute local connected components using PCBDDCGraph */
2429   graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2430   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2431   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2432   PetscCall(ISDestroy(&is_dummy));
2433   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2434   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2435   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2436   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2437 
2438   /* partial clean up */
2439   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2440   if (B) {
2441     PetscBool flg_row;
2442     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2443     PetscCall(MatDestroy(&B));
2444   }
2445   if (isplex) {
2446     PetscCall(PetscFree(xadj));
2447     PetscCall(PetscFree(adjncy));
2448   }
2449 
2450   /* get back data */
2451   if (isplex) {
2452     if (ncc) *ncc = graph->ncc;
2453     if (cc || primalv) {
2454       Mat          A;
2455       PetscBT      btv, btvt, btvc;
2456       PetscSection subSection;
2457       PetscInt    *ids, cum, cump, *cids, *pids;
2458       PetscInt     dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2459 
2460       PetscCall(DMGetDimension(dm, &dim));
2461       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2462       PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2463       PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2464       PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2465       PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2466       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2467       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2468       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2469       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2470       PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2471 
2472       /* First see if we find corners for the subdomains, i.e. a vertex
2473          shared by at least dim subdomain boundary faces. This does not
2474          cover all the possible cases with simplices but it is enough
2475          for tensor cells */
2476       if (vStart != fStart && dim <= 3) {
2477         for (PetscInt c = cStart; c < cEnd; c++) {
2478           PetscInt        nf, cnt = 0, mcnt = dim, *cfaces;
2479           const PetscInt *faces;
2480 
2481           PetscCall(DMPlexGetConeSize(dm, c, &nf));
2482           PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2483           PetscCall(DMPlexGetCone(dm, c, &faces));
2484           for (PetscInt f = 0; f < nf; f++) {
2485             PetscInt nc, ff;
2486 
2487             PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2488             PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2489             if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2490           }
2491           if (cnt >= mcnt) {
2492             PetscInt size, *closure = NULL;
2493 
2494             PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2495             for (PetscInt k = 0; k < 2 * size; k += 2) {
2496               PetscInt v = closure[k];
2497               if (v >= vStart && v < vEnd) {
2498                 PetscInt vsize, *vclosure = NULL;
2499 
2500                 cnt = 0;
2501                 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2502                 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2503                   PetscInt f = vclosure[vk];
2504                   if (f >= fStart && f < fEnd) {
2505                     PetscInt  nc, ff;
2506                     PetscBool valid = PETSC_FALSE;
2507 
2508                     for (PetscInt fk = 0; fk < nf; fk++)
2509                       if (f == cfaces[fk]) valid = PETSC_TRUE;
2510                     if (!valid) continue;
2511                     PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2512                     PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2513                     if (nc == 1 && f == ff) cnt++;
2514                   }
2515                 }
2516                 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2517                 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2518               }
2519             }
2520             PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2521           }
2522           PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2523         }
2524       }
2525 
2526       cids[0] = 0;
2527       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2528         PetscInt j;
2529 
2530         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2531         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2532           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2533 
2534           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2535           for (k = 0; k < 2 * size; k += 2) {
2536             PetscInt s, pp, p = closure[k], off, dof, cdof;
2537 
2538             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2539             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2540             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2541             for (s = 0; s < dof - cdof; s++) {
2542               if (PetscBTLookupSet(btvt, off + s)) continue;
2543               if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2544               else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2545               else pids[cump++] = off + s; /* cross-vertex */
2546             }
2547             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2548             if (pp != p) {
2549               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2550               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2551               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2552               for (s = 0; s < dof - cdof; s++) {
2553                 if (PetscBTLookupSet(btvt, off + s)) continue;
2554                 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2555                 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2556                 else pids[cump++] = off + s; /* cross-vertex */
2557               }
2558             }
2559           }
2560           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2561         }
2562         cids[i + 1] = cum;
2563         /* mark dofs as already assigned */
2564         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2565       }
2566       if (cc) {
2567         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2568         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2569         *cc = cc_n;
2570       }
2571       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2572       PetscCall(PetscFree3(ids, cids, pids));
2573       PetscCall(PetscBTDestroy(&btv));
2574       PetscCall(PetscBTDestroy(&btvt));
2575       PetscCall(PetscBTDestroy(&btvc));
2576       PetscCall(DMDestroy(&dm));
2577     }
2578   } else {
2579     if (ncc) *ncc = graph->ncc;
2580     if (cc) {
2581       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2582       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2583       *cc = cc_n;
2584     }
2585   }
2586   /* clean up graph */
2587   graph->xadj   = NULL;
2588   graph->adjncy = NULL;
2589   PetscCall(PCBDDCGraphDestroy(&graph));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2594 {
2595   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2596   PC_IS   *pcis   = (PC_IS *)pc->data;
2597   IS       dirIS  = NULL;
2598   PetscInt i;
2599 
2600   PetscFunctionBegin;
2601   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2602   if (zerodiag) {
2603     Mat             A;
2604     Vec             vec3_N;
2605     PetscScalar    *vals;
2606     const PetscInt *idxs;
2607     PetscInt        nz, *count;
2608 
2609     /* p0 */
2610     PetscCall(VecSet(pcis->vec1_N, 0.));
2611     PetscCall(PetscMalloc1(pcis->n, &vals));
2612     PetscCall(ISGetLocalSize(zerodiag, &nz));
2613     PetscCall(ISGetIndices(zerodiag, &idxs));
2614     for (i = 0; i < nz; i++) vals[i] = 1.;
2615     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2616     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2617     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2618     /* v_I */
2619     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2620     for (i = 0; i < nz; i++) vals[i] = 0.;
2621     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2622     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2623     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2624     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2625     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2626     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2627     if (dirIS) {
2628       PetscInt n;
2629 
2630       PetscCall(ISGetLocalSize(dirIS, &n));
2631       PetscCall(ISGetIndices(dirIS, &idxs));
2632       for (i = 0; i < n; i++) vals[i] = 0.;
2633       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2634       PetscCall(ISRestoreIndices(dirIS, &idxs));
2635     }
2636     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2637     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2638     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2639     PetscCall(VecSet(vec3_N, 0.));
2640     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2641     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2642     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2643     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2644     PetscCall(PetscFree(vals));
2645     PetscCall(VecDestroy(&vec3_N));
2646 
2647     /* there should not be any pressure dofs lying on the interface */
2648     PetscCall(PetscCalloc1(pcis->n, &count));
2649     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2650     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2651     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2652     PetscCall(ISGetIndices(zerodiag, &idxs));
2653     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2654     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2655     PetscCall(PetscFree(count));
2656   }
2657   PetscCall(ISDestroy(&dirIS));
2658 
2659   /* check PCBDDCBenignGetOrSetP0 */
2660   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2661   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2662   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2663   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2664   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2665   for (i = 0; i < pcbddc->benign_n; i++) {
2666     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2667     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2668   }
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2673 {
2674   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2675   Mat_IS   *matis     = (Mat_IS *)pc->pmat->data;
2676   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2677   PetscInt  nz, n, benign_n, bsp = 1;
2678   PetscInt *interior_dofs, n_interior_dofs, nneu;
2679   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2680 
2681   PetscFunctionBegin;
2682   if (reuse) goto project_b0;
2683   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2684   PetscCall(MatDestroy(&pcbddc->benign_B0));
2685   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2686   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2687   has_null_pressures = PETSC_TRUE;
2688   have_null          = PETSC_TRUE;
2689   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2690      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2691      Checks if all the pressure dofs in each subdomain have a zero diagonal
2692      If not, a change of basis on pressures is not needed
2693      since the local Schur complements are already SPD
2694   */
2695   if (pcbddc->n_ISForDofsLocal) {
2696     IS        iP = NULL;
2697     PetscInt  p, *pp;
2698     PetscBool flg;
2699 
2700     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2701     n = pcbddc->n_ISForDofsLocal;
2702     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2703     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2704     PetscOptionsEnd();
2705     if (!flg) {
2706       n     = 1;
2707       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2708     }
2709 
2710     bsp = 0;
2711     for (p = 0; p < n; p++) {
2712       PetscInt bs;
2713 
2714       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2715       PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2716       bsp += bs;
2717     }
2718     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2719     bsp = 0;
2720     for (p = 0; p < n; p++) {
2721       const PetscInt *idxs;
2722       PetscInt        b, bs, npl, *bidxs;
2723 
2724       PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2725       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2726       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2727       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2728       for (b = 0; b < bs; b++) {
2729         PetscInt i;
2730 
2731         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2732         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2733         bsp++;
2734       }
2735       PetscCall(PetscFree(bidxs));
2736       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2737     }
2738     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2739 
2740     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2741     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2742     if (iP) {
2743       IS newpressures;
2744 
2745       PetscCall(ISDifference(pressures, iP, &newpressures));
2746       PetscCall(ISDestroy(&pressures));
2747       pressures = newpressures;
2748     }
2749     PetscCall(ISSorted(pressures, &sorted));
2750     if (!sorted) PetscCall(ISSort(pressures));
2751     PetscCall(PetscFree(pp));
2752   }
2753 
2754   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2755   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2756   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2757   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2758   PetscCall(ISSorted(zerodiag, &sorted));
2759   if (!sorted) PetscCall(ISSort(zerodiag));
2760   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2761   zerodiag_save = zerodiag;
2762   PetscCall(ISGetLocalSize(zerodiag, &nz));
2763   if (!nz) {
2764     if (n) have_null = PETSC_FALSE;
2765     has_null_pressures = PETSC_FALSE;
2766     PetscCall(ISDestroy(&zerodiag));
2767   }
2768   recompute_zerodiag = PETSC_FALSE;
2769 
2770   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2771   zerodiag_subs   = NULL;
2772   benign_n        = 0;
2773   n_interior_dofs = 0;
2774   interior_dofs   = NULL;
2775   nneu            = 0;
2776   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2777   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2778   if (checkb) { /* need to compute interior nodes */
2779     PetscInt               n, i;
2780     PetscInt              *count;
2781     ISLocalToGlobalMapping mapping;
2782 
2783     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2784     PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2785     PetscCall(PetscMalloc1(n, &interior_dofs));
2786     for (i = 0; i < n; i++)
2787       if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2788     PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2789   }
2790   if (has_null_pressures) {
2791     IS             *subs;
2792     PetscInt        nsubs, i, j, nl;
2793     const PetscInt *idxs;
2794     PetscScalar    *array;
2795     Vec            *work;
2796 
2797     subs  = pcbddc->local_subs;
2798     nsubs = pcbddc->n_local_subs;
2799     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2800     if (checkb) {
2801       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2802       PetscCall(ISGetLocalSize(zerodiag, &nl));
2803       PetscCall(ISGetIndices(zerodiag, &idxs));
2804       /* work[0] = 1_p */
2805       PetscCall(VecSet(work[0], 0.));
2806       PetscCall(VecGetArray(work[0], &array));
2807       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2808       PetscCall(VecRestoreArray(work[0], &array));
2809       /* work[0] = 1_v */
2810       PetscCall(VecSet(work[1], 1.));
2811       PetscCall(VecGetArray(work[1], &array));
2812       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2813       PetscCall(VecRestoreArray(work[1], &array));
2814       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2815     }
2816 
2817     if (nsubs > 1 || bsp > 1) {
2818       IS      *is;
2819       PetscInt b, totb;
2820 
2821       totb  = bsp;
2822       is    = bsp > 1 ? bzerodiag : &zerodiag;
2823       nsubs = PetscMax(nsubs, 1);
2824       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2825       for (b = 0; b < totb; b++) {
2826         for (i = 0; i < nsubs; i++) {
2827           ISLocalToGlobalMapping l2g;
2828           IS                     t_zerodiag_subs;
2829           PetscInt               nl;
2830 
2831           if (subs) {
2832             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2833           } else {
2834             IS tis;
2835 
2836             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2837             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2838             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2839             PetscCall(ISDestroy(&tis));
2840           }
2841           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2842           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2843           if (nl) {
2844             PetscBool valid = PETSC_TRUE;
2845 
2846             if (checkb) {
2847               PetscCall(VecSet(matis->x, 0));
2848               PetscCall(ISGetLocalSize(subs[i], &nl));
2849               PetscCall(ISGetIndices(subs[i], &idxs));
2850               PetscCall(VecGetArray(matis->x, &array));
2851               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2852               PetscCall(VecRestoreArray(matis->x, &array));
2853               PetscCall(ISRestoreIndices(subs[i], &idxs));
2854               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2855               PetscCall(MatMult(matis->A, matis->x, matis->y));
2856               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2857               PetscCall(VecGetArray(matis->y, &array));
2858               for (j = 0; j < n_interior_dofs; j++) {
2859                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2860                   valid = PETSC_FALSE;
2861                   break;
2862                 }
2863               }
2864               PetscCall(VecRestoreArray(matis->y, &array));
2865             }
2866             if (valid && nneu) {
2867               const PetscInt *idxs;
2868               PetscInt        nzb;
2869 
2870               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2871               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2872               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2873               if (nzb) valid = PETSC_FALSE;
2874             }
2875             if (valid && pressures) {
2876               IS       t_pressure_subs, tmp;
2877               PetscInt i1, i2;
2878 
2879               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2880               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2881               PetscCall(ISGetLocalSize(tmp, &i1));
2882               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2883               if (i2 != i1) valid = PETSC_FALSE;
2884               PetscCall(ISDestroy(&t_pressure_subs));
2885               PetscCall(ISDestroy(&tmp));
2886             }
2887             if (valid) {
2888               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2889               benign_n++;
2890             } else recompute_zerodiag = PETSC_TRUE;
2891           }
2892           PetscCall(ISDestroy(&t_zerodiag_subs));
2893           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2894         }
2895       }
2896     } else { /* there's just one subdomain (or zero if they have not been detected */
2897       PetscBool valid = PETSC_TRUE;
2898 
2899       if (nneu) valid = PETSC_FALSE;
2900       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2901       if (valid && checkb) {
2902         PetscCall(MatMult(matis->A, work[0], matis->x));
2903         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2904         PetscCall(VecGetArray(matis->x, &array));
2905         for (j = 0; j < n_interior_dofs; j++) {
2906           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2907             valid = PETSC_FALSE;
2908             break;
2909           }
2910         }
2911         PetscCall(VecRestoreArray(matis->x, &array));
2912       }
2913       if (valid) {
2914         benign_n = 1;
2915         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2916         PetscCall(PetscObjectReference((PetscObject)zerodiag));
2917         zerodiag_subs[0] = zerodiag;
2918       }
2919     }
2920     if (checkb) PetscCall(VecDestroyVecs(2, &work));
2921   }
2922   PetscCall(PetscFree(interior_dofs));
2923 
2924   if (!benign_n) {
2925     PetscInt n;
2926 
2927     PetscCall(ISDestroy(&zerodiag));
2928     recompute_zerodiag = PETSC_FALSE;
2929     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2930     if (n) have_null = PETSC_FALSE;
2931   }
2932 
2933   /* final check for null pressures */
2934   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
2935 
2936   if (recompute_zerodiag) {
2937     PetscCall(ISDestroy(&zerodiag));
2938     if (benign_n == 1) {
2939       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2940       zerodiag = zerodiag_subs[0];
2941     } else {
2942       PetscInt i, nzn, *new_idxs;
2943 
2944       nzn = 0;
2945       for (i = 0; i < benign_n; i++) {
2946         PetscInt ns;
2947         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2948         nzn += ns;
2949       }
2950       PetscCall(PetscMalloc1(nzn, &new_idxs));
2951       nzn = 0;
2952       for (i = 0; i < benign_n; i++) {
2953         PetscInt ns, *idxs;
2954         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2955         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2956         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2957         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2958         nzn += ns;
2959       }
2960       PetscCall(PetscSortInt(nzn, new_idxs));
2961       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2962     }
2963     have_null = PETSC_FALSE;
2964   }
2965 
2966   /* determines if the coarse solver will be singular or not */
2967   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2968 
2969   /* Prepare matrix to compute no-net-flux */
2970   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2971     Mat                    A, loc_divudotp;
2972     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2973     IS                     row, col, isused = NULL;
2974     PetscInt               M, N, n, st, n_isused;
2975 
2976     if (pressures) {
2977       isused = pressures;
2978     } else {
2979       isused = zerodiag_save;
2980     }
2981     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
2982     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2983     PetscCall(MatGetLocalSize(A, &n, NULL));
2984     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
2985     n_isused = 0;
2986     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
2987     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
2988     st = st - n_isused;
2989     if (n) {
2990       const PetscInt *gidxs;
2991 
2992       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
2993       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
2994       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2995       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2996       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
2997       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
2998     } else {
2999       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3000       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3001       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3002     }
3003     PetscCall(MatGetSize(pc->pmat, NULL, &N));
3004     PetscCall(ISGetSize(row, &M));
3005     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3006     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3007     PetscCall(ISDestroy(&row));
3008     PetscCall(ISDestroy(&col));
3009     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3010     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3011     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3012     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3013     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3014     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3015     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3016     PetscCall(MatDestroy(&loc_divudotp));
3017     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3018     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3019   }
3020   PetscCall(ISDestroy(&zerodiag_save));
3021   PetscCall(ISDestroy(&pressures));
3022   if (bzerodiag) {
3023     PetscInt i;
3024 
3025     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3026     PetscCall(PetscFree(bzerodiag));
3027   }
3028   pcbddc->benign_n             = benign_n;
3029   pcbddc->benign_zerodiag_subs = zerodiag_subs;
3030 
3031   /* determines if the problem has subdomains with 0 pressure block */
3032   have_null = (PetscBool)(!!pcbddc->benign_n);
3033   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3034 
3035 project_b0:
3036   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3037   /* change of basis and p0 dofs */
3038   if (pcbddc->benign_n) {
3039     PetscInt i, s, *nnz;
3040 
3041     /* local change of basis for pressures */
3042     PetscCall(MatDestroy(&pcbddc->benign_change));
3043     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3044     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3045     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3046     PetscCall(PetscMalloc1(n, &nnz));
3047     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3048     for (i = 0; i < pcbddc->benign_n; i++) {
3049       const PetscInt *idxs;
3050       PetscInt        nzs, j;
3051 
3052       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3053       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3054       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3055       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
3056       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3057     }
3058     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3059     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3060     PetscCall(PetscFree(nnz));
3061     /* set identity by default */
3062     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3063     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3064     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3065     /* set change on pressures */
3066     for (s = 0; s < pcbddc->benign_n; s++) {
3067       PetscScalar    *array;
3068       const PetscInt *idxs;
3069       PetscInt        nzs;
3070 
3071       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3072       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3073       for (i = 0; i < nzs - 1; i++) {
3074         PetscScalar vals[2];
3075         PetscInt    cols[2];
3076 
3077         cols[0] = idxs[i];
3078         cols[1] = idxs[nzs - 1];
3079         vals[0] = 1.;
3080         vals[1] = 1.;
3081         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3082       }
3083       PetscCall(PetscMalloc1(nzs, &array));
3084       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3085       array[nzs - 1] = 1.;
3086       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3087       /* store local idxs for p0 */
3088       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3089       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3090       PetscCall(PetscFree(array));
3091     }
3092     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3093     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3094 
3095     /* project if needed */
3096     if (pcbddc->benign_change_explicit) {
3097       Mat M;
3098 
3099       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3100       PetscCall(MatDestroy(&pcbddc->local_mat));
3101       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3102       PetscCall(MatDestroy(&M));
3103     }
3104     /* store global idxs for p0 */
3105     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3106   }
3107   *zerodiaglocal = zerodiag;
3108   PetscFunctionReturn(PETSC_SUCCESS);
3109 }
3110 
3111 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3112 {
3113   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
3114   PetscScalar *array;
3115 
3116   PetscFunctionBegin;
3117   if (!pcbddc->benign_sf) {
3118     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3119     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3120   }
3121   if (get) {
3122     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3123     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3124     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3125     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3126   } else {
3127     PetscCall(VecGetArray(v, &array));
3128     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3129     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3130     PetscCall(VecRestoreArray(v, &array));
3131   }
3132   PetscFunctionReturn(PETSC_SUCCESS);
3133 }
3134 
3135 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3136 {
3137   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3138 
3139   PetscFunctionBegin;
3140   /* TODO: add error checking
3141     - avoid nested pop (or push) calls.
3142     - cannot push before pop.
3143     - cannot call this if pcbddc->local_mat is NULL
3144   */
3145   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3146   if (pop) {
3147     if (pcbddc->benign_change_explicit) {
3148       IS       is_p0;
3149       MatReuse reuse;
3150 
3151       /* extract B_0 */
3152       reuse = MAT_INITIAL_MATRIX;
3153       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3154       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3155       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3156       /* remove rows and cols from local problem */
3157       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3158       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3159       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3160       PetscCall(ISDestroy(&is_p0));
3161     } else {
3162       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
3163       PetscScalar *vals;
3164       PetscInt     i, n, *idxs_ins;
3165 
3166       PetscCall(VecGetLocalSize(matis->y, &n));
3167       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3168       if (!pcbddc->benign_B0) {
3169         PetscInt *nnz;
3170         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3171         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3172         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3173         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3174         for (i = 0; i < pcbddc->benign_n; i++) {
3175           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3176           nnz[i] = n - nnz[i];
3177         }
3178         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3179         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3180         PetscCall(PetscFree(nnz));
3181       }
3182 
3183       for (i = 0; i < pcbddc->benign_n; i++) {
3184         PetscScalar *array;
3185         PetscInt    *idxs, j, nz, cum;
3186 
3187         PetscCall(VecSet(matis->x, 0.));
3188         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3189         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3190         for (j = 0; j < nz; j++) vals[j] = 1.;
3191         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3192         PetscCall(VecAssemblyBegin(matis->x));
3193         PetscCall(VecAssemblyEnd(matis->x));
3194         PetscCall(VecSet(matis->y, 0.));
3195         PetscCall(MatMult(matis->A, matis->x, matis->y));
3196         PetscCall(VecGetArray(matis->y, &array));
3197         cum = 0;
3198         for (j = 0; j < n; j++) {
3199           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3200             vals[cum]     = array[j];
3201             idxs_ins[cum] = j;
3202             cum++;
3203           }
3204         }
3205         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3206         PetscCall(VecRestoreArray(matis->y, &array));
3207         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3208       }
3209       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3210       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3211       PetscCall(PetscFree2(idxs_ins, vals));
3212     }
3213   } else { /* push */
3214 
3215     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3216     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3217       PetscScalar *B0_vals;
3218       PetscInt    *B0_cols, B0_ncol;
3219 
3220       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3221       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3222       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3223       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3224       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3225     }
3226     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3227     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3228   }
3229   PetscFunctionReturn(PETSC_SUCCESS);
3230 }
3231 
3232 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3233 {
3234   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3235   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3236   PetscBLASInt    B_dummyint, B_neigs, B_ierr, B_lwork;
3237   PetscBLASInt   *B_iwork, *B_ifail;
3238   PetscScalar    *work, lwork;
3239   PetscScalar    *St, *S, *eigv;
3240   PetscScalar    *Sarray, *Starray;
3241   PetscReal      *eigs, thresh, lthresh, uthresh;
3242   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3243   PetscBool       allocated_S_St, upart;
3244 #if defined(PETSC_USE_COMPLEX)
3245   PetscReal *rwork;
3246 #endif
3247 
3248   PetscFunctionBegin;
3249   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3250   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3251   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3252   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3253              sub_schurs->is_posdef);
3254   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3255 
3256   if (pcbddc->dbg_flag) {
3257     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3258     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3259     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3260     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3261     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3262   }
3263 
3264   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3265 
3266   /* max size of subsets */
3267   mss = 0;
3268   for (i = 0; i < sub_schurs->n_subs; i++) {
3269     PetscInt subset_size;
3270 
3271     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3272     mss = PetscMax(mss, subset_size);
3273   }
3274 
3275   /* min/max and threshold */
3276   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3277   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3278   nmax           = PetscMax(nmin, nmax);
3279   allocated_S_St = PETSC_FALSE;
3280   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3281     allocated_S_St = PETSC_TRUE;
3282   }
3283 
3284   /* allocate lapack workspace */
3285   cum = cum2 = 0;
3286   maxneigs   = 0;
3287   for (i = 0; i < sub_schurs->n_subs; i++) {
3288     PetscInt n, subset_size;
3289 
3290     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3291     n = PetscMin(subset_size, nmax);
3292     cum += subset_size;
3293     cum2 += subset_size * n;
3294     maxneigs = PetscMax(maxneigs, n);
3295   }
3296   lwork = 0;
3297   if (mss) {
3298     PetscScalar  sdummy  = 0.;
3299     PetscBLASInt B_itype = 1;
3300     PetscBLASInt B_N, idummy = 0;
3301     PetscReal    rdummy = 0., zero = 0.0;
3302     PetscReal    eps = 0.0; /* dlamch? */
3303 
3304     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3305     PetscCall(PetscBLASIntCast(mss, &B_N));
3306     B_lwork = -1;
3307     /* some implementations may complain about NULL pointers, even if we are querying */
3308     S       = &sdummy;
3309     St      = &sdummy;
3310     eigs    = &rdummy;
3311     eigv    = &sdummy;
3312     B_iwork = &idummy;
3313     B_ifail = &idummy;
3314 #if defined(PETSC_USE_COMPLEX)
3315     rwork = &rdummy;
3316 #endif
3317     thresh = 1.0;
3318     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3319 #if defined(PETSC_USE_COMPLEX)
3320     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3321 #else
3322     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3323 #endif
3324     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr);
3325     PetscCall(PetscFPTrapPop());
3326   }
3327 
3328   nv = 0;
3329   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3330     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3331   }
3332   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3333   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3334   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3335 #if defined(PETSC_USE_COMPLEX)
3336   PetscCall(PetscMalloc1(7 * mss, &rwork));
3337 #endif
3338   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3339                          &pcbddc->adaptive_constraints_data));
3340   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3341 
3342   maxneigs = 0;
3343   cum = cumarray                           = 0;
3344   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3345   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3346   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3347     const PetscInt *idxs;
3348 
3349     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3350     for (cum = 0; cum < nv; cum++) {
3351       pcbddc->adaptive_constraints_n[cum]            = 1;
3352       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3353       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3354       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3355       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3356     }
3357     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3358   }
3359 
3360   if (mss) { /* multilevel */
3361     if (sub_schurs->gdsw) {
3362       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3363       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3364     } else {
3365       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3366       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3367     }
3368   }
3369 
3370   lthresh = pcbddc->adaptive_threshold[0];
3371   uthresh = pcbddc->adaptive_threshold[1];
3372   upart   = pcbddc->use_deluxe_scaling;
3373   for (i = 0; i < sub_schurs->n_subs; i++) {
3374     const PetscInt *idxs;
3375     PetscReal       upper, lower;
3376     PetscInt        j, subset_size, eigs_start = 0;
3377     PetscBLASInt    B_N;
3378     PetscBool       same_data = PETSC_FALSE;
3379     PetscBool       scal      = PETSC_FALSE;
3380 
3381     if (upart) {
3382       upper = PETSC_MAX_REAL;
3383       lower = uthresh;
3384     } else {
3385       if (sub_schurs->gdsw) {
3386         upper = uthresh;
3387         lower = PETSC_MIN_REAL;
3388       } else {
3389         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3390         upper = 1. / uthresh;
3391         lower = 0.;
3392       }
3393     }
3394     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3395     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3396     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3397     /* this is experimental: we assume the dofs have been properly grouped to have
3398        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3399     if (!sub_schurs->is_posdef) {
3400       Mat T;
3401 
3402       for (j = 0; j < subset_size; j++) {
3403         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3404           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3405           PetscCall(MatScale(T, -1.0));
3406           PetscCall(MatDestroy(&T));
3407           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3408           PetscCall(MatScale(T, -1.0));
3409           PetscCall(MatDestroy(&T));
3410           if (sub_schurs->change_primal_sub) {
3411             PetscInt        nz, k;
3412             const PetscInt *idxs;
3413 
3414             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3415             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3416             for (k = 0; k < nz; k++) {
3417               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3418               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3419             }
3420             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3421           }
3422           scal = PETSC_TRUE;
3423           break;
3424         }
3425       }
3426     }
3427 
3428     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3429       if (sub_schurs->is_symmetric) {
3430         PetscInt j, k;
3431         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3432           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3433           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3434         }
3435         for (j = 0; j < subset_size; j++) {
3436           for (k = j; k < subset_size; k++) {
3437             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3438             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3439           }
3440         }
3441       } else {
3442         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3443         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3444       }
3445     } else {
3446       S  = Sarray + cumarray;
3447       St = Starray + cumarray;
3448     }
3449     /* see if we can save some work */
3450     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3451 
3452     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3453       B_neigs = 0;
3454     } else {
3455       PetscBLASInt B_itype = 1;
3456       PetscBLASInt B_IL, B_IU;
3457       PetscReal    eps = -1.0; /* dlamch? */
3458       PetscInt     nmin_s;
3459       PetscBool    compute_range;
3460 
3461       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3462       B_neigs       = 0;
3463       compute_range = (PetscBool)!same_data;
3464       if (nmin >= subset_size) compute_range = PETSC_FALSE;
3465 
3466       if (pcbddc->dbg_flag) {
3467         PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3468 
3469         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3470         PetscCall(
3471           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3472       }
3473 
3474       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3475       if (compute_range) {
3476         /* ask for eigenvalues larger than thresh */
3477         if (sub_schurs->is_posdef) {
3478 #if defined(PETSC_USE_COMPLEX)
3479           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3480 #else
3481           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3482 #endif
3483           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3484         } else { /* no theory so far, but it works nicely */
3485           PetscInt  recipe = 0, recipe_m = 1;
3486           PetscReal bb[2];
3487 
3488           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3489           switch (recipe) {
3490           case 0:
3491             if (scal) {
3492               bb[0] = PETSC_MIN_REAL;
3493               bb[1] = lthresh;
3494             } else {
3495               bb[0] = uthresh;
3496               bb[1] = PETSC_MAX_REAL;
3497             }
3498 #if defined(PETSC_USE_COMPLEX)
3499             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3500 #else
3501             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3502 #endif
3503             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3504             break;
3505           case 1:
3506             bb[0] = PETSC_MIN_REAL;
3507             bb[1] = lthresh * lthresh;
3508 #if defined(PETSC_USE_COMPLEX)
3509             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3510 #else
3511             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3512 #endif
3513             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3514             if (!scal) {
3515               PetscBLASInt B_neigs2 = 0;
3516 
3517               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3518               bb[1] = PETSC_MAX_REAL;
3519               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3520               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3521 #if defined(PETSC_USE_COMPLEX)
3522               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3523 #else
3524               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3525 #endif
3526               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3527               B_neigs += B_neigs2;
3528             }
3529             break;
3530           case 2:
3531             if (scal) {
3532               bb[0] = PETSC_MIN_REAL;
3533               bb[1] = 0;
3534 #if defined(PETSC_USE_COMPLEX)
3535               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3536 #else
3537               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3538 #endif
3539               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3540             } else {
3541               PetscBLASInt B_neigs2 = 0;
3542               PetscBool    do_copy  = PETSC_FALSE;
3543 
3544               lthresh = PetscMax(lthresh, 0.0);
3545               if (lthresh > 0.0) {
3546                 bb[0] = PETSC_MIN_REAL;
3547                 bb[1] = lthresh * lthresh;
3548 
3549                 do_copy = PETSC_TRUE;
3550 #if defined(PETSC_USE_COMPLEX)
3551                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3552 #else
3553                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3554 #endif
3555                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3556               }
3557               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3558               bb[1] = PETSC_MAX_REAL;
3559               if (do_copy) {
3560                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3561                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3562               }
3563 #if defined(PETSC_USE_COMPLEX)
3564               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3565 #else
3566               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3567 #endif
3568               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3569               B_neigs += B_neigs2;
3570             }
3571             break;
3572           case 3:
3573             if (scal) {
3574               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3575             } else {
3576               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3577             }
3578             if (!scal) {
3579               bb[0] = uthresh;
3580               bb[1] = PETSC_MAX_REAL;
3581 #if defined(PETSC_USE_COMPLEX)
3582               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3583 #else
3584               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3585 #endif
3586               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3587             }
3588             if (recipe_m > 0 && B_N - B_neigs > 0) {
3589               PetscBLASInt B_neigs2 = 0;
3590 
3591               B_IL = 1;
3592               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3593               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3594               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3595 #if defined(PETSC_USE_COMPLEX)
3596               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3597 #else
3598               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3599 #endif
3600               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3601               B_neigs += B_neigs2;
3602             }
3603             break;
3604           case 4:
3605             bb[0] = PETSC_MIN_REAL;
3606             bb[1] = lthresh;
3607 #if defined(PETSC_USE_COMPLEX)
3608             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3609 #else
3610             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3611 #endif
3612             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3613             {
3614               PetscBLASInt B_neigs2 = 0;
3615 
3616               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3617               bb[1] = PETSC_MAX_REAL;
3618               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3619               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3620 #if defined(PETSC_USE_COMPLEX)
3621               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3622 #else
3623               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3624 #endif
3625               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3626               B_neigs += B_neigs2;
3627             }
3628             break;
3629           case 5: /* same as before: first compute all eigenvalues, then filter */
3630 #if defined(PETSC_USE_COMPLEX)
3631             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3632 #else
3633             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3634 #endif
3635             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3636             {
3637               PetscInt e, k, ne;
3638               for (e = 0, ne = 0; e < B_neigs; e++) {
3639                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3640                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3641                   eigs[ne] = eigs[e];
3642                   ne++;
3643                 }
3644               }
3645               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3646               PetscCall(PetscBLASIntCast(ne, &B_neigs));
3647             }
3648             break;
3649           default:
3650             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3651           }
3652         }
3653       } else if (!same_data) { /* this is just to see all the eigenvalues */
3654         PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3655         B_IL = 1;
3656 #if defined(PETSC_USE_COMPLEX)
3657         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3658 #else
3659         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3660 #endif
3661         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3662       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3663         PetscInt k;
3664         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3665         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3666         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3667         nmin = nmax;
3668         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3669         for (k = 0; k < nmax; k++) {
3670           eigs[k]                     = 1. / PETSC_SMALL;
3671           eigv[k * (subset_size + 1)] = 1.0;
3672         }
3673       }
3674       PetscCall(PetscFPTrapPop());
3675       if (B_ierr) {
3676         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3677         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3678         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3679       }
3680 
3681       if (B_neigs > nmax) {
3682         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3683         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3684         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3685       }
3686 
3687       nmin_s = PetscMin(nmin, B_N);
3688       if (B_neigs < nmin_s) {
3689         PetscBLASInt B_neigs2 = 0;
3690 
3691         if (upart) {
3692           if (scal) {
3693             PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3694             B_IL = B_neigs + 1;
3695           } else {
3696             PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3697             B_IU = B_N - B_neigs;
3698           }
3699         } else {
3700           B_IL = B_neigs + 1;
3701           PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3702         }
3703         if (pcbddc->dbg_flag) {
3704           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3705         }
3706         if (sub_schurs->is_symmetric) {
3707           PetscInt j, k;
3708           for (j = 0; j < subset_size; j++) {
3709             for (k = j; k < subset_size; k++) {
3710               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3711               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3712             }
3713           }
3714         } else {
3715           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3716           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3717         }
3718         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3719 #if defined(PETSC_USE_COMPLEX)
3720         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3721 #else
3722         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3723 #endif
3724         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3725         PetscCall(PetscFPTrapPop());
3726         B_neigs += B_neigs2;
3727       }
3728       if (B_ierr) {
3729         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3730         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3731         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3732       }
3733       if (pcbddc->dbg_flag) {
3734         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3735         for (j = 0; j < B_neigs; j++) {
3736           if (!sub_schurs->gdsw) {
3737             if (eigs[j] == 0.0) {
3738               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3739             } else {
3740               if (upart) {
3741                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3742               } else {
3743                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1. / eigs[j + eigs_start])));
3744               }
3745             }
3746           } else {
3747             double pg = (double)eigs[j + eigs_start];
3748             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3749             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3750           }
3751         }
3752       }
3753     }
3754     /* change the basis back to the original one */
3755     if (sub_schurs->change) {
3756       Mat change, phi, phit;
3757 
3758       if (pcbddc->dbg_flag > 2) {
3759         PetscInt ii;
3760         for (ii = 0; ii < B_neigs; ii++) {
3761           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3762           for (j = 0; j < B_N; j++) {
3763 #if defined(PETSC_USE_COMPLEX)
3764             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3765             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3766             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3767 #else
3768             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3769 #endif
3770           }
3771         }
3772       }
3773       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3774       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3775       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3776       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3777       PetscCall(MatDestroy(&phit));
3778       PetscCall(MatDestroy(&phi));
3779     }
3780     maxneigs                               = PetscMax(B_neigs, maxneigs);
3781     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3782     if (B_neigs) {
3783       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3784 
3785       if (pcbddc->dbg_flag > 1) {
3786         PetscInt ii;
3787         for (ii = 0; ii < B_neigs; ii++) {
3788           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3789           for (j = 0; j < B_N; j++) {
3790 #if defined(PETSC_USE_COMPLEX)
3791             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3792             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3793             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3794 #else
3795             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3796 #endif
3797           }
3798         }
3799       }
3800       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3801       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3802       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3803       cum++;
3804     }
3805     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3806     /* shift for next computation */
3807     cumarray += subset_size * subset_size;
3808   }
3809   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3810 
3811   if (mss) {
3812     if (sub_schurs->gdsw) {
3813       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3814       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3815     } else {
3816       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3817       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3818       /* destroy matrices (junk) */
3819       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3820       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3821     }
3822   }
3823   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3824   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3825 #if defined(PETSC_USE_COMPLEX)
3826   PetscCall(PetscFree(rwork));
3827 #endif
3828   if (pcbddc->dbg_flag) {
3829     PetscInt maxneigs_r;
3830     PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3831     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3832   }
3833   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3834   PetscFunctionReturn(PETSC_SUCCESS);
3835 }
3836 
3837 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3838 {
3839   Mat coarse_submat;
3840 
3841   PetscFunctionBegin;
3842   /* Setup local scatters R_to_B and (optionally) R_to_D */
3843   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3844   PetscCall(PCBDDCSetUpLocalScatters(pc));
3845 
3846   /* Setup local neumann solver ksp_R */
3847   /* PCBDDCSetUpLocalScatters should be called first! */
3848   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3849 
3850   /*
3851      Setup local correction and local part of coarse basis.
3852      Gives back the dense local part of the coarse matrix in column major ordering
3853   */
3854   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3855 
3856   /* Compute total number of coarse nodes and setup coarse solver */
3857   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3858   PetscCall(MatDestroy(&coarse_submat));
3859   PetscFunctionReturn(PETSC_SUCCESS);
3860 }
3861 
3862 PetscErrorCode PCBDDCResetCustomization(PC pc)
3863 {
3864   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3865 
3866   PetscFunctionBegin;
3867   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3868   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3869   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3870   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3871   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3872   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3873   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3874   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3875   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3876   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3877   PetscFunctionReturn(PETSC_SUCCESS);
3878 }
3879 
3880 PetscErrorCode PCBDDCResetTopography(PC pc)
3881 {
3882   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3883   PetscInt i;
3884 
3885   PetscFunctionBegin;
3886   PetscCall(MatDestroy(&pcbddc->nedcG));
3887   PetscCall(ISDestroy(&pcbddc->nedclocal));
3888   PetscCall(MatDestroy(&pcbddc->discretegradient));
3889   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3890   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3891   PetscCall(MatDestroy(&pcbddc->switch_static_change));
3892   PetscCall(VecDestroy(&pcbddc->work_change));
3893   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3894   PetscCall(MatDestroy(&pcbddc->divudotp));
3895   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3896   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3897   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3898   pcbddc->n_local_subs = 0;
3899   PetscCall(PetscFree(pcbddc->local_subs));
3900   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3901   pcbddc->graphanalyzed        = PETSC_FALSE;
3902   pcbddc->recompute_topography = PETSC_TRUE;
3903   pcbddc->corner_selected      = PETSC_FALSE;
3904   PetscFunctionReturn(PETSC_SUCCESS);
3905 }
3906 
3907 PetscErrorCode PCBDDCResetSolvers(PC pc)
3908 {
3909   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3910 
3911   PetscFunctionBegin;
3912   PetscCall(VecDestroy(&pcbddc->coarse_vec));
3913   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3914   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3915   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3916   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3917   PetscCall(VecDestroy(&pcbddc->vec1_P));
3918   PetscCall(VecDestroy(&pcbddc->vec1_C));
3919   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3920   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3921   PetscCall(VecDestroy(&pcbddc->vec1_R));
3922   PetscCall(VecDestroy(&pcbddc->vec2_R));
3923   PetscCall(ISDestroy(&pcbddc->is_R_local));
3924   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3925   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3926   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3927   PetscCall(KSPReset(pcbddc->ksp_D));
3928   PetscCall(KSPReset(pcbddc->ksp_R));
3929   PetscCall(KSPReset(pcbddc->coarse_ksp));
3930   PetscCall(MatDestroy(&pcbddc->local_mat));
3931   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3932   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3933   PetscCall(PetscFree(pcbddc->global_primal_indices));
3934   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3935   PetscCall(MatDestroy(&pcbddc->benign_change));
3936   PetscCall(VecDestroy(&pcbddc->benign_vec));
3937   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3938   PetscCall(MatDestroy(&pcbddc->benign_B0));
3939   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3940   if (pcbddc->benign_zerodiag_subs) {
3941     PetscInt i;
3942     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3943     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3944   }
3945   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3946   PetscFunctionReturn(PETSC_SUCCESS);
3947 }
3948 
3949 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3950 {
3951   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3952   PC_IS   *pcis   = (PC_IS *)pc->data;
3953   VecType  impVecType;
3954   PetscInt n_constraints, n_R, old_size;
3955 
3956   PetscFunctionBegin;
3957   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3958   n_R           = pcis->n - pcbddc->n_vertices;
3959   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3960   /* local work vectors (try to avoid unneeded work)*/
3961   /* R nodes */
3962   old_size = -1;
3963   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3964   if (n_R != old_size) {
3965     PetscCall(VecDestroy(&pcbddc->vec1_R));
3966     PetscCall(VecDestroy(&pcbddc->vec2_R));
3967     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3968     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3969     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3970     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3971   }
3972   /* local primal dofs */
3973   old_size = -1;
3974   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3975   if (pcbddc->local_primal_size != old_size) {
3976     PetscCall(VecDestroy(&pcbddc->vec1_P));
3977     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3978     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3979     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
3980   }
3981   /* local explicit constraints */
3982   old_size = -1;
3983   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
3984   if (n_constraints && n_constraints != old_size) {
3985     PetscCall(VecDestroy(&pcbddc->vec1_C));
3986     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
3987     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
3988     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
3989   }
3990   PetscFunctionReturn(PETSC_SUCCESS);
3991 }
3992 
3993 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
3994 {
3995   PetscBool          flg;
3996   const PetscScalar *a;
3997 
3998   PetscFunctionBegin;
3999   PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
4000   if (flg) {
4001     PetscCall(MatDenseGetArrayRead(S, &a));
4002     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4003     PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4004     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4005     PetscCall(MatDenseRestoreArrayRead(S, &a));
4006   } else {
4007     const PetscInt *ii, *jj;
4008     PetscInt        n;
4009     PetscInt        buf[8192], *bufc = NULL;
4010     PetscBool       freeb = PETSC_FALSE;
4011     Mat             Sm    = S;
4012 
4013     PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4014     if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4015     else PetscCall(PetscObjectReference((PetscObject)S));
4016     PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4017     PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4018     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4019     if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4020       bufc = buf;
4021     } else {
4022       PetscCall(PetscMalloc1(nc, &bufc));
4023       freeb = PETSC_TRUE;
4024     }
4025 
4026     for (PetscInt i = 0; i < n; i++) {
4027       const PetscInt nci = ii[i + 1] - ii[i];
4028 
4029       for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4030       PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4031     }
4032     PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4033     PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4034     PetscCall(MatDestroy(&Sm));
4035     if (freeb) PetscCall(PetscFree(bufc));
4036   }
4037   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4038   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4039   PetscFunctionReturn(PETSC_SUCCESS);
4040 }
4041 
4042 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4043 {
4044   Mat_SeqAIJ        *aij;
4045   PetscInt          *ii, *jj;
4046   PetscScalar       *aa;
4047   PetscInt           nnz = 0, m, nc;
4048   const PetscScalar *a;
4049   const PetscScalar  zero = 0.0;
4050 
4051   PetscFunctionBegin;
4052   PetscCall(MatGetLocalSize(D, &m, &nc));
4053   PetscCall(MatDenseGetArrayRead(D, &a));
4054   PetscCall(PetscMalloc1(m + 1, &ii));
4055   PetscCall(PetscMalloc1(m * nc, &jj));
4056   PetscCall(PetscMalloc1(m * nc, &aa));
4057   ii[0] = 0;
4058   for (PetscInt k = 0; k < m; k++) {
4059     for (PetscInt s = 0; s < nc; s++) {
4060       const PetscInt    c = s + k * nc;
4061       const PetscScalar v = a[k + s * m];
4062 
4063       if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4064       jj[nnz] = j[c];
4065       aa[nnz] = a[k + s * m];
4066       nnz++;
4067     }
4068     ii[k + 1] = nnz;
4069   }
4070 
4071   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4072   PetscCall(MatDenseRestoreArrayRead(D, &a));
4073 
4074   aij          = (Mat_SeqAIJ *)(*mat)->data;
4075   aij->free_a  = PETSC_TRUE;
4076   aij->free_ij = PETSC_TRUE;
4077   PetscFunctionReturn(PETSC_SUCCESS);
4078 }
4079 
4080 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
4081 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4082 {
4083   PetscInt        n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4084   const PetscBool allowzeropivot    = PETSC_FALSE;
4085   PetscBool       zeropivotdetected = PETSC_FALSE;
4086   const PetscReal shift             = 0.0;
4087   PetscInt        ipvt[5], *ii, *jj, *indi, *indj;
4088   PetscScalar     work[25], *v_work = NULL, *aa, *diag;
4089   PetscLogDouble  flops = 0.0;
4090 
4091   PetscFunctionBegin;
4092   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4093   for (PetscInt i = 0; i < nblocks; i++) {
4094     ncnt += bsizes[i];
4095     ncnt2 += PetscSqr(bsizes[i]);
4096   }
4097   PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4098   for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4099   if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4100 
4101   PetscCall(PetscMalloc1(n + 1, &ii));
4102   PetscCall(PetscMalloc1(ncnt2, &jj));
4103   PetscCall(PetscCalloc1(ncnt2, &aa));
4104 
4105   ncnt  = 0;
4106   ii[0] = 0;
4107   indi  = ii;
4108   indj  = jj;
4109   diag  = aa;
4110   for (PetscInt i = 0; i < nblocks; i++) {
4111     const PetscInt bs = bsizes[i];
4112 
4113     for (PetscInt k = 0; k < bs; k++) {
4114       indi[k + 1] = indi[k] + bs;
4115       for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4116     }
4117     PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4118     switch (bs) {
4119     case 1:
4120       *diag = 1.0 / (*diag);
4121       break;
4122     case 2:
4123       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4124       break;
4125     case 3:
4126       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4127       break;
4128     case 4:
4129       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4130       break;
4131     case 5:
4132       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4133       break;
4134     case 6:
4135       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4136       break;
4137     case 7:
4138       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4139       break;
4140     default:
4141       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4142     }
4143     ncnt += bs;
4144     flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4145     diag += bs * bs;
4146     indj += bs * bs;
4147     indi += bs;
4148   }
4149   PetscCall(PetscLogFlops(flops));
4150   PetscCall(PetscFree2(v_work, v_pivots));
4151   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4152   {
4153     Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4154     aij->free_a     = PETSC_TRUE;
4155     aij->free_ij    = PETSC_TRUE;
4156   }
4157   PetscFunctionReturn(PETSC_SUCCESS);
4158 }
4159 
4160 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B)
4161 {
4162   const PetscScalar *rarr;
4163   PetscScalar       *larr;
4164   PetscSF            vsf;
4165   PetscInt           n, rld, lld;
4166 
4167   PetscFunctionBegin;
4168   PetscCall(MatGetSize(A, NULL, &n));
4169   PetscCall(MatDenseGetLDA(A, &rld));
4170   PetscCall(MatDenseGetLDA(B, &lld));
4171   PetscCall(MatDenseGetArrayRead(A, &rarr));
4172   PetscCall(MatDenseGetArrayWrite(B, &larr));
4173   PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf));
4174   PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4175   PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4176   PetscCall(MatDenseRestoreArrayRead(A, &rarr));
4177   PetscCall(MatDenseRestoreArrayWrite(B, &larr));
4178   PetscCall(PetscSFDestroy(&vsf));
4179   PetscFunctionReturn(PETSC_SUCCESS);
4180 }
4181 
4182 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4183 {
4184   PC_IS          *pcis       = (PC_IS *)pc->data;
4185   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
4186   PCBDDCGraph     graph      = pcbddc->mat_graph;
4187   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4188   /* submatrices of local problem */
4189   Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4190   /* submatrices of local coarse problem */
4191   Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4192   /* working matrices */
4193   Mat C_CR;
4194 
4195   /* additional working stuff */
4196   PC              pc_R;
4197   IS              is_R, is_V, is_C;
4198   const PetscInt *idx_V, *idx_C;
4199   Mat             F, Brhs = NULL;
4200   Vec             dummy_vec;
4201   PetscBool       isLU, isCHOL, need_benign_correction, sparserhs;
4202   PetscInt       *idx_V_B;
4203   PetscInt        lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4204   PetscInt        n_eff_vertices, n_eff_constraints;
4205   PetscInt        i, n_R, n_D, n_B;
4206   PetscScalar     one = 1.0, m_one = -1.0;
4207 
4208   /* Multi-element support */
4209   PetscBool multi_element = graph->multi_element;
4210   PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4211   PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4212   IS        is_C_perm = NULL;
4213   PetscInt  n_C_bss = 0, *C_bss = NULL;
4214   Mat       coarse_phi_multi;
4215 
4216   PetscFunctionBegin;
4217   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4218   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4219 
4220   /* Set Non-overlapping dimensions */
4221   n_vertices    = pcbddc->n_vertices;
4222   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4223   n_B           = pcis->n_B;
4224   n_D           = pcis->n - n_B;
4225   n_R           = pcis->n - n_vertices;
4226 
4227   /* vertices in boundary numbering */
4228   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4229   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4230   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4231 
4232   /* these two cases still need to be optimized */
4233   if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4234 
4235   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
4236   if (multi_element) {
4237     PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4238 
4239     PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4240     PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4241     PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4242     PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4243     PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4244 
4245     /* group vertices and constraints by subdomain id */
4246     const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4247     const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4248     PetscInt       *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4249     PetscInt        n_el = PetscMax(graph->n_local_subs, 1);
4250 
4251     PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4252     PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4253     PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4254     for (PetscInt i = 0; i < n_vertices; i++) {
4255       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4256 
4257       V_to_eff_V[i] = count_eff[s];
4258       count_eff[s] += 1;
4259     }
4260     for (PetscInt i = 0; i < n_constraints; i++) {
4261       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4262 
4263       C_to_eff_C[i] = count_eff[s];
4264       count_eff[s] += 1;
4265     }
4266 
4267     /* preallocation */
4268     PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4269     for (PetscInt i = 0; i < n_vertices; i++) {
4270       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4271 
4272       nnz[i] = count_eff[s] + count_eff[s + 1];
4273     }
4274     for (PetscInt i = 0; i < n_constraints; i++) {
4275       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4276 
4277       nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4278     }
4279     PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4280     PetscCall(PetscFree(nnz));
4281 
4282     n_eff_vertices    = 0;
4283     n_eff_constraints = 0;
4284     for (PetscInt i = 0; i < n_el; i++) {
4285       n_eff_vertices       = PetscMax(n_eff_vertices, count_eff[2 * i]);
4286       n_eff_constraints    = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4287       count_eff[2 * i]     = 0;
4288       count_eff[2 * i + 1] = 0;
4289     }
4290 
4291     const PetscInt *idx;
4292     PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4293 
4294     for (PetscInt i = 0; i < n_vertices; i++) {
4295       const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4296       const PetscInt s = 2 * e;
4297 
4298       V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4299       count_eff[s] += 1;
4300     }
4301     for (PetscInt i = 0; i < n_constraints; i++) {
4302       const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4303       const PetscInt s = 2 * e + 1;
4304 
4305       C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4306       count_eff[s] += 1;
4307     }
4308 
4309     PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4310     PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4311     PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4312     PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4313     for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4314     for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4315     for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4316     for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4317 
4318     PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4319     for (PetscInt i = 0; i < n_R; i++) {
4320       const PetscInt e = graph->nodes[idx[i]].local_sub;
4321       const PetscInt s = 2 * e;
4322       PetscInt       j;
4323 
4324       for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4325       for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4326     }
4327     PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4328     PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4329     for (PetscInt i = 0; i < n_B; i++) {
4330       const PetscInt e = graph->nodes[idx[i]].local_sub;
4331       const PetscInt s = 2 * e;
4332       PetscInt       j;
4333 
4334       for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4335       for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4336     }
4337     PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4338 
4339     /* permutation and blocksizes for block invert of S_CC */
4340     PetscInt *idxp;
4341 
4342     PetscCall(PetscMalloc1(n_constraints, &idxp));
4343     PetscCall(PetscMalloc1(n_el, &C_bss));
4344     n_C_bss = 0;
4345     for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4346       const PetscInt nc = count_eff[2 * e + 1];
4347 
4348       if (nc) C_bss[n_C_bss++] = nc;
4349       for (PetscInt c = 0; c < nc; c++) { idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; }
4350       cnt += nc;
4351     }
4352 
4353     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4354 
4355     PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4356     PetscCall(PetscFree(count_eff));
4357   } else {
4358     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4359     n_eff_constraints = n_constraints;
4360     n_eff_vertices    = n_vertices;
4361   }
4362 
4363   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4364   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4365   PetscCall(PCSetUp(pc_R));
4366   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4367   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4368   lda_rhs                = n_R;
4369   need_benign_correction = PETSC_FALSE;
4370   if (isLU || isCHOL) {
4371     PetscCall(PCFactorGetMatrix(pc_R, &F));
4372   } else if (sub_schurs && sub_schurs->reuse_solver) {
4373     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4374     MatFactorType      type;
4375 
4376     F = reuse_solver->F;
4377     PetscCall(MatGetFactorType(F, &type));
4378     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4379     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4380     PetscCall(MatGetSize(F, &lda_rhs, NULL));
4381     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4382   } else F = NULL;
4383 
4384   /* determine if we can use a sparse right-hand side */
4385   sparserhs = PETSC_FALSE;
4386   if (F && !multi_element) {
4387     MatSolverType solver;
4388 
4389     PetscCall(MatFactorGetSolverType(F, &solver));
4390     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4391   }
4392 
4393   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4394   dummy_vec = NULL;
4395   if (need_benign_correction && lda_rhs != n_R && F) {
4396     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4397     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4398     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4399   }
4400 
4401   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4402   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4403 
4404   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4405   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4406   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4407   PetscCall(ISGetIndices(is_V, &idx_V));
4408   PetscCall(ISGetIndices(is_C, &idx_C));
4409 
4410   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4411   if (n_constraints) {
4412     Mat C_B;
4413 
4414     /* Extract constraints on R nodes: C_{CR}  */
4415     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4416     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4417 
4418     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4419     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4420     if (!sparserhs) {
4421       PetscScalar *marr;
4422 
4423       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4424       PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4425       for (i = 0; i < n_constraints; i++) {
4426         const PetscScalar *row_cmat_values;
4427         const PetscInt    *row_cmat_indices;
4428         PetscInt           size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4429 
4430         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4431         for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4432         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4433       }
4434       PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4435     } else {
4436       Mat tC_CR;
4437 
4438       PetscCall(MatScale(C_CR, -1.0));
4439       if (lda_rhs != n_R) {
4440         PetscScalar *aa;
4441         PetscInt     r, *ii, *jj;
4442         PetscBool    done;
4443 
4444         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4445         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4446         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4447         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4448         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4449         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4450       } else {
4451         PetscCall(PetscObjectReference((PetscObject)C_CR));
4452         tC_CR = C_CR;
4453       }
4454       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4455       PetscCall(MatDestroy(&tC_CR));
4456     }
4457     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4458     if (F) {
4459       if (need_benign_correction) {
4460         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4461 
4462         /* rhs is already zero on interior dofs, no need to change the rhs */
4463         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4464       }
4465       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4466       if (need_benign_correction) {
4467         PetscScalar       *marr;
4468         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4469 
4470         /* XXX multi_element? */
4471         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4472         if (lda_rhs != n_R) {
4473           for (i = 0; i < n_eff_constraints; i++) {
4474             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4475             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4476             PetscCall(VecResetArray(dummy_vec));
4477           }
4478         } else {
4479           for (i = 0; i < n_eff_constraints; i++) {
4480             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4481             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4482             PetscCall(VecResetArray(pcbddc->vec1_R));
4483           }
4484         }
4485         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4486       }
4487     } else {
4488       const PetscScalar *barr;
4489       PetscScalar       *marr;
4490 
4491       PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4492       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4493       for (i = 0; i < n_eff_constraints; i++) {
4494         PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4495         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4496         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4497         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4498         PetscCall(VecResetArray(pcbddc->vec1_R));
4499         PetscCall(VecResetArray(pcbddc->vec2_R));
4500       }
4501       PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4502       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4503     }
4504     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4505     PetscCall(MatDestroy(&Brhs));
4506     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4507     if (!pcbddc->switch_static) {
4508       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4509       for (i = 0; i < n_eff_constraints; i++) {
4510         Vec r, b;
4511         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
4512         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
4513         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4514         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4515         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
4516         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
4517       }
4518       if (multi_element) {
4519         Mat T;
4520 
4521         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4522         PetscCall(MatDestroy(&local_auxmat2_R));
4523         local_auxmat2_R = T;
4524         PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4525         PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4526         pcbddc->local_auxmat2 = T;
4527       }
4528       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4529     } else {
4530       if (multi_element) {
4531         Mat T;
4532 
4533         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4534         PetscCall(MatDestroy(&local_auxmat2_R));
4535         local_auxmat2_R = T;
4536       }
4537       if (lda_rhs != n_R) {
4538         PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4539       } else {
4540         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4541         pcbddc->local_auxmat2 = local_auxmat2_R;
4542       }
4543       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4544     }
4545     PetscCall(MatScale(S_CC, m_one));
4546     if (multi_element) {
4547       Mat T, T2;
4548       IS  isp, ispi;
4549 
4550       isp = is_C_perm;
4551 
4552       PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4553       PetscCall(MatPermute(S_CC, isp, isp, &T));
4554       PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4555       PetscCall(MatDestroy(&T));
4556       PetscCall(MatDestroy(&S_CC));
4557       PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4558       PetscCall(MatDestroy(&T2));
4559       PetscCall(ISDestroy(&ispi));
4560     } else {
4561       if (isCHOL) {
4562         PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4563       } else {
4564         PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4565       }
4566       PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4567     }
4568     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4569     PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4570     PetscCall(MatDestroy(&C_B));
4571     PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4572   }
4573 
4574   /* Get submatrices from subdomain matrix */
4575   if (n_vertices) {
4576 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4577     PetscBool oldpin;
4578 #endif
4579     IS is_aux;
4580 
4581     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4582       IS tis;
4583 
4584       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4585       PetscCall(ISSort(tis));
4586       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4587       PetscCall(ISDestroy(&tis));
4588     } else {
4589       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4590     }
4591 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4592     oldpin = pcbddc->local_mat->boundtocpu;
4593 #endif
4594     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4595     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4596     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4597     /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4598     PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4599     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4600 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4601     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4602 #endif
4603     PetscCall(ISDestroy(&is_aux));
4604   }
4605   PetscCall(ISDestroy(&is_C_perm));
4606   PetscCall(PetscFree(C_bss));
4607 
4608   p0_lidx_I = NULL;
4609   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4610     const PetscInt *idxs;
4611 
4612     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4613     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4614     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4615     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4616   }
4617 
4618   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4619 
4620   /* Matrices of coarse basis functions (local) */
4621   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4622   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4623   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4624   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4625   if (!multi_element) {
4626     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4627     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4628     coarse_phi_multi = NULL;
4629   } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4630     IS is_rows[2] = {pcbddc->is_R_local, NULL};
4631     IS is_cols[2] = {is_V, is_C};
4632 
4633     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4634     PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4635     PetscCall(ISDestroy(&is_rows[1]));
4636   }
4637 
4638   /* vertices */
4639   if (n_vertices) {
4640     PetscBool restoreavr = PETSC_FALSE;
4641     Mat       A_RRmA_RV  = NULL;
4642 
4643     PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4644     PetscCall(MatDestroy(&A_VV));
4645 
4646     if (n_R) {
4647       Mat A_RV_bcorr = NULL, S_VV;
4648 
4649       PetscCall(MatScale(A_RV, m_one));
4650       if (need_benign_correction) {
4651         ISLocalToGlobalMapping RtoN;
4652         IS                     is_p0;
4653         PetscInt              *idxs_p0, n;
4654 
4655         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4656         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4657         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4658         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4659         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4660         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4661         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4662         PetscCall(ISDestroy(&is_p0));
4663       }
4664 
4665       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4666       if (!sparserhs || need_benign_correction) {
4667         if (lda_rhs == n_R && !multi_element) {
4668           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4669         } else {
4670           Mat             T;
4671           PetscScalar    *av, *array;
4672           const PetscInt *xadj, *adjncy;
4673           PetscInt        n;
4674           PetscBool       flg_row;
4675 
4676           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4677           PetscCall(MatDenseGetArrayWrite(T, &array));
4678           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4679           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4680           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4681           for (i = 0; i < n; i++) {
4682             PetscInt j;
4683             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4684           }
4685           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4686           PetscCall(MatDenseRestoreArrayWrite(T, &array));
4687           PetscCall(MatDestroy(&A_RV));
4688           A_RV = T;
4689         }
4690         if (need_benign_correction) {
4691           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4692           PetscScalar       *marr;
4693 
4694           /* XXX multi_element */
4695           PetscCall(MatDenseGetArray(A_RV, &marr));
4696           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4697 
4698                  | 0 0  0 | (V)
4699              L = | 0 0 -1 | (P-p0)
4700                  | 0 0 -1 | (p0)
4701 
4702           */
4703           for (i = 0; i < reuse_solver->benign_n; i++) {
4704             const PetscScalar *vals;
4705             const PetscInt    *idxs, *idxs_zero;
4706             PetscInt           n, j, nz;
4707 
4708             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4709             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4710             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4711             for (j = 0; j < n; j++) {
4712               PetscScalar val = vals[j];
4713               PetscInt    k, col = idxs[j];
4714               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4715             }
4716             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4717             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4718           }
4719           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4720         }
4721         PetscCall(PetscObjectReference((PetscObject)A_RV));
4722         Brhs = A_RV;
4723       } else {
4724         Mat tA_RVT, A_RVT;
4725 
4726         if (!pcbddc->symmetric_primal) {
4727           /* A_RV already scaled by -1 */
4728           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4729         } else {
4730           restoreavr = PETSC_TRUE;
4731           PetscCall(MatScale(A_VR, -1.0));
4732           PetscCall(PetscObjectReference((PetscObject)A_VR));
4733           A_RVT = A_VR;
4734         }
4735         if (lda_rhs != n_R) {
4736           PetscScalar *aa;
4737           PetscInt     r, *ii, *jj;
4738           PetscBool    done;
4739 
4740           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4741           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4742           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4743           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4744           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4745           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4746         } else {
4747           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4748           tA_RVT = A_RVT;
4749         }
4750         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4751         PetscCall(MatDestroy(&tA_RVT));
4752         PetscCall(MatDestroy(&A_RVT));
4753       }
4754       if (F) {
4755         /* need to correct the rhs */
4756         if (need_benign_correction) {
4757           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4758           PetscScalar       *marr;
4759 
4760           PetscCall(MatDenseGetArray(Brhs, &marr));
4761           if (lda_rhs != n_R) {
4762             for (i = 0; i < n_eff_vertices; i++) {
4763               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4764               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4765               PetscCall(VecResetArray(dummy_vec));
4766             }
4767           } else {
4768             for (i = 0; i < n_eff_vertices; i++) {
4769               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4770               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4771               PetscCall(VecResetArray(pcbddc->vec1_R));
4772             }
4773           }
4774           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4775         }
4776         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4777         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4778         /* need to correct the solution */
4779         if (need_benign_correction) {
4780           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4781           PetscScalar       *marr;
4782 
4783           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4784           if (lda_rhs != n_R) {
4785             for (i = 0; i < n_eff_vertices; i++) {
4786               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4787               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4788               PetscCall(VecResetArray(dummy_vec));
4789             }
4790           } else {
4791             for (i = 0; i < n_eff_vertices; i++) {
4792               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4793               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4794               PetscCall(VecResetArray(pcbddc->vec1_R));
4795             }
4796           }
4797           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4798         }
4799       } else {
4800         const PetscScalar *barr;
4801         PetscScalar       *marr;
4802 
4803         PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4804         PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4805         for (i = 0; i < n_eff_vertices; i++) {
4806           PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4807           PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4808           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4809           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4810           PetscCall(VecResetArray(pcbddc->vec1_R));
4811           PetscCall(VecResetArray(pcbddc->vec2_R));
4812         }
4813         PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4814         PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4815       }
4816       PetscCall(MatDestroy(&A_RV));
4817       PetscCall(MatDestroy(&Brhs));
4818       /* S_VV and S_CV */
4819       if (n_constraints) {
4820         Mat B;
4821 
4822         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4823         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B));
4824 
4825         /* S_CV = pcbddc->local_auxmat1 * B */
4826         if (multi_element) {
4827           Mat T;
4828 
4829           PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4830           PetscCall(MatDestroy(&B));
4831           B = T;
4832         }
4833         PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4834         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4835         PetscCall(MatProductSetFromOptions(S_CV));
4836         PetscCall(MatProductSymbolic(S_CV));
4837         PetscCall(MatProductNumeric(S_CV));
4838         PetscCall(MatProductClear(S_CV));
4839         PetscCall(MatDestroy(&B));
4840 
4841         /* B = local_auxmat2_R * S_CV */
4842         PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4843         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4844         PetscCall(MatProductSetFromOptions(B));
4845         PetscCall(MatProductSymbolic(B));
4846         PetscCall(MatProductNumeric(B));
4847 
4848         PetscCall(MatScale(S_CV, m_one));
4849         PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4850 
4851         if (multi_element) {
4852           Mat T;
4853 
4854           PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4855           PetscCall(MatDestroy(&A_RRmA_RV));
4856           A_RRmA_RV = T;
4857         }
4858         PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4859         PetscCall(MatDestroy(&B));
4860       } else if (multi_element) {
4861         Mat T;
4862 
4863         PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4864         PetscCall(MatDestroy(&A_RRmA_RV));
4865         A_RRmA_RV = T;
4866       }
4867 
4868       if (lda_rhs != n_R) {
4869         Mat T;
4870 
4871         PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4872         PetscCall(MatDestroy(&A_RRmA_RV));
4873         A_RRmA_RV = T;
4874       }
4875 
4876       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4877       if (need_benign_correction) { /* XXX SPARSE */
4878         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4879         PetscScalar       *sums;
4880         const PetscScalar *marr;
4881 
4882         PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
4883         PetscCall(PetscMalloc1(n_vertices, &sums));
4884         for (i = 0; i < reuse_solver->benign_n; i++) {
4885           const PetscScalar *vals;
4886           const PetscInt    *idxs, *idxs_zero;
4887           PetscInt           n, j, nz;
4888 
4889           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4890           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4891           for (j = 0; j < n_vertices; j++) {
4892             sums[j] = 0.;
4893             for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
4894           }
4895           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4896           for (j = 0; j < n; j++) {
4897             PetscScalar val = vals[j];
4898             for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
4899           }
4900           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4901           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4902         }
4903         PetscCall(PetscFree(sums));
4904         PetscCall(MatDestroy(&A_RV_bcorr));
4905         PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
4906       }
4907 
4908       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
4909       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4910       PetscCall(MatDestroy(&S_VV));
4911     }
4912 
4913     /* coarse basis functions */
4914     if (coarse_phi_multi) {
4915       Mat Vid;
4916 
4917       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
4918       PetscCall(MatShift_Basic(Vid, 1.0));
4919       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
4920       PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
4921       PetscCall(MatDestroy(&Vid));
4922     } else {
4923       if (A_RRmA_RV) {
4924         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B));
4925         if (pcbddc->switch_static || pcbddc->dbg_flag) {
4926           PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D));
4927           if (pcbddc->benign_n) {
4928             for (i = 0; i < n_vertices; i++) { PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); }
4929           }
4930         }
4931       }
4932       for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
4933       PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4934       PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4935     }
4936     PetscCall(MatDestroy(&A_RRmA_RV));
4937   }
4938   PetscCall(MatDestroy(&A_RV));
4939   PetscCall(VecDestroy(&dummy_vec));
4940 
4941   if (n_constraints) {
4942     Mat B, B2;
4943 
4944     PetscCall(MatScale(S_CC, m_one));
4945     PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
4946     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4947     PetscCall(MatProductSetFromOptions(B));
4948     PetscCall(MatProductSymbolic(B));
4949     PetscCall(MatProductNumeric(B));
4950 
4951     if (n_vertices) {
4952       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4953         PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
4954       } else {
4955         if (lda_rhs != n_R) {
4956           Mat tB;
4957 
4958           PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
4959           PetscCall(MatDestroy(&B));
4960           B = tB;
4961         }
4962         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
4963       }
4964       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
4965     }
4966 
4967     /* coarse basis functions */
4968     if (coarse_phi_multi) {
4969       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
4970     } else {
4971       PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4972       PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2));
4973       PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
4974       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4975         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4976         PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2));
4977         if (pcbddc->benign_n) {
4978           for (i = 0; i < n_constraints; i++) { PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); }
4979         }
4980         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
4981       }
4982     }
4983     PetscCall(MatDestroy(&B));
4984   }
4985 
4986   /* assemble sparse coarse basis functions */
4987   if (coarse_phi_multi) {
4988     Mat T;
4989 
4990     PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
4991     PetscCall(MatDestroy(&coarse_phi_multi));
4992     PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
4993     if (pcbddc->switch_static || pcbddc->dbg_flag) { PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); }
4994     PetscCall(MatDestroy(&T));
4995   }
4996   PetscCall(MatDestroy(&local_auxmat2_R));
4997   PetscCall(PetscFree(p0_lidx_I));
4998 
4999   /* coarse matrix entries relative to B_0 */
5000   if (pcbddc->benign_n) {
5001     Mat                B0_B, B0_BPHI;
5002     IS                 is_dummy;
5003     const PetscScalar *data;
5004     PetscInt           j;
5005 
5006     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5007     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5008     PetscCall(ISDestroy(&is_dummy));
5009     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5010     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5011     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5012     for (j = 0; j < pcbddc->benign_n; j++) {
5013       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5014       for (i = 0; i < pcbddc->local_primal_size; i++) {
5015         PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5016         PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5017       }
5018     }
5019     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5020     PetscCall(MatDestroy(&B0_B));
5021     PetscCall(MatDestroy(&B0_BPHI));
5022   }
5023 
5024   /* compute other basis functions for non-symmetric problems */
5025   if (!pcbddc->symmetric_primal) {
5026     Mat          B_V = NULL, B_C = NULL;
5027     PetscScalar *marray, *work;
5028 
5029     /* TODO multi_element MatDenseScatter */
5030     if (n_constraints) {
5031       Mat S_CCT, C_CRT;
5032 
5033       PetscCall(MatScale(S_CC, m_one));
5034       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5035       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5036       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5037       PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5038       PetscCall(MatDestroy(&S_CCT));
5039       if (n_vertices) {
5040         Mat S_VCT;
5041 
5042         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5043         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5044         PetscCall(MatDestroy(&S_VCT));
5045         PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5046       }
5047       PetscCall(MatDestroy(&C_CRT));
5048     } else {
5049       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5050     }
5051     if (n_vertices && n_R) {
5052       PetscScalar    *av, *marray;
5053       const PetscInt *xadj, *adjncy;
5054       PetscInt        n;
5055       PetscBool       flg_row;
5056 
5057       /* B_V = B_V - A_VR^T */
5058       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5059       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5060       PetscCall(MatSeqAIJGetArray(A_VR, &av));
5061       PetscCall(MatDenseGetArray(B_V, &marray));
5062       for (i = 0; i < n; i++) {
5063         PetscInt j;
5064         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5065       }
5066       PetscCall(MatDenseRestoreArray(B_V, &marray));
5067       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5068       PetscCall(MatDestroy(&A_VR));
5069     }
5070 
5071     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5072     PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5073     if (n_vertices) {
5074       PetscCall(MatDenseGetArray(B_V, &marray));
5075       for (i = 0; i < n_vertices; i++) {
5076         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5077         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5078         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5079         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5080         PetscCall(VecResetArray(pcbddc->vec1_R));
5081         PetscCall(VecResetArray(pcbddc->vec2_R));
5082       }
5083       PetscCall(MatDenseRestoreArray(B_V, &marray));
5084     }
5085     if (B_C) {
5086       PetscCall(MatDenseGetArray(B_C, &marray));
5087       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5088         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5089         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5090         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5091         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5092         PetscCall(VecResetArray(pcbddc->vec1_R));
5093         PetscCall(VecResetArray(pcbddc->vec2_R));
5094       }
5095       PetscCall(MatDenseRestoreArray(B_C, &marray));
5096     }
5097     /* coarse basis functions */
5098     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5099     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5100     for (i = 0; i < pcbddc->local_primal_size; i++) {
5101       Vec v;
5102 
5103       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5104       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5105       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5106       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5107       if (i < n_vertices) {
5108         PetscScalar one = 1.0;
5109         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5110         PetscCall(VecAssemblyBegin(v));
5111         PetscCall(VecAssemblyEnd(v));
5112       }
5113       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5114 
5115       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5116         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5117         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5118         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5119         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5120       }
5121       PetscCall(VecResetArray(pcbddc->vec1_R));
5122     }
5123     PetscCall(MatDestroy(&B_V));
5124     PetscCall(MatDestroy(&B_C));
5125     PetscCall(PetscFree(work));
5126   } else {
5127     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5128     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5129     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5130     pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5131   }
5132   PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5133   PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5134 
5135   /* free memory */
5136   PetscCall(PetscFree(V_to_eff_V));
5137   PetscCall(PetscFree(C_to_eff_C));
5138   PetscCall(PetscFree(R_eff_V_J));
5139   PetscCall(PetscFree(R_eff_C_J));
5140   PetscCall(PetscFree(B_eff_V_J));
5141   PetscCall(PetscFree(B_eff_C_J));
5142   PetscCall(ISDestroy(&is_R));
5143   PetscCall(ISRestoreIndices(is_V, &idx_V));
5144   PetscCall(ISRestoreIndices(is_C, &idx_C));
5145   PetscCall(ISDestroy(&is_V));
5146   PetscCall(ISDestroy(&is_C));
5147   PetscCall(PetscFree(idx_V_B));
5148   PetscCall(MatDestroy(&S_CV));
5149   PetscCall(MatDestroy(&S_VC));
5150   PetscCall(MatDestroy(&S_CC));
5151   if (n_vertices) PetscCall(MatDestroy(&A_VR));
5152   if (n_constraints) PetscCall(MatDestroy(&C_CR));
5153   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5154 
5155   /* Checking coarse_sub_mat and coarse basis functions */
5156   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5157   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5158   if (pcbddc->dbg_flag) {
5159     Mat       AUXMAT, TM1, TM2, TM3, TM4;
5160     Mat       coarse_phi_D, coarse_phi_B;
5161     Mat       coarse_psi_D, coarse_psi_B;
5162     Mat       A_II, A_BB, A_IB, A_BI;
5163     Mat       C_B, CPHI;
5164     IS        is_dummy;
5165     Vec       mones;
5166     MatType   checkmattype = MATSEQAIJ;
5167     PetscReal real_value;
5168 
5169     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5170       Mat A;
5171       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5172       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5173       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5174       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5175       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5176       PetscCall(MatDestroy(&A));
5177     } else {
5178       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5179       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5180       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5181       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5182     }
5183     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5184     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5185     if (!pcbddc->symmetric_primal) {
5186       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5187       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5188     }
5189     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5190     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5191     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5192     if (!pcbddc->symmetric_primal) {
5193       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5194       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5195       PetscCall(MatDestroy(&AUXMAT));
5196       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5197       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5198       PetscCall(MatDestroy(&AUXMAT));
5199       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5200       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5201       PetscCall(MatDestroy(&AUXMAT));
5202       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5203       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5204       PetscCall(MatDestroy(&AUXMAT));
5205     } else {
5206       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5207       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5208       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5209       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5210       PetscCall(MatDestroy(&AUXMAT));
5211       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5212       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5213       PetscCall(MatDestroy(&AUXMAT));
5214     }
5215     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5216     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5217     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5218     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5219     if (pcbddc->benign_n) {
5220       Mat                B0_B, B0_BPHI;
5221       const PetscScalar *data2;
5222       PetscScalar       *data;
5223       PetscInt           j;
5224 
5225       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5226       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5227       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5228       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5229       PetscCall(MatDenseGetArray(TM1, &data));
5230       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5231       for (j = 0; j < pcbddc->benign_n; j++) {
5232         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5233         for (i = 0; i < pcbddc->local_primal_size; i++) {
5234           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5235           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5236         }
5237       }
5238       PetscCall(MatDenseRestoreArray(TM1, &data));
5239       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5240       PetscCall(MatDestroy(&B0_B));
5241       PetscCall(ISDestroy(&is_dummy));
5242       PetscCall(MatDestroy(&B0_BPHI));
5243     }
5244     PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5245     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5246     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5247     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5248 
5249     /* check constraints */
5250     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5251     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5252     if (!pcbddc->benign_n) { /* TODO: add benign case */
5253       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5254     } else {
5255       PetscScalar *data;
5256       Mat          tmat;
5257       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5258       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5259       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5260       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5261       PetscCall(MatDestroy(&tmat));
5262     }
5263     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5264     PetscCall(VecSet(mones, -1.0));
5265     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5266     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5267     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5268     if (!pcbddc->symmetric_primal) {
5269       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5270       PetscCall(VecSet(mones, -1.0));
5271       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5272       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5273       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5274     }
5275     PetscCall(MatDestroy(&C_B));
5276     PetscCall(MatDestroy(&CPHI));
5277     PetscCall(ISDestroy(&is_dummy));
5278     PetscCall(VecDestroy(&mones));
5279     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5280     PetscCall(MatDestroy(&A_II));
5281     PetscCall(MatDestroy(&A_BB));
5282     PetscCall(MatDestroy(&A_IB));
5283     PetscCall(MatDestroy(&A_BI));
5284     PetscCall(MatDestroy(&TM1));
5285     PetscCall(MatDestroy(&TM2));
5286     PetscCall(MatDestroy(&TM3));
5287     PetscCall(MatDestroy(&TM4));
5288     PetscCall(MatDestroy(&coarse_phi_D));
5289     PetscCall(MatDestroy(&coarse_phi_B));
5290     if (!pcbddc->symmetric_primal) {
5291       PetscCall(MatDestroy(&coarse_psi_D));
5292       PetscCall(MatDestroy(&coarse_psi_B));
5293     }
5294   }
5295 
5296 #if 0
5297   {
5298     PetscViewer viewer;
5299     char filename[256];
5300 
5301     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5302     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5303     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5304     PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5305     PetscCall(MatView(*coarse_submat,viewer));
5306     if (pcbddc->coarse_phi_B) {
5307       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5308       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5309     }
5310     if (pcbddc->coarse_phi_D) {
5311       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5312       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5313     }
5314     if (pcbddc->coarse_psi_B) {
5315       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5316       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5317     }
5318     if (pcbddc->coarse_psi_D) {
5319       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5320       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5321     }
5322     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5323     PetscCall(MatView(pcbddc->local_mat,viewer));
5324     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5325     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5326     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5327     PetscCall(ISView(pcis->is_I_local,viewer));
5328     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5329     PetscCall(ISView(pcis->is_B_local,viewer));
5330     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5331     PetscCall(ISView(pcbddc->is_R_local,viewer));
5332     PetscCall(PetscViewerDestroy(&viewer));
5333   }
5334 #endif
5335 
5336   /* device support */
5337   {
5338     PetscBool iscuda, iship, iskokkos;
5339     MatType   mtype = NULL;
5340 
5341     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5342     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5343     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5344     if (iskokkos) {
5345       if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5346       else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5347     }
5348     if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5349     else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5350     else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5351     if (mtype) {
5352       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5353       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5354       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5355       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5356       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5357       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5358     }
5359   }
5360   PetscFunctionReturn(PETSC_SUCCESS);
5361 }
5362 
5363 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5364 {
5365   Mat      *work_mat;
5366   IS        isrow_s, iscol_s;
5367   PetscBool rsorted, csorted;
5368   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5369 
5370   PetscFunctionBegin;
5371   PetscCall(ISSorted(isrow, &rsorted));
5372   PetscCall(ISSorted(iscol, &csorted));
5373   PetscCall(ISGetLocalSize(isrow, &rsize));
5374   PetscCall(ISGetLocalSize(iscol, &csize));
5375 
5376   if (!rsorted) {
5377     const PetscInt *idxs;
5378     PetscInt       *idxs_sorted, i;
5379 
5380     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5381     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5382     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5383     PetscCall(ISGetIndices(isrow, &idxs));
5384     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5385     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5386     PetscCall(ISRestoreIndices(isrow, &idxs));
5387     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5388   } else {
5389     PetscCall(PetscObjectReference((PetscObject)isrow));
5390     isrow_s = isrow;
5391   }
5392 
5393   if (!csorted) {
5394     if (isrow == iscol) {
5395       PetscCall(PetscObjectReference((PetscObject)isrow_s));
5396       iscol_s = isrow_s;
5397     } else {
5398       const PetscInt *idxs;
5399       PetscInt       *idxs_sorted, i;
5400 
5401       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5402       PetscCall(PetscMalloc1(csize, &idxs_sorted));
5403       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5404       PetscCall(ISGetIndices(iscol, &idxs));
5405       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5406       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5407       PetscCall(ISRestoreIndices(iscol, &idxs));
5408       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5409     }
5410   } else {
5411     PetscCall(PetscObjectReference((PetscObject)iscol));
5412     iscol_s = iscol;
5413   }
5414 
5415   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5416 
5417   if (!rsorted || !csorted) {
5418     Mat new_mat;
5419     IS  is_perm_r, is_perm_c;
5420 
5421     if (!rsorted) {
5422       PetscInt *idxs_r, i;
5423       PetscCall(PetscMalloc1(rsize, &idxs_r));
5424       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5425       PetscCall(PetscFree(idxs_perm_r));
5426       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5427     } else {
5428       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5429     }
5430     PetscCall(ISSetPermutation(is_perm_r));
5431 
5432     if (!csorted) {
5433       if (isrow_s == iscol_s) {
5434         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5435         is_perm_c = is_perm_r;
5436       } else {
5437         PetscInt *idxs_c, i;
5438         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5439         PetscCall(PetscMalloc1(csize, &idxs_c));
5440         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5441         PetscCall(PetscFree(idxs_perm_c));
5442         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5443       }
5444     } else {
5445       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5446     }
5447     PetscCall(ISSetPermutation(is_perm_c));
5448 
5449     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5450     PetscCall(MatDestroy(&work_mat[0]));
5451     work_mat[0] = new_mat;
5452     PetscCall(ISDestroy(&is_perm_r));
5453     PetscCall(ISDestroy(&is_perm_c));
5454   }
5455 
5456   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5457   *B = work_mat[0];
5458   PetscCall(MatDestroyMatrices(1, &work_mat));
5459   PetscCall(ISDestroy(&isrow_s));
5460   PetscCall(ISDestroy(&iscol_s));
5461   PetscFunctionReturn(PETSC_SUCCESS);
5462 }
5463 
5464 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5465 {
5466   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
5467   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
5468   Mat       new_mat, lA;
5469   IS        is_local, is_global;
5470   PetscInt  local_size;
5471   PetscBool isseqaij, issym, isset;
5472 
5473   PetscFunctionBegin;
5474   PetscCall(MatDestroy(&pcbddc->local_mat));
5475   PetscCall(MatGetSize(matis->A, &local_size, NULL));
5476   if (pcbddc->mat_graph->multi_element) {
5477     Mat     *mats, *bdiags;
5478     IS      *gsubs;
5479     PetscInt nsubs = pcbddc->n_local_subs;
5480 
5481     PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5482     PetscCall(PetscMalloc1(nsubs, &gsubs));
5483     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5484     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5485     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5486     PetscCall(PetscFree(gsubs));
5487 
5488     for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5489     PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5490     PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5491     PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5492     PetscCall(PetscFree(mats));
5493   } else {
5494     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5495     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5496     PetscCall(ISDestroy(&is_local));
5497     PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5498     PetscCall(ISDestroy(&is_global));
5499   }
5500   if (pcbddc->dbg_flag) {
5501     Vec       x, x_change;
5502     PetscReal error;
5503 
5504     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5505     PetscCall(VecSetRandom(x, NULL));
5506     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5507     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5508     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5509     PetscCall(MatMult(new_mat, matis->x, matis->y));
5510     if (!pcbddc->change_interior) {
5511       const PetscScalar *x, *y, *v;
5512       PetscReal          lerror = 0.;
5513       PetscInt           i;
5514 
5515       PetscCall(VecGetArrayRead(matis->x, &x));
5516       PetscCall(VecGetArrayRead(matis->y, &y));
5517       PetscCall(VecGetArrayRead(matis->counter, &v));
5518       for (i = 0; i < local_size; i++)
5519         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5520       PetscCall(VecRestoreArrayRead(matis->x, &x));
5521       PetscCall(VecRestoreArrayRead(matis->y, &y));
5522       PetscCall(VecRestoreArrayRead(matis->counter, &v));
5523       PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5524       if (error > PETSC_SMALL) {
5525         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5526           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5527         } else {
5528           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5529         }
5530       }
5531     }
5532     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5533     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5534     PetscCall(VecAXPY(x, -1.0, x_change));
5535     PetscCall(VecNorm(x, NORM_INFINITY, &error));
5536     if (error > PETSC_SMALL) {
5537       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5538         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5539       } else {
5540         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5541       }
5542     }
5543     PetscCall(VecDestroy(&x));
5544     PetscCall(VecDestroy(&x_change));
5545   }
5546 
5547   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5548   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5549 
5550   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5551   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5552   if (isseqaij) {
5553     PetscCall(MatDestroy(&pcbddc->local_mat));
5554     PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5555     if (lA) {
5556       Mat work;
5557       PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5558       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5559       PetscCall(MatDestroy(&work));
5560     }
5561   } else {
5562     Mat work_mat;
5563 
5564     PetscCall(MatDestroy(&pcbddc->local_mat));
5565     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5566     PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5567     PetscCall(MatDestroy(&work_mat));
5568     if (lA) {
5569       Mat work;
5570       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5571       PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5572       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5573       PetscCall(MatDestroy(&work));
5574     }
5575   }
5576   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5577   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5578   PetscCall(MatDestroy(&new_mat));
5579   PetscFunctionReturn(PETSC_SUCCESS);
5580 }
5581 
5582 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5583 {
5584   PC_IS          *pcis        = (PC_IS *)pc->data;
5585   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
5586   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
5587   PetscInt       *idx_R_local = NULL;
5588   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
5589   PetscInt        vbs, bs;
5590   PetscBT         bitmask = NULL;
5591 
5592   PetscFunctionBegin;
5593   /*
5594     No need to setup local scatters if
5595       - primal space is unchanged
5596         AND
5597       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5598         AND
5599       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5600   */
5601   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5602   /* destroy old objects */
5603   PetscCall(ISDestroy(&pcbddc->is_R_local));
5604   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5605   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5606   /* Set Non-overlapping dimensions */
5607   n_B        = pcis->n_B;
5608   n_D        = pcis->n - n_B;
5609   n_vertices = pcbddc->n_vertices;
5610 
5611   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5612 
5613   /* create auxiliary bitmask and allocate workspace */
5614   if (!sub_schurs || !sub_schurs->reuse_solver) {
5615     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5616     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5617     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5618 
5619     for (i = 0, n_R = 0; i < pcis->n; i++) {
5620       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5621     }
5622   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5623     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5624 
5625     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5626     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5627   }
5628 
5629   /* Block code */
5630   vbs = 1;
5631   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5632   if (bs > 1 && !(n_vertices % bs)) {
5633     PetscBool is_blocked = PETSC_TRUE;
5634     PetscInt *vary;
5635     if (!sub_schurs || !sub_schurs->reuse_solver) {
5636       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5637       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5638       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5639       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5640       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5641       for (i = 0; i < pcis->n / bs; i++) {
5642         if (vary[i] != 0 && vary[i] != bs) {
5643           is_blocked = PETSC_FALSE;
5644           break;
5645         }
5646       }
5647       PetscCall(PetscFree(vary));
5648     } else {
5649       /* Verify directly the R set */
5650       for (i = 0; i < n_R / bs; i++) {
5651         PetscInt j, node = idx_R_local[bs * i];
5652         for (j = 1; j < bs; j++) {
5653           if (node != idx_R_local[bs * i + j] - j) {
5654             is_blocked = PETSC_FALSE;
5655             break;
5656           }
5657         }
5658       }
5659     }
5660     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5661       vbs = bs;
5662       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5663     }
5664   }
5665   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5666   if (sub_schurs && sub_schurs->reuse_solver) {
5667     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5668 
5669     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5670     PetscCall(ISDestroy(&reuse_solver->is_R));
5671     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5672     reuse_solver->is_R = pcbddc->is_R_local;
5673   } else {
5674     PetscCall(PetscFree(idx_R_local));
5675   }
5676 
5677   /* print some info if requested */
5678   if (pcbddc->dbg_flag) {
5679     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5680     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5681     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5682     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5683     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5684     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5685                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5686     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5687   }
5688 
5689   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5690   if (!sub_schurs || !sub_schurs->reuse_solver) {
5691     IS        is_aux1, is_aux2;
5692     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5693 
5694     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5695     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5696     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5697     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5698     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5699     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5700     for (i = 0, j = 0; i < n_R; i++) {
5701       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5702     }
5703     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5704     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5705     for (i = 0, j = 0; i < n_B; i++) {
5706       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5707     }
5708     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5709     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5710     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5711     PetscCall(ISDestroy(&is_aux1));
5712     PetscCall(ISDestroy(&is_aux2));
5713 
5714     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5715       PetscCall(PetscMalloc1(n_D, &aux_array1));
5716       for (i = 0, j = 0; i < n_R; i++) {
5717         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5718       }
5719       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5720       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5721       PetscCall(ISDestroy(&is_aux1));
5722     }
5723     PetscCall(PetscBTDestroy(&bitmask));
5724     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5725   } else {
5726     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5727     IS                 tis;
5728     PetscInt           schur_size;
5729 
5730     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5731     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5732     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5733     PetscCall(ISDestroy(&tis));
5734     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5735       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5736       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5737       PetscCall(ISDestroy(&tis));
5738     }
5739   }
5740   PetscFunctionReturn(PETSC_SUCCESS);
5741 }
5742 
5743 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5744 {
5745   MatNullSpace NullSpace;
5746   Mat          dmat;
5747   const Vec   *nullvecs;
5748   Vec          v, v2, *nullvecs2;
5749   VecScatter   sct = NULL;
5750   PetscScalar *ddata;
5751   PetscInt     k, nnsp_size, bsiz, bsiz2, n, N, bs;
5752   PetscBool    nnsp_has_cnst;
5753 
5754   PetscFunctionBegin;
5755   if (!is && !B) { /* MATIS */
5756     Mat_IS *matis = (Mat_IS *)A->data;
5757 
5758     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5759     sct = matis->cctx;
5760     PetscCall(PetscObjectReference((PetscObject)sct));
5761   } else {
5762     PetscCall(MatGetNullSpace(B, &NullSpace));
5763     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5764     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5765   }
5766   PetscCall(MatGetNullSpace(A, &NullSpace));
5767   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5768   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5769 
5770   PetscCall(MatCreateVecs(A, &v, NULL));
5771   PetscCall(MatCreateVecs(B, &v2, NULL));
5772   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5773   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs));
5774   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5775   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5776   PetscCall(VecGetBlockSize(v2, &bs));
5777   PetscCall(VecGetSize(v2, &N));
5778   PetscCall(VecGetLocalSize(v2, &n));
5779   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5780   for (k = 0; k < nnsp_size; k++) {
5781     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5782     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5783     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5784   }
5785   if (nnsp_has_cnst) {
5786     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5787     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5788   }
5789   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5790   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5791 
5792   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5793   PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscContainerUserDestroyDefault));
5794   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5795   PetscCall(MatDestroy(&dmat));
5796 
5797   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5798   PetscCall(PetscFree(nullvecs2));
5799   PetscCall(MatSetNearNullSpace(B, NullSpace));
5800   PetscCall(MatNullSpaceDestroy(&NullSpace));
5801   PetscCall(VecDestroy(&v));
5802   PetscCall(VecDestroy(&v2));
5803   PetscCall(VecScatterDestroy(&sct));
5804   PetscFunctionReturn(PETSC_SUCCESS);
5805 }
5806 
5807 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5808 {
5809   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5810   PC_IS       *pcis   = (PC_IS *)pc->data;
5811   PC           pc_temp;
5812   Mat          A_RR;
5813   MatNullSpace nnsp;
5814   MatReuse     reuse;
5815   PetscScalar  m_one = -1.0;
5816   PetscReal    value;
5817   PetscInt     n_D, n_R;
5818   PetscBool    issbaij, opts, isset, issym;
5819   PetscBool    f = PETSC_FALSE;
5820   char         dir_prefix[256], neu_prefix[256], str_level[16];
5821   size_t       len;
5822 
5823   PetscFunctionBegin;
5824   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5825   /* approximate solver, propagate NearNullSpace if needed */
5826   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5827     MatNullSpace gnnsp1, gnnsp2;
5828     PetscBool    lhas, ghas;
5829 
5830     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5831     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5832     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5833     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5834     PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5835     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5836   }
5837 
5838   /* compute prefixes */
5839   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5840   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5841   if (!pcbddc->current_level) {
5842     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5843     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5844     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5845     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5846   } else {
5847     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level));
5848     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5849     len -= 15;                                /* remove "pc_bddc_coarse_" */
5850     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5851     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5852     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5853     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5854     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5855     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5856     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5857     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5858     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5859   }
5860 
5861   /* DIRICHLET PROBLEM */
5862   if (dirichlet) {
5863     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5864     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5865       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5866       if (pcbddc->dbg_flag) {
5867         Mat A_IIn;
5868 
5869         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5870         PetscCall(MatDestroy(&pcis->A_II));
5871         pcis->A_II = A_IIn;
5872       }
5873     }
5874     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5875     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
5876 
5877     /* Matrix for Dirichlet problem is pcis->A_II */
5878     n_D  = pcis->n - pcis->n_B;
5879     opts = PETSC_FALSE;
5880     if (!pcbddc->ksp_D) { /* create object if not yet build */
5881       opts = PETSC_TRUE;
5882       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5883       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
5884       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5885       /* default */
5886       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5887       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5888       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5889       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5890       if (issbaij) {
5891         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5892       } else {
5893         PetscCall(PCSetType(pc_temp, PCLU));
5894       }
5895       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5896     }
5897     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5898     PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
5899     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5900     /* Allow user's customization */
5901     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5902     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5903     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5904       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5905     }
5906     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5907     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5908     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5909     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5910       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5911       const PetscInt *idxs;
5912       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
5913 
5914       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5915       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5916       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5917       for (i = 0; i < nl; i++) {
5918         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5919       }
5920       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5921       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5922       PetscCall(PetscFree(scoords));
5923     }
5924     if (sub_schurs && sub_schurs->reuse_solver) {
5925       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5926 
5927       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5928     }
5929 
5930     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5931     if (!n_D) {
5932       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5933       PetscCall(PCSetType(pc_temp, PCNONE));
5934     }
5935     PetscCall(KSPSetUp(pcbddc->ksp_D));
5936     /* set ksp_D into pcis data */
5937     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5938     PetscCall(KSPDestroy(&pcis->ksp_D));
5939     pcis->ksp_D = pcbddc->ksp_D;
5940   }
5941 
5942   /* NEUMANN PROBLEM */
5943   A_RR = NULL;
5944   if (neumann) {
5945     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5946     PetscInt        ibs, mbs;
5947     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
5948     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
5949 
5950     reuse_neumann_solver = PETSC_FALSE;
5951     if (sub_schurs && sub_schurs->reuse_solver) {
5952       IS iP;
5953 
5954       reuse_neumann_solver = PETSC_TRUE;
5955       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
5956       if (iP) reuse_neumann_solver = PETSC_FALSE;
5957     }
5958     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5959     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
5960     if (pcbddc->ksp_R) { /* already created ksp */
5961       PetscInt nn_R;
5962       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
5963       PetscCall(PetscObjectReference((PetscObject)A_RR));
5964       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
5965       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5966         PetscCall(KSPReset(pcbddc->ksp_R));
5967         PetscCall(MatDestroy(&A_RR));
5968         reuse = MAT_INITIAL_MATRIX;
5969       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5970         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5971           PetscCall(MatDestroy(&A_RR));
5972           reuse = MAT_INITIAL_MATRIX;
5973         } else { /* safe to reuse the matrix */
5974           reuse = MAT_REUSE_MATRIX;
5975         }
5976       }
5977       /* last check */
5978       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5979         PetscCall(MatDestroy(&A_RR));
5980         reuse = MAT_INITIAL_MATRIX;
5981       }
5982     } else { /* first time, so we need to create the matrix */
5983       reuse = MAT_INITIAL_MATRIX;
5984     }
5985     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5986        TODO: Get Rid of these conversions */
5987     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
5988     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
5989     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
5990     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5991       if (matis->A == pcbddc->local_mat) {
5992         PetscCall(MatDestroy(&pcbddc->local_mat));
5993         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5994       } else {
5995         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5996       }
5997     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
5998       if (matis->A == pcbddc->local_mat) {
5999         PetscCall(MatDestroy(&pcbddc->local_mat));
6000         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6001       } else {
6002         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6003       }
6004     }
6005     /* extract A_RR */
6006     if (reuse_neumann_solver) {
6007       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6008 
6009       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6010         PetscCall(MatDestroy(&A_RR));
6011         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6012           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6013         } else {
6014           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6015         }
6016       } else {
6017         PetscCall(MatDestroy(&A_RR));
6018         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6019         PetscCall(PetscObjectReference((PetscObject)A_RR));
6020       }
6021     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6022       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6023     }
6024     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6025     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6026     opts = PETSC_FALSE;
6027     if (!pcbddc->ksp_R) { /* create object if not present */
6028       opts = PETSC_TRUE;
6029       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6030       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6031       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6032       /* default */
6033       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6034       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6035       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6036       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6037       if (issbaij) {
6038         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6039       } else {
6040         PetscCall(PCSetType(pc_temp, PCLU));
6041       }
6042       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6043     }
6044     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6045     PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6046     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6047     if (opts) { /* Allow user's customization once */
6048       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6049     }
6050     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6051     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6052       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6053     }
6054     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6055     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6056     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6057     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6058       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6059       const PetscInt *idxs;
6060       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
6061 
6062       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6063       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6064       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6065       for (i = 0; i < nl; i++) {
6066         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6067       }
6068       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6069       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6070       PetscCall(PetscFree(scoords));
6071     }
6072 
6073     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6074     if (!n_R) {
6075       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6076       PetscCall(PCSetType(pc_temp, PCNONE));
6077     }
6078     /* Reuse solver if it is present */
6079     if (reuse_neumann_solver) {
6080       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6081 
6082       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6083     }
6084     PetscCall(KSPSetUp(pcbddc->ksp_R));
6085   }
6086 
6087   if (pcbddc->dbg_flag) {
6088     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6089     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6090     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6091   }
6092   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6093 
6094   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6095   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6096   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6097   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6098   /* check Dirichlet and Neumann solvers */
6099   if (pcbddc->dbg_flag) {
6100     if (dirichlet) { /* Dirichlet */
6101       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6102       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6103       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6104       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6105       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6106       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6107       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6108       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6109     }
6110     if (neumann) { /* Neumann */
6111       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6112       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6113       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6114       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6115       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6116       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6117       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6118       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6119     }
6120   }
6121   /* free Neumann problem's matrix */
6122   PetscCall(MatDestroy(&A_RR));
6123   PetscFunctionReturn(PETSC_SUCCESS);
6124 }
6125 
6126 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6127 {
6128   PC_BDDC        *pcbddc       = (PC_BDDC *)pc->data;
6129   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
6130   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6131 
6132   PetscFunctionBegin;
6133   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6134   if (!pcbddc->switch_static) {
6135     if (applytranspose && pcbddc->local_auxmat1) {
6136       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6137       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6138     }
6139     if (!reuse_solver) {
6140       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6141       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6142     } else {
6143       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6144 
6145       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6146       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6147     }
6148   } else {
6149     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6150     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6151     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6152     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6153     if (applytranspose && pcbddc->local_auxmat1) {
6154       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6155       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6156       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6157       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6158     }
6159   }
6160   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6161   if (!reuse_solver || pcbddc->switch_static) {
6162     if (applytranspose) {
6163       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6164     } else {
6165       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6166     }
6167     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6168   } else {
6169     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6170 
6171     if (applytranspose) {
6172       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6173     } else {
6174       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6175     }
6176   }
6177   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6178   PetscCall(VecSet(inout_B, 0.));
6179   if (!pcbddc->switch_static) {
6180     if (!reuse_solver) {
6181       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6182       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6183     } else {
6184       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6185 
6186       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6187       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6188     }
6189     if (!applytranspose && pcbddc->local_auxmat1) {
6190       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6191       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6192     }
6193   } else {
6194     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6195     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6196     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6197     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6198     if (!applytranspose && pcbddc->local_auxmat1) {
6199       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6200       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6201     }
6202     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6203     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6204     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6205     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6206   }
6207   PetscFunctionReturn(PETSC_SUCCESS);
6208 }
6209 
6210 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
6211 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6212 {
6213   PC_BDDC          *pcbddc = (PC_BDDC *)pc->data;
6214   PC_IS            *pcis   = (PC_IS *)pc->data;
6215   const PetscScalar zero   = 0.0;
6216 
6217   PetscFunctionBegin;
6218   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6219   if (!pcbddc->benign_apply_coarse_only) {
6220     if (applytranspose) {
6221       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6222       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6223     } else {
6224       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6225       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6226     }
6227   } else {
6228     PetscCall(VecSet(pcbddc->vec1_P, zero));
6229   }
6230 
6231   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6232   if (pcbddc->benign_n) {
6233     PetscScalar *array;
6234     PetscInt     j;
6235 
6236     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6237     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6238     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6239   }
6240 
6241   /* start communications from local primal nodes to rhs of coarse solver */
6242   PetscCall(VecSet(pcbddc->coarse_vec, zero));
6243   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6244   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6245 
6246   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6247   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6248   if (pcbddc->coarse_ksp) {
6249     Mat          coarse_mat;
6250     Vec          rhs, sol;
6251     MatNullSpace nullsp;
6252     PetscBool    isbddc = PETSC_FALSE;
6253 
6254     if (pcbddc->benign_have_null) {
6255       PC coarse_pc;
6256 
6257       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6258       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6259       /* we need to propagate to coarser levels the need for a possible benign correction */
6260       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6261         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)coarse_pc->data;
6262         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
6263         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6264       }
6265     }
6266     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6267     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6268     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6269     if (applytranspose) {
6270       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6271       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6272       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6273       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6274       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6275     } else {
6276       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6277       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6278         PC coarse_pc;
6279 
6280         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6281         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6282         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6283         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6284         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6285       } else {
6286         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6287         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6288         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6289       }
6290     }
6291     /* we don't need the benign correction at coarser levels anymore */
6292     if (pcbddc->benign_have_null && isbddc) {
6293       PC       coarse_pc;
6294       PC_BDDC *coarsepcbddc;
6295 
6296       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6297       coarsepcbddc                           = (PC_BDDC *)coarse_pc->data;
6298       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
6299       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6300     }
6301   }
6302   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6303 
6304   /* Local solution on R nodes */
6305   if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6306   /* communications from coarse sol to local primal nodes */
6307   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6308   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6309 
6310   /* Sum contributions from the two levels */
6311   if (!pcbddc->benign_apply_coarse_only) {
6312     if (applytranspose) {
6313       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6314       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6315     } else {
6316       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6317       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6318     }
6319     /* store p0 */
6320     if (pcbddc->benign_n) {
6321       PetscScalar *array;
6322       PetscInt     j;
6323 
6324       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6325       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6326       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6327     }
6328   } else { /* expand the coarse solution */
6329     if (applytranspose) {
6330       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6331     } else {
6332       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6333     }
6334   }
6335   PetscFunctionReturn(PETSC_SUCCESS);
6336 }
6337 
6338 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6339 {
6340   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6341   Vec                from, to;
6342   const PetscScalar *array;
6343 
6344   PetscFunctionBegin;
6345   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6346     from = pcbddc->coarse_vec;
6347     to   = pcbddc->vec1_P;
6348     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6349       Vec tvec;
6350 
6351       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6352       PetscCall(VecResetArray(tvec));
6353       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6354       PetscCall(VecGetArrayRead(tvec, &array));
6355       PetscCall(VecPlaceArray(from, array));
6356       PetscCall(VecRestoreArrayRead(tvec, &array));
6357     }
6358   } else { /* from local to global -> put data in coarse right-hand side */
6359     from = pcbddc->vec1_P;
6360     to   = pcbddc->coarse_vec;
6361   }
6362   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6363   PetscFunctionReturn(PETSC_SUCCESS);
6364 }
6365 
6366 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6367 {
6368   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6369   Vec                from, to;
6370   const PetscScalar *array;
6371 
6372   PetscFunctionBegin;
6373   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6374     from = pcbddc->coarse_vec;
6375     to   = pcbddc->vec1_P;
6376   } else { /* from local to global -> put data in coarse right-hand side */
6377     from = pcbddc->vec1_P;
6378     to   = pcbddc->coarse_vec;
6379   }
6380   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6381   if (smode == SCATTER_FORWARD) {
6382     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6383       Vec tvec;
6384 
6385       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6386       PetscCall(VecGetArrayRead(to, &array));
6387       PetscCall(VecPlaceArray(tvec, array));
6388       PetscCall(VecRestoreArrayRead(to, &array));
6389     }
6390   } else {
6391     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6392       PetscCall(VecResetArray(from));
6393     }
6394   }
6395   PetscFunctionReturn(PETSC_SUCCESS);
6396 }
6397 
6398 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6399 {
6400   PC_IS   *pcis   = (PC_IS *)pc->data;
6401   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6402   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
6403   /* one and zero */
6404   PetscScalar one = 1.0, zero = 0.0;
6405   /* space to store constraints and their local indices */
6406   PetscScalar *constraints_data;
6407   PetscInt    *constraints_idxs, *constraints_idxs_B;
6408   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
6409   PetscInt    *constraints_n;
6410   /* iterators */
6411   PetscInt i, j, k, total_counts, total_counts_cc, cum;
6412   /* BLAS integers */
6413   PetscBLASInt lwork, lierr;
6414   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6415   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6416   /* reuse */
6417   PetscInt  olocal_primal_size, olocal_primal_size_cc;
6418   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6419   /* change of basis */
6420   PetscBool qr_needed;
6421   PetscBT   change_basis, qr_needed_idx;
6422   /* auxiliary stuff */
6423   PetscInt *nnz, *is_indices;
6424   PetscInt  ncc;
6425   /* some quantities */
6426   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
6427   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6428   PetscReal tol; /* tolerance for retaining eigenmodes */
6429 
6430   PetscFunctionBegin;
6431   tol = PetscSqrtReal(PETSC_SMALL);
6432   /* Destroy Mat objects computed previously */
6433   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6434   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6435   PetscCall(MatDestroy(&pcbddc->switch_static_change));
6436   /* save info on constraints from previous setup (if any) */
6437   olocal_primal_size    = pcbddc->local_primal_size;
6438   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6439   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6440   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6441   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6442   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6443   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6444 
6445   if (!pcbddc->adaptive_selection) {
6446     IS           ISForVertices, *ISForFaces, *ISForEdges;
6447     MatNullSpace nearnullsp;
6448     const Vec   *nearnullvecs;
6449     Vec         *localnearnullsp;
6450     PetscScalar *array;
6451     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6452     PetscBool    nnsp_has_cnst;
6453     /* LAPACK working arrays for SVD or POD */
6454     PetscBool    skip_lapack, boolforchange;
6455     PetscScalar *work;
6456     PetscReal   *singular_vals;
6457 #if defined(PETSC_USE_COMPLEX)
6458     PetscReal *rwork;
6459 #endif
6460     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6461     PetscBLASInt dummy_int    = 1;
6462     PetscScalar  dummy_scalar = 1.;
6463     PetscBool    use_pod      = PETSC_FALSE;
6464 
6465     /* MKL SVD with same input gives different results on different processes! */
6466 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6467     use_pod = PETSC_TRUE;
6468 #endif
6469     /* Get index sets for faces, edges and vertices from graph */
6470     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6471     o_nf       = n_ISForFaces;
6472     o_ne       = n_ISForEdges;
6473     n_vertices = 0;
6474     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6475     /* print some info */
6476     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6477       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6478       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6479       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6480       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6481       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6482       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6483       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6484       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6485       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6486     }
6487 
6488     if (!pcbddc->use_vertices) n_vertices = 0;
6489     if (!pcbddc->use_edges) n_ISForEdges = 0;
6490     if (!pcbddc->use_faces) n_ISForFaces = 0;
6491 
6492     /* check if near null space is attached to global mat */
6493     if (pcbddc->use_nnsp) {
6494       PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6495     } else nearnullsp = NULL;
6496 
6497     if (nearnullsp) {
6498       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6499       /* remove any stored info */
6500       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6501       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6502       /* store information for BDDC solver reuse */
6503       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6504       pcbddc->onearnullspace = nearnullsp;
6505       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6506       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6507     } else { /* if near null space is not provided BDDC uses constants by default */
6508       nnsp_size     = 0;
6509       nnsp_has_cnst = PETSC_TRUE;
6510     }
6511     /* get max number of constraints on a single cc */
6512     max_constraints = nnsp_size;
6513     if (nnsp_has_cnst) max_constraints++;
6514 
6515     /*
6516          Evaluate maximum storage size needed by the procedure
6517          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6518          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6519          There can be multiple constraints per connected component
6520                                                                                                                                                            */
6521     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6522     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6523 
6524     total_counts = n_ISForFaces + n_ISForEdges;
6525     total_counts *= max_constraints;
6526     total_counts += n_vertices;
6527     PetscCall(PetscBTCreate(total_counts, &change_basis));
6528 
6529     total_counts           = 0;
6530     max_size_of_constraint = 0;
6531     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6532       IS used_is;
6533       if (i < n_ISForEdges) {
6534         used_is = ISForEdges[i];
6535       } else {
6536         used_is = ISForFaces[i - n_ISForEdges];
6537       }
6538       PetscCall(ISGetSize(used_is, &j));
6539       total_counts += j;
6540       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6541     }
6542     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6543 
6544     /* get local part of global near null space vectors */
6545     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6546     for (k = 0; k < nnsp_size; k++) {
6547       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6548       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6549       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6550     }
6551 
6552     /* whether or not to skip lapack calls */
6553     skip_lapack = PETSC_TRUE;
6554     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6555 
6556     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6557     if (!skip_lapack) {
6558       PetscScalar temp_work;
6559 
6560       if (use_pod) {
6561         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6562         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6563         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6564         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6565 #if defined(PETSC_USE_COMPLEX)
6566         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6567 #endif
6568         /* now we evaluate the optimal workspace using query with lwork=-1 */
6569         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6570         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6571         lwork = -1;
6572         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6573 #if !defined(PETSC_USE_COMPLEX)
6574         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6575 #else
6576         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6577 #endif
6578         PetscCall(PetscFPTrapPop());
6579         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr);
6580       } else {
6581 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6582         /* SVD */
6583         PetscInt max_n, min_n;
6584         max_n = max_size_of_constraint;
6585         min_n = max_constraints;
6586         if (max_size_of_constraint < max_constraints) {
6587           min_n = max_size_of_constraint;
6588           max_n = max_constraints;
6589         }
6590         PetscCall(PetscMalloc1(min_n, &singular_vals));
6591   #if defined(PETSC_USE_COMPLEX)
6592         PetscCall(PetscMalloc1(5 * min_n, &rwork));
6593   #endif
6594         /* now we evaluate the optimal workspace using query with lwork=-1 */
6595         lwork = -1;
6596         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6597         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6598         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6599         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6600   #if !defined(PETSC_USE_COMPLEX)
6601         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6602   #else
6603         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6604   #endif
6605         PetscCall(PetscFPTrapPop());
6606         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr);
6607 #else
6608         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6609 #endif /* on missing GESVD */
6610       }
6611       /* Allocate optimal workspace */
6612       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6613       PetscCall(PetscMalloc1(lwork, &work));
6614     }
6615     /* Now we can loop on constraining sets */
6616     total_counts            = 0;
6617     constraints_idxs_ptr[0] = 0;
6618     constraints_data_ptr[0] = 0;
6619     /* vertices */
6620     if (n_vertices) {
6621       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6622       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6623       for (i = 0; i < n_vertices; i++) {
6624         constraints_n[total_counts]            = 1;
6625         constraints_data[total_counts]         = 1.0;
6626         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6627         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6628         total_counts++;
6629       }
6630       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6631     }
6632 
6633     /* edges and faces */
6634     total_counts_cc = total_counts;
6635     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6636       IS        used_is;
6637       PetscBool idxs_copied = PETSC_FALSE;
6638 
6639       if (ncc < n_ISForEdges) {
6640         used_is       = ISForEdges[ncc];
6641         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6642       } else {
6643         used_is       = ISForFaces[ncc - n_ISForEdges];
6644         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6645       }
6646       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6647 
6648       PetscCall(ISGetSize(used_is, &size_of_constraint));
6649       if (!size_of_constraint) continue;
6650       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6651       if (nnsp_has_cnst) {
6652         PetscScalar quad_value;
6653 
6654         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6655         idxs_copied = PETSC_TRUE;
6656 
6657         if (!pcbddc->use_nnsp_true) {
6658           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6659         } else {
6660           quad_value = 1.0;
6661         }
6662         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6663         temp_constraints++;
6664         total_counts++;
6665       }
6666       for (k = 0; k < nnsp_size; k++) {
6667         PetscReal    real_value;
6668         PetscScalar *ptr_to_data;
6669 
6670         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6671         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6672         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6673         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6674         /* check if array is null on the connected component */
6675         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6676         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6677         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6678           temp_constraints++;
6679           total_counts++;
6680           if (!idxs_copied) {
6681             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6682             idxs_copied = PETSC_TRUE;
6683           }
6684         }
6685       }
6686       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6687       valid_constraints = temp_constraints;
6688       if (!pcbddc->use_nnsp_true && temp_constraints) {
6689         if (temp_constraints == 1) { /* just normalize the constraint */
6690           PetscScalar norm, *ptr_to_data;
6691 
6692           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6693           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6694           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6695           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6696           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6697         } else { /* perform SVD */
6698           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6699 
6700           if (use_pod) {
6701             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6702                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6703                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6704                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6705                   from that computed using LAPACKgesvd
6706                -> This is due to a different computation of eigenvectors in LAPACKheev
6707                -> The quality of the POD-computed basis will be the same */
6708             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6709             /* Store upper triangular part of correlation matrix */
6710             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6711             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6712             for (j = 0; j < temp_constraints; j++) {
6713               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6714             }
6715             /* compute eigenvalues and eigenvectors of correlation matrix */
6716             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6717             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6718 #if !defined(PETSC_USE_COMPLEX)
6719             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6720 #else
6721             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6722 #endif
6723             PetscCall(PetscFPTrapPop());
6724             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr);
6725             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6726             j = 0;
6727             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6728             total_counts      = total_counts - j;
6729             valid_constraints = temp_constraints - j;
6730             /* scale and copy POD basis into used quadrature memory */
6731             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6732             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6733             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6734             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6735             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6736             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6737             if (j < temp_constraints) {
6738               PetscInt ii;
6739               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6740               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6741               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6742               PetscCall(PetscFPTrapPop());
6743               for (k = 0; k < temp_constraints - j; k++) {
6744                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6745               }
6746             }
6747           } else {
6748 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6749             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6750             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6751             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6752             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6753   #if !defined(PETSC_USE_COMPLEX)
6754             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6755   #else
6756             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6757   #endif
6758             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
6759             PetscCall(PetscFPTrapPop());
6760             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6761             k = temp_constraints;
6762             if (k > size_of_constraint) k = size_of_constraint;
6763             j = 0;
6764             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6765             valid_constraints = k - j;
6766             total_counts      = total_counts - temp_constraints + valid_constraints;
6767 #else
6768             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6769 #endif /* on missing GESVD */
6770           }
6771         }
6772       }
6773       /* update pointers information */
6774       if (valid_constraints) {
6775         constraints_n[total_counts_cc]            = valid_constraints;
6776         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6777         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6778         /* set change_of_basis flag */
6779         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6780         total_counts_cc++;
6781       }
6782     }
6783     /* free workspace */
6784     if (!skip_lapack) {
6785       PetscCall(PetscFree(work));
6786 #if defined(PETSC_USE_COMPLEX)
6787       PetscCall(PetscFree(rwork));
6788 #endif
6789       PetscCall(PetscFree(singular_vals));
6790       PetscCall(PetscFree(correlation_mat));
6791       PetscCall(PetscFree(temp_basis));
6792     }
6793     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6794     PetscCall(PetscFree(localnearnullsp));
6795     /* free index sets of faces, edges and vertices */
6796     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6797   } else {
6798     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6799 
6800     total_counts = 0;
6801     n_vertices   = 0;
6802     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6803     max_constraints = 0;
6804     total_counts_cc = 0;
6805     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6806       total_counts += pcbddc->adaptive_constraints_n[i];
6807       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6808       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6809     }
6810     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6811     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6812     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6813     constraints_data     = pcbddc->adaptive_constraints_data;
6814     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6815     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6816     total_counts_cc = 0;
6817     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6818       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6819     }
6820 
6821     max_size_of_constraint = 0;
6822     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6823     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6824     /* Change of basis */
6825     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6826     if (pcbddc->use_change_of_basis) {
6827       for (i = 0; i < sub_schurs->n_subs; i++) {
6828         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6829       }
6830     }
6831   }
6832   pcbddc->local_primal_size = total_counts;
6833   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6834 
6835   /* map constraints_idxs in boundary numbering */
6836   if (pcbddc->use_change_of_basis) {
6837     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6838     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6839   }
6840 
6841   /* Create constraint matrix */
6842   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6843   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6844   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6845 
6846   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6847   /* determine if a QR strategy is needed for change of basis */
6848   qr_needed = pcbddc->use_qr_single;
6849   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6850   total_primal_vertices        = 0;
6851   pcbddc->local_primal_size_cc = 0;
6852   for (i = 0; i < total_counts_cc; i++) {
6853     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6854     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6855       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6856       pcbddc->local_primal_size_cc += 1;
6857     } else if (PetscBTLookup(change_basis, i)) {
6858       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6859       pcbddc->local_primal_size_cc += constraints_n[i];
6860       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6861         PetscCall(PetscBTSet(qr_needed_idx, i));
6862         qr_needed = PETSC_TRUE;
6863       }
6864     } else {
6865       pcbddc->local_primal_size_cc += 1;
6866     }
6867   }
6868   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6869   pcbddc->n_vertices = total_primal_vertices;
6870   /* permute indices in order to have a sorted set of vertices */
6871   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6872   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6873   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6874   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6875 
6876   /* nonzero structure of constraint matrix */
6877   /* and get reference dof for local constraints */
6878   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6879   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6880 
6881   j            = total_primal_vertices;
6882   total_counts = total_primal_vertices;
6883   cum          = total_primal_vertices;
6884   for (i = n_vertices; i < total_counts_cc; i++) {
6885     if (!PetscBTLookup(change_basis, i)) {
6886       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6887       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6888       cum++;
6889       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6890       for (k = 0; k < constraints_n[i]; k++) {
6891         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6892         nnz[j + k]                                        = size_of_constraint;
6893       }
6894       j += constraints_n[i];
6895     }
6896   }
6897   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6898   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6899   PetscCall(PetscFree(nnz));
6900 
6901   /* set values in constraint matrix */
6902   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6903   total_counts = total_primal_vertices;
6904   for (i = n_vertices; i < total_counts_cc; i++) {
6905     if (!PetscBTLookup(change_basis, i)) {
6906       PetscInt *cols;
6907 
6908       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6909       cols               = constraints_idxs + constraints_idxs_ptr[i];
6910       for (k = 0; k < constraints_n[i]; k++) {
6911         PetscInt     row = total_counts + k;
6912         PetscScalar *vals;
6913 
6914         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6915         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6916       }
6917       total_counts += constraints_n[i];
6918     }
6919   }
6920   /* assembling */
6921   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6922   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6923   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
6924 
6925   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6926   if (pcbddc->use_change_of_basis) {
6927     /* dual and primal dofs on a single cc */
6928     PetscInt dual_dofs, primal_dofs;
6929     /* working stuff for GEQRF */
6930     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6931     PetscBLASInt lqr_work;
6932     /* working stuff for UNGQR */
6933     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6934     PetscBLASInt lgqr_work;
6935     /* working stuff for TRTRS */
6936     PetscScalar *trs_rhs = NULL;
6937     PetscBLASInt Blas_NRHS;
6938     /* pointers for values insertion into change of basis matrix */
6939     PetscInt    *start_rows, *start_cols;
6940     PetscScalar *start_vals;
6941     /* working stuff for values insertion */
6942     PetscBT   is_primal;
6943     PetscInt *aux_primal_numbering_B;
6944     /* matrix sizes */
6945     PetscInt global_size, local_size;
6946     /* temporary change of basis */
6947     Mat localChangeOfBasisMatrix;
6948     /* extra space for debugging */
6949     PetscScalar *dbg_work = NULL;
6950 
6951     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
6952     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
6953     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
6954     /* nonzeros for local mat */
6955     PetscCall(PetscMalloc1(pcis->n, &nnz));
6956     if (!pcbddc->benign_change || pcbddc->fake_change) {
6957       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6958     } else {
6959       const PetscInt *ii;
6960       PetscInt        n;
6961       PetscBool       flg_row;
6962       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6963       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6964       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6965     }
6966     for (i = n_vertices; i < total_counts_cc; i++) {
6967       if (PetscBTLookup(change_basis, i)) {
6968         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6969         if (PetscBTLookup(qr_needed_idx, i)) {
6970           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6971         } else {
6972           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6973           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6974         }
6975       }
6976     }
6977     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
6978     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6979     PetscCall(PetscFree(nnz));
6980     /* Set interior change in the matrix */
6981     if (!pcbddc->benign_change || pcbddc->fake_change) {
6982       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
6983     } else {
6984       const PetscInt *ii, *jj;
6985       PetscScalar    *aa;
6986       PetscInt        n;
6987       PetscBool       flg_row;
6988       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6989       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
6990       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
6991       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
6992       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6993     }
6994 
6995     if (pcbddc->dbg_flag) {
6996       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6997       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
6998     }
6999 
7000     /* Now we loop on the constraints which need a change of basis */
7001     /*
7002        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7003        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7004 
7005        Basic blocks of change of basis matrix T computed:
7006 
7007           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7008 
7009             | 1        0   ...        0         s_1/S |
7010             | 0        1   ...        0         s_2/S |
7011             |              ...                        |
7012             | 0        ...            1     s_{n-1}/S |
7013             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
7014 
7015             with S = \sum_{i=1}^n s_i^2
7016             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7017                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7018 
7019           - QR decomposition of constraints otherwise
7020     */
7021     if (qr_needed && max_size_of_constraint) {
7022       /* space to store Q */
7023       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7024       /* array to store scaling factors for reflectors */
7025       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7026       /* first we issue queries for optimal work */
7027       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7028       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7029       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7030       lqr_work = -1;
7031       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7032       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr);
7033       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7034       PetscCall(PetscMalloc1(lqr_work, &qr_work));
7035       lgqr_work = -1;
7036       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7037       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7038       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7039       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7040       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7041       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7042       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr);
7043       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7044       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7045       /* array to store rhs and solution of triangular solver */
7046       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7047       /* allocating workspace for check */
7048       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7049     }
7050     /* array to store whether a node is primal or not */
7051     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7052     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7053     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7054     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7055     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7056     PetscCall(PetscFree(aux_primal_numbering_B));
7057 
7058     /* loop on constraints and see whether or not they need a change of basis and compute it */
7059     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7060       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7061       if (PetscBTLookup(change_basis, total_counts)) {
7062         /* get constraint info */
7063         primal_dofs = constraints_n[total_counts];
7064         dual_dofs   = size_of_constraint - primal_dofs;
7065 
7066         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7067 
7068         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7069 
7070           /* copy quadrature constraints for change of basis check */
7071           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7072           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7073           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7074 
7075           /* compute QR decomposition of constraints */
7076           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7077           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7078           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7079           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7080           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7081           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr);
7082           PetscCall(PetscFPTrapPop());
7083 
7084           /* explicitly compute R^-T */
7085           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7086           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7087           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7088           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7089           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7090           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7091           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7092           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7093           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr);
7094           PetscCall(PetscFPTrapPop());
7095 
7096           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7097           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7098           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7099           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7100           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7101           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7102           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7103           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr);
7104           PetscCall(PetscFPTrapPop());
7105 
7106           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7107              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7108              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7109           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7110           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7111           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7112           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7113           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7114           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7115           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7116           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7117           PetscCall(PetscFPTrapPop());
7118           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7119 
7120           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7121           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7122           /* insert cols for primal dofs */
7123           for (j = 0; j < primal_dofs; j++) {
7124             start_vals = &qr_basis[j * size_of_constraint];
7125             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7126             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7127           }
7128           /* insert cols for dual dofs */
7129           for (j = 0, k = 0; j < dual_dofs; k++) {
7130             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7131               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7132               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7133               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7134               j++;
7135             }
7136           }
7137 
7138           /* check change of basis */
7139           if (pcbddc->dbg_flag) {
7140             PetscInt  ii, jj;
7141             PetscBool valid_qr = PETSC_TRUE;
7142             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7143             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7144             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7145             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7146             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7147             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7148             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7149             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7150             PetscCall(PetscFPTrapPop());
7151             for (jj = 0; jj < size_of_constraint; jj++) {
7152               for (ii = 0; ii < primal_dofs; ii++) {
7153                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7154                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7155               }
7156             }
7157             if (!valid_qr) {
7158               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7159               for (jj = 0; jj < size_of_constraint; jj++) {
7160                 for (ii = 0; ii < primal_dofs; ii++) {
7161                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7162                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7163                   }
7164                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7165                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7166                   }
7167                 }
7168               }
7169             } else {
7170               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7171             }
7172           }
7173         } else { /* simple transformation block */
7174           PetscInt    row, col;
7175           PetscScalar val, norm;
7176 
7177           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7178           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7179           for (j = 0; j < size_of_constraint; j++) {
7180             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7181             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7182             if (!PetscBTLookup(is_primal, row_B)) {
7183               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7184               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7185               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7186             } else {
7187               for (k = 0; k < size_of_constraint; k++) {
7188                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7189                 if (row != col) {
7190                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7191                 } else {
7192                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7193                 }
7194                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7195               }
7196             }
7197           }
7198           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7199         }
7200       } else {
7201         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7202       }
7203     }
7204 
7205     /* free workspace */
7206     if (qr_needed) {
7207       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7208       PetscCall(PetscFree(trs_rhs));
7209       PetscCall(PetscFree(qr_tau));
7210       PetscCall(PetscFree(qr_work));
7211       PetscCall(PetscFree(gqr_work));
7212       PetscCall(PetscFree(qr_basis));
7213     }
7214     PetscCall(PetscBTDestroy(&is_primal));
7215     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7216     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7217 
7218     /* assembling of global change of variable */
7219     if (!pcbddc->fake_change) {
7220       Mat      tmat;
7221       PetscInt bs;
7222 
7223       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7224       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7225       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7226       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7227       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7228       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7229       PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix));
7230       PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ));
7231       PetscCall(MatGetBlockSize(pc->pmat, &bs));
7232       PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs));
7233       PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size));
7234       PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE));
7235       PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7236       PetscCall(MatDestroy(&tmat));
7237       PetscCall(VecSet(pcis->vec1_global, 0.0));
7238       PetscCall(VecSet(pcis->vec1_N, 1.0));
7239       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7240       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7241       PetscCall(VecReciprocal(pcis->vec1_global));
7242       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7243 
7244       /* check */
7245       if (pcbddc->dbg_flag) {
7246         PetscReal error;
7247         Vec       x, x_change;
7248 
7249         PetscCall(VecDuplicate(pcis->vec1_global, &x));
7250         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7251         PetscCall(VecSetRandom(x, NULL));
7252         PetscCall(VecCopy(x, pcis->vec1_global));
7253         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7254         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7255         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7256         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7257         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7258         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7259         PetscCall(VecAXPY(x, -1.0, x_change));
7260         PetscCall(VecNorm(x, NORM_INFINITY, &error));
7261         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7262         PetscCall(VecDestroy(&x));
7263         PetscCall(VecDestroy(&x_change));
7264       }
7265       /* adapt sub_schurs computed (if any) */
7266       if (pcbddc->use_deluxe_scaling) {
7267         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7268 
7269         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7270         if (sub_schurs && sub_schurs->S_Ej_all) {
7271           Mat S_new, tmat;
7272           IS  is_all_N, is_V_Sall = NULL;
7273 
7274           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7275           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7276           if (pcbddc->deluxe_zerorows) {
7277             ISLocalToGlobalMapping NtoSall;
7278             IS                     is_V;
7279             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7280             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7281             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7282             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7283             PetscCall(ISDestroy(&is_V));
7284           }
7285           PetscCall(ISDestroy(&is_all_N));
7286           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7287           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7288           PetscCall(PetscObjectReference((PetscObject)S_new));
7289           if (pcbddc->deluxe_zerorows) {
7290             const PetscScalar *array;
7291             const PetscInt    *idxs_V, *idxs_all;
7292             PetscInt           i, n_V;
7293 
7294             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7295             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7296             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7297             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7298             PetscCall(VecGetArrayRead(pcis->D, &array));
7299             for (i = 0; i < n_V; i++) {
7300               PetscScalar val;
7301               PetscInt    idx;
7302 
7303               idx = idxs_V[i];
7304               val = array[idxs_all[idxs_V[i]]];
7305               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7306             }
7307             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7308             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7309             PetscCall(VecRestoreArrayRead(pcis->D, &array));
7310             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7311             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7312           }
7313           sub_schurs->S_Ej_all = S_new;
7314           PetscCall(MatDestroy(&S_new));
7315           if (sub_schurs->sum_S_Ej_all) {
7316             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7317             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7318             PetscCall(PetscObjectReference((PetscObject)S_new));
7319             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7320             sub_schurs->sum_S_Ej_all = S_new;
7321             PetscCall(MatDestroy(&S_new));
7322           }
7323           PetscCall(ISDestroy(&is_V_Sall));
7324           PetscCall(MatDestroy(&tmat));
7325         }
7326         /* destroy any change of basis context in sub_schurs */
7327         if (sub_schurs && sub_schurs->change) {
7328           PetscInt i;
7329 
7330           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7331           PetscCall(PetscFree(sub_schurs->change));
7332         }
7333       }
7334       if (pcbddc->switch_static) { /* need to save the local change */
7335         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7336       } else {
7337         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7338       }
7339       /* determine if any process has changed the pressures locally */
7340       pcbddc->change_interior = pcbddc->benign_have_null;
7341     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7342       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7343       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7344       pcbddc->use_qr_single    = qr_needed;
7345     }
7346   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7347     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7348       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7349       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7350     } else {
7351       Mat benign_global = NULL;
7352       if (pcbddc->benign_have_null) {
7353         Mat M;
7354 
7355         pcbddc->change_interior = PETSC_TRUE;
7356         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7357         PetscCall(VecReciprocal(pcis->vec1_N));
7358         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7359         if (pcbddc->benign_change) {
7360           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7361           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7362         } else {
7363           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7364           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7365         }
7366         PetscCall(MatISSetLocalMat(benign_global, M));
7367         PetscCall(MatDestroy(&M));
7368         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7369         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7370       }
7371       if (pcbddc->user_ChangeOfBasisMatrix) {
7372         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7373         PetscCall(MatDestroy(&benign_global));
7374       } else if (pcbddc->benign_have_null) {
7375         pcbddc->ChangeOfBasisMatrix = benign_global;
7376       }
7377     }
7378     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7379       IS              is_global;
7380       const PetscInt *gidxs;
7381 
7382       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7383       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7384       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7385       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7386       PetscCall(ISDestroy(&is_global));
7387     }
7388   }
7389   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7390 
7391   if (!pcbddc->fake_change) {
7392     /* add pressure dofs to set of primal nodes for numbering purposes */
7393     for (i = 0; i < pcbddc->benign_n; i++) {
7394       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
7395       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7396       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
7397       pcbddc->local_primal_size_cc++;
7398       pcbddc->local_primal_size++;
7399     }
7400 
7401     /* check if a new primal space has been introduced (also take into account benign trick) */
7402     pcbddc->new_primal_space_local = PETSC_TRUE;
7403     if (olocal_primal_size == pcbddc->local_primal_size) {
7404       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7405       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7406       if (!pcbddc->new_primal_space_local) {
7407         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7408         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7409       }
7410     }
7411     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7412     PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7413   }
7414   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7415 
7416   /* flush dbg viewer */
7417   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7418 
7419   /* free workspace */
7420   PetscCall(PetscBTDestroy(&qr_needed_idx));
7421   PetscCall(PetscBTDestroy(&change_basis));
7422   if (!pcbddc->adaptive_selection) {
7423     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7424     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7425   } else {
7426     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7427     PetscCall(PetscFree(constraints_n));
7428     PetscCall(PetscFree(constraints_idxs_B));
7429   }
7430   PetscFunctionReturn(PETSC_SUCCESS);
7431 }
7432 
7433 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7434 {
7435   ISLocalToGlobalMapping map;
7436   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7437   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
7438   PetscInt               i, N;
7439   PetscBool              rcsr = PETSC_FALSE;
7440 
7441   PetscFunctionBegin;
7442   if (pcbddc->recompute_topography) {
7443     pcbddc->graphanalyzed = PETSC_FALSE;
7444     /* Reset previously computed graph */
7445     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7446     /* Init local Graph struct */
7447     PetscCall(MatGetSize(pc->pmat, &N, NULL));
7448     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7449     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7450 
7451     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7452     /* Check validity of the csr graph passed in by the user */
7453     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7454                pcbddc->mat_graph->nvtxs);
7455 
7456     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7457     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7458       PetscInt *xadj, *adjncy;
7459       PetscInt  nvtxs;
7460       PetscBool flg_row;
7461       Mat       A;
7462 
7463       PetscCall(PetscObjectReference((PetscObject)matis->A));
7464       A = matis->A;
7465       for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7466         Mat AtA;
7467 
7468         PetscCall(MatProductCreate(A, A, NULL, &AtA));
7469         PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7470         PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7471         PetscCall(MatProductSetFromOptions(AtA));
7472         PetscCall(MatProductSymbolic(AtA));
7473         PetscCall(MatProductClear(AtA));
7474         /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7475         AtA->assembled = PETSC_TRUE;
7476         PetscCall(MatDestroy(&A));
7477         A = AtA;
7478       }
7479       PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7480       if (flg_row) {
7481         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7482         pcbddc->computed_rowadj = PETSC_TRUE;
7483         PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7484         rcsr = PETSC_TRUE;
7485       }
7486       PetscCall(MatDestroy(&A));
7487     }
7488     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7489 
7490     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7491       PetscReal   *lcoords;
7492       PetscInt     n;
7493       MPI_Datatype dimrealtype;
7494 
7495       /* TODO: support for blocked */
7496       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7497       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7498       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7499       PetscCallMPI(MPI_Type_contiguous((PetscMPIInt)pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype));
7500       PetscCallMPI(MPI_Type_commit(&dimrealtype));
7501       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7502       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7503       PetscCallMPI(MPI_Type_free(&dimrealtype));
7504       PetscCall(PetscFree(pcbddc->mat_graph->coords));
7505 
7506       pcbddc->mat_graph->coords = lcoords;
7507       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7508       pcbddc->mat_graph->cnloc  = n;
7509     }
7510     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7511                pcbddc->mat_graph->nvtxs);
7512     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7513 
7514     /* attach info on disconnected subdomains if present */
7515     if (pcbddc->n_local_subs) {
7516       PetscInt *local_subs, n, totn;
7517 
7518       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7519       PetscCall(PetscMalloc1(n, &local_subs));
7520       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7521       for (i = 0; i < pcbddc->n_local_subs; i++) {
7522         const PetscInt *idxs;
7523         PetscInt        nl, j;
7524 
7525         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7526         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7527         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7528         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7529       }
7530       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7531       pcbddc->mat_graph->n_local_subs = totn + 1;
7532       pcbddc->mat_graph->local_subs   = local_subs;
7533     }
7534 
7535     /* Setup of Graph */
7536     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7537   }
7538 
7539   if (!pcbddc->graphanalyzed) {
7540     /* Graph's connected components analysis */
7541     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7542     pcbddc->graphanalyzed   = PETSC_TRUE;
7543     pcbddc->corner_selected = pcbddc->corner_selection;
7544   }
7545   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7546   PetscFunctionReturn(PETSC_SUCCESS);
7547 }
7548 
7549 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7550 {
7551   PetscInt     i, j, n;
7552   PetscScalar *alphas;
7553   PetscReal    norm, *onorms;
7554 
7555   PetscFunctionBegin;
7556   n = *nio;
7557   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7558   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7559   PetscCall(VecNormalize(vecs[0], &norm));
7560   if (norm < PETSC_SMALL) {
7561     onorms[0] = 0.0;
7562     PetscCall(VecSet(vecs[0], 0.0));
7563   } else {
7564     onorms[0] = norm;
7565   }
7566 
7567   for (i = 1; i < n; i++) {
7568     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7569     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7570     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7571     PetscCall(VecNormalize(vecs[i], &norm));
7572     if (norm < PETSC_SMALL) {
7573       onorms[i] = 0.0;
7574       PetscCall(VecSet(vecs[i], 0.0));
7575     } else {
7576       onorms[i] = norm;
7577     }
7578   }
7579   /* push nonzero vectors at the beginning */
7580   for (i = 0; i < n; i++) {
7581     if (onorms[i] == 0.0) {
7582       for (j = i + 1; j < n; j++) {
7583         if (onorms[j] != 0.0) {
7584           PetscCall(VecCopy(vecs[j], vecs[i]));
7585           onorms[j] = 0.0;
7586         }
7587       }
7588     }
7589   }
7590   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7591   PetscCall(PetscFree2(alphas, onorms));
7592   PetscFunctionReturn(PETSC_SUCCESS);
7593 }
7594 
7595 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7596 {
7597   ISLocalToGlobalMapping mapping;
7598   Mat                    A;
7599   PetscInt               n_neighs, *neighs, *n_shared, **shared;
7600   PetscMPIInt            size, rank, color;
7601   PetscInt              *xadj, *adjncy;
7602   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7603   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
7604   PetscInt               void_procs, *procs_candidates = NULL;
7605   PetscInt               xadj_count, *count;
7606   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
7607   PetscSubcomm           psubcomm;
7608   MPI_Comm               subcomm;
7609 
7610   PetscFunctionBegin;
7611   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7612   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7613   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7614   PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7615   PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7616   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7617 
7618   if (have_void) *have_void = PETSC_FALSE;
7619   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7620   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7621   PetscCall(MatISGetLocalMat(mat, &A));
7622   PetscCall(MatGetLocalSize(A, &n, NULL));
7623   im_active = !!n;
7624   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7625   void_procs = size - active_procs;
7626   /* get ranks of non-active processes in mat communicator */
7627   if (void_procs) {
7628     PetscInt ncand;
7629 
7630     if (have_void) *have_void = PETSC_TRUE;
7631     PetscCall(PetscMalloc1(size, &procs_candidates));
7632     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7633     for (i = 0, ncand = 0; i < size; i++) {
7634       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7635     }
7636     /* force n_subdomains to be not greater that the number of non-active processes */
7637     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7638   }
7639 
7640   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7641      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7642   PetscCall(MatGetSize(mat, &N, NULL));
7643   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7644     PetscInt issize, isidx, dest;
7645     if (*n_subdomains == 1) dest = 0;
7646     else dest = rank;
7647     if (im_active) {
7648       issize = 1;
7649       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7650         isidx = procs_candidates[dest];
7651       } else {
7652         isidx = dest;
7653       }
7654     } else {
7655       issize = 0;
7656       isidx  = -1;
7657     }
7658     if (*n_subdomains != 1) *n_subdomains = active_procs;
7659     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7660     PetscCall(PetscFree(procs_candidates));
7661     PetscFunctionReturn(PETSC_SUCCESS);
7662   }
7663   PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7664   PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7665   threshold = PetscMax(threshold, 2);
7666 
7667   /* Get info on mapping */
7668   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7669   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7670 
7671   /* build local CSR graph of subdomains' connectivity */
7672   PetscCall(PetscMalloc1(2, &xadj));
7673   xadj[0] = 0;
7674   xadj[1] = PetscMax(n_neighs - 1, 0);
7675   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7676   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7677   PetscCall(PetscCalloc1(n, &count));
7678   for (i = 1; i < n_neighs; i++)
7679     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7680 
7681   xadj_count = 0;
7682   for (i = 1; i < n_neighs; i++) {
7683     for (j = 0; j < n_shared[i]; j++) {
7684       if (count[shared[i][j]] < threshold) {
7685         adjncy[xadj_count]     = neighs[i];
7686         adjncy_wgt[xadj_count] = n_shared[i];
7687         xadj_count++;
7688         break;
7689       }
7690     }
7691   }
7692   xadj[1] = xadj_count;
7693   PetscCall(PetscFree(count));
7694   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7695   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7696 
7697   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7698 
7699   /* Restrict work on active processes only */
7700   PetscCall(PetscMPIIntCast(im_active, &color));
7701   if (void_procs) {
7702     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7703     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7704     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7705     subcomm = PetscSubcommChild(psubcomm);
7706   } else {
7707     psubcomm = NULL;
7708     subcomm  = PetscObjectComm((PetscObject)mat);
7709   }
7710 
7711   v_wgt = NULL;
7712   if (!color) {
7713     PetscCall(PetscFree(xadj));
7714     PetscCall(PetscFree(adjncy));
7715     PetscCall(PetscFree(adjncy_wgt));
7716   } else {
7717     Mat             subdomain_adj;
7718     IS              new_ranks, new_ranks_contig;
7719     MatPartitioning partitioner;
7720     PetscInt        rstart, rend;
7721     PetscMPIInt     irstart = 0, irend = 0;
7722     PetscInt       *is_indices, *oldranks;
7723     PetscMPIInt     size;
7724     PetscBool       aggregate;
7725 
7726     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7727     if (void_procs) {
7728       PetscInt prank = rank;
7729       PetscCall(PetscMalloc1(size, &oldranks));
7730       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7731       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7732       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7733     } else {
7734       oldranks = NULL;
7735     }
7736     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7737     if (aggregate) { /* TODO: all this part could be made more efficient */
7738       PetscInt     lrows, row, ncols, *cols;
7739       PetscMPIInt  nrank;
7740       PetscScalar *vals;
7741 
7742       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7743       lrows = 0;
7744       if (nrank < redprocs) {
7745         lrows = size / redprocs;
7746         if (nrank < size % redprocs) lrows++;
7747       }
7748       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7749       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7750       irstart = (PetscMPIInt)rstart; /* from construction these are always less than size */
7751       irend   = (PetscMPIInt)rend;
7752       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7753       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7754       row   = nrank;
7755       ncols = xadj[1] - xadj[0];
7756       cols  = adjncy;
7757       PetscCall(PetscMalloc1(ncols, &vals));
7758       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7759       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7760       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7761       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7762       PetscCall(PetscFree(xadj));
7763       PetscCall(PetscFree(adjncy));
7764       PetscCall(PetscFree(adjncy_wgt));
7765       PetscCall(PetscFree(vals));
7766       if (use_vwgt) {
7767         Vec                v;
7768         const PetscScalar *array;
7769         PetscInt           nl;
7770 
7771         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7772         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7773         PetscCall(VecAssemblyBegin(v));
7774         PetscCall(VecAssemblyEnd(v));
7775         PetscCall(VecGetLocalSize(v, &nl));
7776         PetscCall(VecGetArrayRead(v, &array));
7777         PetscCall(PetscMalloc1(nl, &v_wgt));
7778         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7779         PetscCall(VecRestoreArrayRead(v, &array));
7780         PetscCall(VecDestroy(&v));
7781       }
7782     } else {
7783       PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7784       if (use_vwgt) {
7785         PetscCall(PetscMalloc1(1, &v_wgt));
7786         v_wgt[0] = n;
7787       }
7788     }
7789     /* PetscCall(MatView(subdomain_adj,0)); */
7790 
7791     /* Partition */
7792     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7793 #if defined(PETSC_HAVE_PTSCOTCH)
7794     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7795 #elif defined(PETSC_HAVE_PARMETIS)
7796     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7797 #else
7798     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7799 #endif
7800     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7801     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7802     *n_subdomains = PetscMin((PetscInt)size, *n_subdomains);
7803     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7804     PetscCall(MatPartitioningSetFromOptions(partitioner));
7805     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7806     /* PetscCall(MatPartitioningView(partitioner,0)); */
7807 
7808     /* renumber new_ranks to avoid "holes" in new set of processors */
7809     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7810     PetscCall(ISDestroy(&new_ranks));
7811     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7812     if (!aggregate) {
7813       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7814         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7815         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7816       } else if (oldranks) {
7817         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7818       } else {
7819         ranks_send_to_idx[0] = is_indices[0];
7820       }
7821     } else {
7822       PetscInt     idx = 0;
7823       PetscMPIInt  tag;
7824       MPI_Request *reqs;
7825 
7826       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7827       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7828       for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7829       PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7830       PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7831       PetscCall(PetscFree(reqs));
7832       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7833         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7834         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7835       } else if (oldranks) {
7836         ranks_send_to_idx[0] = oldranks[idx];
7837       } else {
7838         ranks_send_to_idx[0] = idx;
7839       }
7840     }
7841     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7842     /* clean up */
7843     PetscCall(PetscFree(oldranks));
7844     PetscCall(ISDestroy(&new_ranks_contig));
7845     PetscCall(MatDestroy(&subdomain_adj));
7846     PetscCall(MatPartitioningDestroy(&partitioner));
7847   }
7848   PetscCall(PetscSubcommDestroy(&psubcomm));
7849   PetscCall(PetscFree(procs_candidates));
7850 
7851   /* assemble parallel IS for sends */
7852   i = 1;
7853   if (!color) i = 0;
7854   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7855   PetscFunctionReturn(PETSC_SUCCESS);
7856 }
7857 
7858 typedef enum {
7859   MATDENSE_PRIVATE = 0,
7860   MATAIJ_PRIVATE,
7861   MATBAIJ_PRIVATE,
7862   MATSBAIJ_PRIVATE
7863 } MatTypePrivate;
7864 
7865 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7866 {
7867   Mat                    local_mat;
7868   IS                     is_sends_internal;
7869   PetscInt               rows, cols, new_local_rows;
7870   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7871   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7872   ISLocalToGlobalMapping l2gmap;
7873   PetscInt              *l2gmap_indices;
7874   const PetscInt        *is_indices;
7875   MatType                new_local_type;
7876   /* buffers */
7877   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7878   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7879   PetscInt          *recv_buffer_idxs_local;
7880   PetscScalar       *ptr_vals, *recv_buffer_vals;
7881   const PetscScalar *send_buffer_vals;
7882   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7883   /* MPI */
7884   MPI_Comm     comm, comm_n;
7885   PetscSubcomm subcomm;
7886   PetscMPIInt  n_sends, n_recvs, size;
7887   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7888   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7889   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7890   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7891   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7892 
7893   PetscFunctionBegin;
7894   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7895   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7896   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7897   PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
7898   PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
7899   PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
7900   PetscValidLogicalCollectiveBool(mat, reuse, 6);
7901   PetscValidLogicalCollectiveInt(mat, nis, 8);
7902   PetscValidLogicalCollectiveInt(mat, nvecs, 10);
7903   if (nvecs) {
7904     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7905     PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
7906   }
7907   /* further checks */
7908   PetscCall(MatISGetLocalMat(mat, &local_mat));
7909   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7910   /* XXX hack for multi_element */
7911   if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat));
7912   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7913   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7914 
7915   PetscCall(MatGetSize(local_mat, &rows, &cols));
7916   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7917   if (reuse && *mat_n) {
7918     PetscInt mrows, mcols, mnrows, mncols;
7919     PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
7920     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7921     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7922     PetscCall(MatGetSize(mat, &mrows, &mcols));
7923     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7924     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7925     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7926   }
7927   PetscCall(MatGetBlockSize(local_mat, &bs));
7928   PetscValidLogicalCollectiveInt(mat, bs, 1);
7929 
7930   /* prepare IS for sending if not provided */
7931   if (!is_sends) {
7932     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7933     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7934   } else {
7935     PetscCall(PetscObjectReference((PetscObject)is_sends));
7936     is_sends_internal = is_sends;
7937   }
7938 
7939   /* get comm */
7940   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
7941 
7942   /* compute number of sends */
7943   PetscCall(ISGetLocalSize(is_sends_internal, &i));
7944   PetscCall(PetscMPIIntCast(i, &n_sends));
7945 
7946   /* compute number of receives */
7947   PetscCallMPI(MPI_Comm_size(comm, &size));
7948   PetscCall(PetscMalloc1(size, &iflags));
7949   PetscCall(PetscArrayzero(iflags, size));
7950   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
7951   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7952   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
7953   PetscCall(PetscFree(iflags));
7954 
7955   /* restrict comm if requested */
7956   subcomm     = NULL;
7957   destroy_mat = PETSC_FALSE;
7958   if (restrict_comm) {
7959     PetscMPIInt color, subcommsize;
7960 
7961     color = 0;
7962     if (restrict_full) {
7963       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7964     } else {
7965       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7966     }
7967     PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
7968     subcommsize = size - subcommsize;
7969     /* check if reuse has been requested */
7970     if (reuse) {
7971       if (*mat_n) {
7972         PetscMPIInt subcommsize2;
7973         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
7974         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
7975         comm_n = PetscObjectComm((PetscObject)*mat_n);
7976       } else {
7977         comm_n = PETSC_COMM_SELF;
7978       }
7979     } else { /* MAT_INITIAL_MATRIX */
7980       PetscMPIInt rank;
7981 
7982       PetscCallMPI(MPI_Comm_rank(comm, &rank));
7983       PetscCall(PetscSubcommCreate(comm, &subcomm));
7984       PetscCall(PetscSubcommSetNumber(subcomm, 2));
7985       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
7986       comm_n = PetscSubcommChild(subcomm);
7987     }
7988     /* flag to destroy *mat_n if not significative */
7989     if (color) destroy_mat = PETSC_TRUE;
7990   } else {
7991     comm_n = comm;
7992   }
7993 
7994   /* prepare send/receive buffers */
7995   PetscCall(PetscMalloc1(size, &ilengths_idxs));
7996   PetscCall(PetscArrayzero(ilengths_idxs, size));
7997   PetscCall(PetscMalloc1(size, &ilengths_vals));
7998   PetscCall(PetscArrayzero(ilengths_vals, size));
7999   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8000 
8001   /* Get data from local matrices */
8002   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8003   /* TODO: See below some guidelines on how to prepare the local buffers */
8004   /*
8005        send_buffer_vals should contain the raw values of the local matrix
8006        send_buffer_idxs should contain:
8007        - MatType_PRIVATE type
8008        - PetscInt        size_of_l2gmap
8009        - PetscInt        global_row_indices[size_of_l2gmap]
8010        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8011     */
8012   {
8013     ISLocalToGlobalMapping mapping;
8014 
8015     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8016     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8017     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8018     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8019     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8020     send_buffer_idxs[1] = i;
8021     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8022     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8023     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8024     PetscCall(PetscMPIIntCast(i, &len));
8025     for (i = 0; i < n_sends; i++) {
8026       ilengths_vals[is_indices[i]] = len * len;
8027       ilengths_idxs[is_indices[i]] = len + 2;
8028     }
8029   }
8030   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8031   /* additional is (if any) */
8032   if (nis) {
8033     PetscMPIInt psum;
8034     PetscInt    j;
8035     for (j = 0, psum = 0; j < nis; j++) {
8036       PetscInt plen;
8037       PetscCall(ISGetLocalSize(isarray[j], &plen));
8038       PetscCall(PetscMPIIntCast(plen, &len));
8039       psum += len + 1; /* indices + length */
8040     }
8041     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8042     for (j = 0, psum = 0; j < nis; j++) {
8043       PetscInt        plen;
8044       const PetscInt *is_array_idxs;
8045       PetscCall(ISGetLocalSize(isarray[j], &plen));
8046       send_buffer_idxs_is[psum] = plen;
8047       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8048       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8049       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8050       psum += plen + 1; /* indices + length */
8051     }
8052     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8053     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8054   }
8055   PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8056 
8057   buf_size_idxs    = 0;
8058   buf_size_vals    = 0;
8059   buf_size_idxs_is = 0;
8060   buf_size_vecs    = 0;
8061   for (i = 0; i < n_recvs; i++) {
8062     buf_size_idxs += (PetscInt)olengths_idxs[i];
8063     buf_size_vals += (PetscInt)olengths_vals[i];
8064     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
8065     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
8066   }
8067   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8068   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8069   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8070   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8071 
8072   /* get new tags for clean communications */
8073   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8074   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8075   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8076   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8077 
8078   /* allocate for requests */
8079   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8080   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8081   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8082   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8083   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8084   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8085   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8086   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8087 
8088   /* communications */
8089   ptr_idxs    = recv_buffer_idxs;
8090   ptr_vals    = recv_buffer_vals;
8091   ptr_idxs_is = recv_buffer_idxs_is;
8092   ptr_vecs    = recv_buffer_vecs;
8093   for (i = 0; i < n_recvs; i++) {
8094     source_dest = onodes[i];
8095     PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i]));
8096     PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i]));
8097     ptr_idxs += olengths_idxs[i];
8098     ptr_vals += olengths_vals[i];
8099     if (nis) {
8100       source_dest = onodes_is[i];
8101       PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i]));
8102       ptr_idxs_is += olengths_idxs_is[i];
8103     }
8104     if (nvecs) {
8105       source_dest = onodes[i];
8106       PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i]));
8107       ptr_vecs += olengths_idxs[i] - 2;
8108     }
8109   }
8110   for (i = 0; i < n_sends; i++) {
8111     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8112     PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8113     PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8114     if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8115     if (nvecs) {
8116       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8117       PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8118     }
8119   }
8120   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8121   PetscCall(ISDestroy(&is_sends_internal));
8122 
8123   /* assemble new l2g map */
8124   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8125   ptr_idxs       = recv_buffer_idxs;
8126   new_local_rows = 0;
8127   for (i = 0; i < n_recvs; i++) {
8128     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8129     ptr_idxs += olengths_idxs[i];
8130   }
8131   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8132   ptr_idxs       = recv_buffer_idxs;
8133   new_local_rows = 0;
8134   for (i = 0; i < n_recvs; i++) {
8135     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8136     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8137     ptr_idxs += olengths_idxs[i];
8138   }
8139   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8140   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8141   PetscCall(PetscFree(l2gmap_indices));
8142 
8143   /* infer new local matrix type from received local matrices type */
8144   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8145   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8146   if (n_recvs) {
8147     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8148     ptr_idxs                              = recv_buffer_idxs;
8149     for (i = 0; i < n_recvs; i++) {
8150       if ((PetscInt)new_local_type_private != *ptr_idxs) {
8151         new_local_type_private = MATAIJ_PRIVATE;
8152         break;
8153       }
8154       ptr_idxs += olengths_idxs[i];
8155     }
8156     switch (new_local_type_private) {
8157     case MATDENSE_PRIVATE:
8158       new_local_type = MATSEQAIJ;
8159       bs             = 1;
8160       break;
8161     case MATAIJ_PRIVATE:
8162       new_local_type = MATSEQAIJ;
8163       bs             = 1;
8164       break;
8165     case MATBAIJ_PRIVATE:
8166       new_local_type = MATSEQBAIJ;
8167       break;
8168     case MATSBAIJ_PRIVATE:
8169       new_local_type = MATSEQSBAIJ;
8170       break;
8171     default:
8172       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8173     }
8174   } else { /* by default, new_local_type is seqaij */
8175     new_local_type = MATSEQAIJ;
8176     bs             = 1;
8177   }
8178 
8179   /* create MATIS object if needed */
8180   if (!reuse) {
8181     PetscCall(MatGetSize(mat, &rows, &cols));
8182     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8183   } else {
8184     /* it also destroys the local matrices */
8185     if (*mat_n) {
8186       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8187     } else { /* this is a fake object */
8188       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8189     }
8190   }
8191   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8192   PetscCall(MatSetType(local_mat, new_local_type));
8193 
8194   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8195 
8196   /* Global to local map of received indices */
8197   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8198   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8199   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8200 
8201   /* restore attributes -> type of incoming data and its size */
8202   buf_size_idxs = 0;
8203   for (i = 0; i < n_recvs; i++) {
8204     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
8205     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8206     buf_size_idxs += (PetscInt)olengths_idxs[i];
8207   }
8208   PetscCall(PetscFree(recv_buffer_idxs));
8209 
8210   /* set preallocation */
8211   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8212   if (!newisdense) {
8213     PetscInt *new_local_nnz = NULL;
8214 
8215     ptr_idxs = recv_buffer_idxs_local;
8216     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8217     for (i = 0; i < n_recvs; i++) {
8218       PetscInt j;
8219       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8220         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8221       } else {
8222         /* TODO */
8223       }
8224       ptr_idxs += olengths_idxs[i];
8225     }
8226     if (new_local_nnz) {
8227       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8228       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8229       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8230       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8231       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8232       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8233     } else {
8234       PetscCall(MatSetUp(local_mat));
8235     }
8236     PetscCall(PetscFree(new_local_nnz));
8237   } else {
8238     PetscCall(MatSetUp(local_mat));
8239   }
8240 
8241   /* set values */
8242   ptr_vals = recv_buffer_vals;
8243   ptr_idxs = recv_buffer_idxs_local;
8244   for (i = 0; i < n_recvs; i++) {
8245     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8246       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8247       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8248       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8249       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8250       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8251     } else {
8252       /* TODO */
8253     }
8254     ptr_idxs += olengths_idxs[i];
8255     ptr_vals += olengths_vals[i];
8256   }
8257   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8258   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8259   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8260   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8261   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8262   PetscCall(PetscFree(recv_buffer_vals));
8263 
8264 #if 0
8265   if (!restrict_comm) { /* check */
8266     Vec       lvec,rvec;
8267     PetscReal infty_error;
8268 
8269     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8270     PetscCall(VecSetRandom(rvec,NULL));
8271     PetscCall(MatMult(mat,rvec,lvec));
8272     PetscCall(VecScale(lvec,-1.0));
8273     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8274     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8275     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8276     PetscCall(VecDestroy(&rvec));
8277     PetscCall(VecDestroy(&lvec));
8278   }
8279 #endif
8280 
8281   /* assemble new additional is (if any) */
8282   if (nis) {
8283     PetscInt **temp_idxs, *count_is, j, psum;
8284 
8285     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8286     PetscCall(PetscCalloc1(nis, &count_is));
8287     ptr_idxs = recv_buffer_idxs_is;
8288     psum     = 0;
8289     for (i = 0; i < n_recvs; i++) {
8290       for (j = 0; j < nis; j++) {
8291         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8292         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
8293         psum += plen;
8294         ptr_idxs += plen + 1; /* shift pointer to received data */
8295       }
8296     }
8297     PetscCall(PetscMalloc1(nis, &temp_idxs));
8298     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8299     for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8300     PetscCall(PetscArrayzero(count_is, nis));
8301     ptr_idxs = recv_buffer_idxs_is;
8302     for (i = 0; i < n_recvs; i++) {
8303       for (j = 0; j < nis; j++) {
8304         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8305         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8306         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
8307         ptr_idxs += plen + 1; /* shift pointer to received data */
8308       }
8309     }
8310     for (i = 0; i < nis; i++) {
8311       PetscCall(ISDestroy(&isarray[i]));
8312       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8313       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8314     }
8315     PetscCall(PetscFree(count_is));
8316     PetscCall(PetscFree(temp_idxs[0]));
8317     PetscCall(PetscFree(temp_idxs));
8318   }
8319   /* free workspace */
8320   PetscCall(PetscFree(recv_buffer_idxs_is));
8321   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8322   PetscCall(PetscFree(send_buffer_idxs));
8323   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8324   if (isdense) {
8325     PetscCall(MatISGetLocalMat(mat, &local_mat));
8326     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8327     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8328   } else {
8329     /* PetscCall(PetscFree(send_buffer_vals)); */
8330   }
8331   if (nis) {
8332     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8333     PetscCall(PetscFree(send_buffer_idxs_is));
8334   }
8335 
8336   if (nvecs) {
8337     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8338     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8339     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8340     PetscCall(VecDestroy(&nnsp_vec[0]));
8341     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8342     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8343     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8344     /* set values */
8345     ptr_vals = recv_buffer_vecs;
8346     ptr_idxs = recv_buffer_idxs_local;
8347     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8348     for (i = 0; i < n_recvs; i++) {
8349       PetscInt j;
8350       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8351       ptr_idxs += olengths_idxs[i];
8352       ptr_vals += olengths_idxs[i] - 2;
8353     }
8354     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8355     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8356     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8357   }
8358 
8359   PetscCall(PetscFree(recv_buffer_vecs));
8360   PetscCall(PetscFree(recv_buffer_idxs_local));
8361   PetscCall(PetscFree(recv_req_idxs));
8362   PetscCall(PetscFree(recv_req_vals));
8363   PetscCall(PetscFree(recv_req_vecs));
8364   PetscCall(PetscFree(recv_req_idxs_is));
8365   PetscCall(PetscFree(send_req_idxs));
8366   PetscCall(PetscFree(send_req_vals));
8367   PetscCall(PetscFree(send_req_vecs));
8368   PetscCall(PetscFree(send_req_idxs_is));
8369   PetscCall(PetscFree(ilengths_vals));
8370   PetscCall(PetscFree(ilengths_idxs));
8371   PetscCall(PetscFree(olengths_vals));
8372   PetscCall(PetscFree(olengths_idxs));
8373   PetscCall(PetscFree(onodes));
8374   if (nis) {
8375     PetscCall(PetscFree(ilengths_idxs_is));
8376     PetscCall(PetscFree(olengths_idxs_is));
8377     PetscCall(PetscFree(onodes_is));
8378   }
8379   PetscCall(PetscSubcommDestroy(&subcomm));
8380   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8381     PetscCall(MatDestroy(mat_n));
8382     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8383     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8384       PetscCall(VecDestroy(&nnsp_vec[0]));
8385     }
8386     *mat_n = NULL;
8387   }
8388   PetscFunctionReturn(PETSC_SUCCESS);
8389 }
8390 
8391 /* temporary hack into ksp private data structure */
8392 #include <petsc/private/kspimpl.h>
8393 
8394 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8395 {
8396   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
8397   PC_IS                 *pcis   = (PC_IS *)pc->data;
8398   PCBDDCGraph            graph  = pcbddc->mat_graph;
8399   Mat                    coarse_mat, coarse_mat_is;
8400   Mat                    coarsedivudotp = NULL;
8401   Mat                    coarseG, t_coarse_mat_is;
8402   MatNullSpace           CoarseNullSpace = NULL;
8403   ISLocalToGlobalMapping coarse_islg;
8404   IS                     coarse_is, *isarray, corners;
8405   PetscInt               i, im_active = -1, active_procs = -1;
8406   PetscInt               nis, nisdofs, nisneu, nisvert;
8407   PetscInt               coarse_eqs_per_proc, coarsening_ratio;
8408   PC                     pc_temp;
8409   PCType                 coarse_pc_type;
8410   KSPType                coarse_ksp_type;
8411   PetscBool              multilevel_requested, multilevel_allowed;
8412   PetscBool              coarse_reuse, multi_element = graph->multi_element;
8413   PetscInt               ncoarse, nedcfield;
8414   PetscBool              compute_vecs = PETSC_FALSE;
8415   PetscScalar           *array;
8416   MatReuse               coarse_mat_reuse;
8417   PetscBool              restr, full_restr, have_void;
8418   PetscMPIInt            size;
8419 
8420   PetscFunctionBegin;
8421   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8422   /* Assign global numbering to coarse dofs */
8423   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8424     PetscInt ocoarse_size;
8425     compute_vecs = PETSC_TRUE;
8426 
8427     pcbddc->new_primal_space = PETSC_TRUE;
8428     ocoarse_size             = pcbddc->coarse_size;
8429     PetscCall(PetscFree(pcbddc->global_primal_indices));
8430     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8431     /* see if we can avoid some work */
8432     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8433       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8434       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8435         PetscCall(KSPReset(pcbddc->coarse_ksp));
8436         coarse_reuse = PETSC_FALSE;
8437       } else { /* we can safely reuse already computed coarse matrix */
8438         coarse_reuse = PETSC_TRUE;
8439       }
8440     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8441       coarse_reuse = PETSC_FALSE;
8442     }
8443     /* reset any subassembling information */
8444     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8445   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8446     coarse_reuse = PETSC_TRUE;
8447   }
8448   if (coarse_reuse && pcbddc->coarse_ksp) {
8449     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8450     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8451     coarse_mat_reuse = MAT_REUSE_MATRIX;
8452   } else {
8453     coarse_mat       = NULL;
8454     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8455   }
8456 
8457   /* creates temporary l2gmap and IS for coarse indexes */
8458   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8459   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8460 
8461   /* creates temporary MATIS object for coarse matrix */
8462   PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8463   PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8464   PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8465   PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE));
8466   PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8467   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8468   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8469   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8470   PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8471 
8472   /* count "active" (i.e. with positive local size) and "void" processes */
8473   im_active = !!pcis->n;
8474   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8475 
8476   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8477   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8478   /* full_restr : just use the receivers from the subassembling pattern */
8479   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8480   coarse_mat_is        = NULL;
8481   multilevel_allowed   = PETSC_FALSE;
8482   multilevel_requested = PETSC_FALSE;
8483   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8484   if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8485   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8486   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8487   coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8488   if (multilevel_requested) {
8489     ncoarse    = active_procs / coarsening_ratio;
8490     restr      = PETSC_FALSE;
8491     full_restr = PETSC_FALSE;
8492   } else {
8493     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8494     restr      = PETSC_TRUE;
8495     full_restr = PETSC_TRUE;
8496   }
8497   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8498   ncoarse = PetscMax(1, ncoarse);
8499   if (!pcbddc->coarse_subassembling) {
8500     if (coarsening_ratio > 1) {
8501       if (multilevel_requested) {
8502         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8503       } else {
8504         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8505       }
8506     } else {
8507       PetscMPIInt rank;
8508 
8509       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8510       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8511       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8512     }
8513   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8514     PetscInt psum;
8515     if (pcbddc->coarse_ksp) psum = 1;
8516     else psum = 0;
8517     PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8518     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8519   }
8520   /* determine if we can go multilevel */
8521   if (multilevel_requested) {
8522     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8523     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
8524   }
8525   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8526 
8527   /* dump subassembling pattern */
8528   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8529   /* compute dofs splitting and neumann boundaries for coarse dofs */
8530   nedcfield = -1;
8531   corners   = NULL;
8532   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8533     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
8534     const PetscInt        *idxs;
8535     ISLocalToGlobalMapping tmap;
8536 
8537     /* create map between primal indices (in local representative ordering) and local primal numbering */
8538     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8539     /* allocate space for temporary storage */
8540     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8541     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8542     /* allocate for IS array */
8543     nisdofs = pcbddc->n_ISForDofsLocal;
8544     if (pcbddc->nedclocal) {
8545       if (pcbddc->nedfield > -1) {
8546         nedcfield = pcbddc->nedfield;
8547       } else {
8548         nedcfield = 0;
8549         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8550         nisdofs = 1;
8551       }
8552     }
8553     nisneu  = !!pcbddc->NeumannBoundariesLocal;
8554     nisvert = 0; /* nisvert is not used */
8555     nis     = nisdofs + nisneu + nisvert;
8556     PetscCall(PetscMalloc1(nis, &isarray));
8557     /* dofs splitting */
8558     for (i = 0; i < nisdofs; i++) {
8559       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8560       if (nedcfield != i) {
8561         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8562         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8563         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8564         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8565       } else {
8566         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8567         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8568         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8569         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8570         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8571       }
8572       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8573       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8574       /* PetscCall(ISView(isarray[i],0)); */
8575     }
8576     /* neumann boundaries */
8577     if (pcbddc->NeumannBoundariesLocal) {
8578       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8579       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8580       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8581       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8582       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8583       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8584       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8585       /* PetscCall(ISView(isarray[nisdofs],0)); */
8586     }
8587     /* coordinates */
8588     if (pcbddc->corner_selected) {
8589       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8590       PetscCall(ISGetLocalSize(corners, &tsize));
8591       PetscCall(ISGetIndices(corners, &idxs));
8592       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8593       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8594       PetscCall(ISRestoreIndices(corners, &idxs));
8595       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8596       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8597       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8598     }
8599     PetscCall(PetscFree(tidxs));
8600     PetscCall(PetscFree(tidxs2));
8601     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8602   } else {
8603     nis     = 0;
8604     nisdofs = 0;
8605     nisneu  = 0;
8606     nisvert = 0;
8607     isarray = NULL;
8608   }
8609   /* destroy no longer needed map */
8610   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8611 
8612   /* subassemble */
8613   if (multilevel_allowed) {
8614     Vec       vp[1];
8615     PetscInt  nvecs = 0;
8616     PetscBool reuse;
8617 
8618     vp[0] = NULL;
8619     /* XXX HDIV also */
8620     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8621       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8622       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8623       PetscCall(VecSetType(vp[0], VECSTANDARD));
8624       nvecs = 1;
8625 
8626       if (pcbddc->divudotp) {
8627         Mat      B, loc_divudotp;
8628         Vec      v, p;
8629         IS       dummy;
8630         PetscInt np;
8631 
8632         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8633         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8634         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8635         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8636         PetscCall(MatCreateVecs(B, &v, &p));
8637         PetscCall(VecSet(p, 1.));
8638         PetscCall(MatMultTranspose(B, p, v));
8639         PetscCall(VecDestroy(&p));
8640         PetscCall(MatDestroy(&B));
8641         PetscCall(VecGetArray(vp[0], &array));
8642         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8643         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8644         PetscCall(VecResetArray(pcbddc->vec1_P));
8645         PetscCall(VecRestoreArray(vp[0], &array));
8646         PetscCall(ISDestroy(&dummy));
8647         PetscCall(VecDestroy(&v));
8648       }
8649     }
8650     if (coarse_mat) reuse = PETSC_TRUE;
8651     else reuse = PETSC_FALSE;
8652     if (multi_element) {
8653       /* XXX divudotp */
8654       PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE));
8655       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8656       coarse_mat_is = t_coarse_mat_is;
8657     } else {
8658       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8659       if (reuse) {
8660         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8661       } else {
8662         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8663       }
8664       if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8665         PetscScalar       *arraym;
8666         const PetscScalar *arrayv;
8667         PetscInt           nl;
8668         PetscCall(VecGetLocalSize(vp[0], &nl));
8669         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8670         PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8671         PetscCall(VecGetArrayRead(vp[0], &arrayv));
8672         PetscCall(PetscArraycpy(arraym, arrayv, nl));
8673         PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8674         PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8675         PetscCall(VecDestroy(&vp[0]));
8676       } else {
8677         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8678       }
8679     }
8680   } else {
8681     if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8682     else {
8683       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8684       coarse_mat_is = t_coarse_mat_is;
8685     }
8686   }
8687   if (coarse_mat_is || coarse_mat) {
8688     if (!multilevel_allowed) {
8689       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8690     } else {
8691       /* if this matrix is present, it means we are not reusing the coarse matrix */
8692       if (coarse_mat_is) {
8693         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8694         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8695         coarse_mat = coarse_mat_is;
8696       }
8697     }
8698   }
8699   PetscCall(MatDestroy(&t_coarse_mat_is));
8700   PetscCall(MatDestroy(&coarse_mat_is));
8701 
8702   /* create local to global scatters for coarse problem */
8703   if (compute_vecs) {
8704     PetscInt lrows;
8705     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8706     if (coarse_mat) {
8707       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8708     } else {
8709       lrows = 0;
8710     }
8711     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8712     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8713     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8714     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8715     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8716   }
8717   PetscCall(ISDestroy(&coarse_is));
8718 
8719   /* set defaults for coarse KSP and PC */
8720   if (multilevel_allowed) {
8721     coarse_ksp_type = KSPRICHARDSON;
8722     coarse_pc_type  = PCBDDC;
8723   } else {
8724     coarse_ksp_type = KSPPREONLY;
8725     coarse_pc_type  = PCREDUNDANT;
8726   }
8727 
8728   /* print some info if requested */
8729   if (pcbddc->dbg_flag) {
8730     if (!multilevel_allowed) {
8731       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8732       if (multilevel_requested) {
8733         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8734       } else if (pcbddc->max_levels) {
8735         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8736       }
8737       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8738     }
8739   }
8740 
8741   /* communicate coarse discrete gradient */
8742   coarseG = NULL;
8743   if (pcbddc->nedcG && multilevel_allowed) {
8744     MPI_Comm ccomm;
8745     if (coarse_mat) {
8746       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8747     } else {
8748       ccomm = MPI_COMM_NULL;
8749     }
8750     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8751   }
8752 
8753   /* create the coarse KSP object only once with defaults */
8754   if (coarse_mat) {
8755     PetscBool   isredundant, isbddc, force, valid;
8756     PetscViewer dbg_viewer = NULL;
8757     PetscBool   isset, issym, isher, isspd;
8758 
8759     if (pcbddc->dbg_flag) {
8760       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8761       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8762     }
8763     if (!pcbddc->coarse_ksp) {
8764       char   prefix[256], str_level[16];
8765       size_t len;
8766 
8767       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8768       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8769       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8770       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8771       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8772       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8773       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8774       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8775       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8776       /* TODO is this logic correct? should check for coarse_mat type */
8777       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8778       /* prefix */
8779       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8780       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8781       if (!pcbddc->current_level) {
8782         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8783         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8784       } else {
8785         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8786         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8787         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8788         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8789         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8790         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level));
8791         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8792       }
8793       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8794       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8795       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8796       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8797       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8798       /* allow user customization */
8799       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8800       /* get some info after set from options */
8801       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8802       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8803       force = PETSC_FALSE;
8804       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8805       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8806       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8807       if (multilevel_allowed && !force && !valid) {
8808         isbddc = PETSC_TRUE;
8809         PetscCall(PCSetType(pc_temp, PCBDDC));
8810         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8811         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8812         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8813         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8814           PetscObjectOptionsBegin((PetscObject)pc_temp);
8815           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8816           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8817           PetscOptionsEnd();
8818           pc_temp->setfromoptionscalled++;
8819         }
8820       }
8821     }
8822     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8823     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8824     if (nisdofs) {
8825       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8826       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8827     }
8828     if (nisneu) {
8829       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8830       PetscCall(ISDestroy(&isarray[nisdofs]));
8831     }
8832     if (nisvert) {
8833       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8834       PetscCall(ISDestroy(&isarray[nis - 1]));
8835     }
8836     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8837 
8838     /* get some info after set from options */
8839     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8840 
8841     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8842     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8843     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8844     force = PETSC_FALSE;
8845     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8846     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8847     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8848     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8849     if (isredundant) {
8850       KSP inner_ksp;
8851       PC  inner_pc;
8852 
8853       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8854       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8855     }
8856 
8857     /* parameters which miss an API */
8858     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8859     if (isbddc) {
8860       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8861 
8862       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8863       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8864       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8865       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8866       if (pcbddc_coarse->benign_saddle_point) {
8867         Mat                    coarsedivudotp_is;
8868         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8869         IS                     row, col;
8870         const PetscInt        *gidxs;
8871         PetscInt               n, st, M, N;
8872 
8873         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8874         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8875         st = st - n;
8876         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8877         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8878         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8879         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8880         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8881         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8882         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8883         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8884         PetscCall(ISGetSize(row, &M));
8885         PetscCall(MatGetSize(coarse_mat, &N, NULL));
8886         PetscCall(ISDestroy(&row));
8887         PetscCall(ISDestroy(&col));
8888         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8889         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8890         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8891         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8892         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8893         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8894         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8895         PetscCall(MatDestroy(&coarsedivudotp));
8896         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8897         PetscCall(MatDestroy(&coarsedivudotp_is));
8898         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8899         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8900       }
8901     }
8902 
8903     /* propagate symmetry info of coarse matrix */
8904     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8905     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8906     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8907     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8908     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8909     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8910     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
8911 
8912     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8913     /* set operators */
8914     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8915     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8916     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8917     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8918   }
8919   PetscCall(MatDestroy(&coarseG));
8920   PetscCall(PetscFree(isarray));
8921 #if 0
8922   {
8923     PetscViewer viewer;
8924     char filename[256];
8925     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
8926     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8927     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8928     PetscCall(MatView(coarse_mat,viewer));
8929     PetscCall(PetscViewerPopFormat(viewer));
8930     PetscCall(PetscViewerDestroy(&viewer));
8931   }
8932 #endif
8933 
8934   if (corners) {
8935     Vec             gv;
8936     IS              is;
8937     const PetscInt *idxs;
8938     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8939     PetscScalar    *coords;
8940 
8941     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8942     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8943     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8944     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8945     PetscCall(VecSetBlockSize(gv, cdim));
8946     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8947     PetscCall(VecSetType(gv, VECSTANDARD));
8948     PetscCall(VecSetFromOptions(gv));
8949     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8950 
8951     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8952     PetscCall(ISGetLocalSize(is, &n));
8953     PetscCall(ISGetIndices(is, &idxs));
8954     PetscCall(PetscMalloc1(n * cdim, &coords));
8955     for (i = 0; i < n; i++) {
8956       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8957     }
8958     PetscCall(ISRestoreIndices(is, &idxs));
8959     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8960 
8961     PetscCall(ISGetLocalSize(corners, &n));
8962     PetscCall(ISGetIndices(corners, &idxs));
8963     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
8964     PetscCall(ISRestoreIndices(corners, &idxs));
8965     PetscCall(PetscFree(coords));
8966     PetscCall(VecAssemblyBegin(gv));
8967     PetscCall(VecAssemblyEnd(gv));
8968     PetscCall(VecGetArray(gv, &coords));
8969     if (pcbddc->coarse_ksp) {
8970       PC        coarse_pc;
8971       PetscBool isbddc;
8972 
8973       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
8974       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
8975       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8976         PetscReal *realcoords;
8977 
8978         PetscCall(VecGetLocalSize(gv, &n));
8979 #if defined(PETSC_USE_COMPLEX)
8980         PetscCall(PetscMalloc1(n, &realcoords));
8981         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8982 #else
8983         realcoords = coords;
8984 #endif
8985         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
8986 #if defined(PETSC_USE_COMPLEX)
8987         PetscCall(PetscFree(realcoords));
8988 #endif
8989       }
8990     }
8991     PetscCall(VecRestoreArray(gv, &coords));
8992     PetscCall(VecDestroy(&gv));
8993   }
8994   PetscCall(ISDestroy(&corners));
8995 
8996   if (pcbddc->coarse_ksp) {
8997     Vec crhs, csol;
8998 
8999     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9000     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9001     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9002     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9003   }
9004   PetscCall(MatDestroy(&coarsedivudotp));
9005 
9006   /* compute null space for coarse solver if the benign trick has been requested */
9007   if (pcbddc->benign_null) {
9008     PetscCall(VecSet(pcbddc->vec1_P, 0.));
9009     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9010     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9011     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9012     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9013     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9014     if (coarse_mat) {
9015       Vec          nullv;
9016       PetscScalar *array, *array2;
9017       PetscInt     nl;
9018 
9019       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9020       PetscCall(VecGetLocalSize(nullv, &nl));
9021       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9022       PetscCall(VecGetArray(nullv, &array2));
9023       PetscCall(PetscArraycpy(array2, array, nl));
9024       PetscCall(VecRestoreArray(nullv, &array2));
9025       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9026       PetscCall(VecNormalize(nullv, NULL));
9027       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9028       PetscCall(VecDestroy(&nullv));
9029     }
9030   }
9031   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9032 
9033   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9034   if (pcbddc->coarse_ksp) {
9035     PetscBool ispreonly;
9036 
9037     if (CoarseNullSpace) {
9038       PetscBool isnull;
9039 
9040       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9041       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9042       /* TODO: add local nullspaces (if any) */
9043     }
9044     /* setup coarse ksp */
9045     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9046     /* Check coarse problem if in debug mode or if solving with an iterative method */
9047     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9048     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9049       KSP         check_ksp;
9050       KSPType     check_ksp_type;
9051       PC          check_pc;
9052       Vec         check_vec, coarse_vec;
9053       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9054       PetscInt    its;
9055       PetscBool   compute_eigs;
9056       PetscReal  *eigs_r, *eigs_c;
9057       PetscInt    neigs;
9058       const char *prefix;
9059 
9060       /* Create ksp object suitable for estimation of extreme eigenvalues */
9061       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9062       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9063       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9064       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9065       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9066       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9067       /* prevent from setup unneeded object */
9068       PetscCall(KSPGetPC(check_ksp, &check_pc));
9069       PetscCall(PCSetType(check_pc, PCNONE));
9070       if (ispreonly) {
9071         check_ksp_type = KSPPREONLY;
9072         compute_eigs   = PETSC_FALSE;
9073       } else {
9074         check_ksp_type = KSPGMRES;
9075         compute_eigs   = PETSC_TRUE;
9076       }
9077       PetscCall(KSPSetType(check_ksp, check_ksp_type));
9078       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9079       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9080       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9081       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9082       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9083       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9084       PetscCall(KSPSetFromOptions(check_ksp));
9085       PetscCall(KSPSetUp(check_ksp));
9086       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9087       PetscCall(KSPSetPC(check_ksp, check_pc));
9088       /* create random vec */
9089       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9090       PetscCall(VecSetRandom(check_vec, NULL));
9091       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9092       /* solve coarse problem */
9093       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9094       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9095       /* set eigenvalue estimation if preonly has not been requested */
9096       if (compute_eigs) {
9097         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9098         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9099         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9100         if (neigs) {
9101           lambda_max = eigs_r[neigs - 1];
9102           lambda_min = eigs_r[0];
9103           if (pcbddc->use_coarse_estimates) {
9104             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9105               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9106               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9107             }
9108           }
9109         }
9110       }
9111 
9112       /* check coarse problem residual error */
9113       if (pcbddc->dbg_flag) {
9114         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9115         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9116         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9117         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9118         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9119         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9120         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9121         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9122         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9123         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
9124         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9125         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9126         if (compute_eigs) {
9127           PetscReal          lambda_max_s, lambda_min_s;
9128           KSPConvergedReason reason;
9129           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9130           PetscCall(KSPGetIterationNumber(check_ksp, &its));
9131           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9132           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9133           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9134           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9135         }
9136         PetscCall(PetscViewerFlush(dbg_viewer));
9137         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9138       }
9139       PetscCall(VecDestroy(&check_vec));
9140       PetscCall(VecDestroy(&coarse_vec));
9141       PetscCall(KSPDestroy(&check_ksp));
9142       if (compute_eigs) {
9143         PetscCall(PetscFree(eigs_r));
9144         PetscCall(PetscFree(eigs_c));
9145       }
9146     }
9147   }
9148   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9149   /* print additional info */
9150   if (pcbddc->dbg_flag) {
9151     /* waits until all processes reaches this point */
9152     PetscCall(PetscBarrier((PetscObject)pc));
9153     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9154     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9155   }
9156 
9157   /* free memory */
9158   PetscCall(MatDestroy(&coarse_mat));
9159   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9160   PetscFunctionReturn(PETSC_SUCCESS);
9161 }
9162 
9163 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9164 {
9165   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
9166   PC_IS          *pcis   = (PC_IS *)pc->data;
9167   IS              subset, subset_mult, subset_n;
9168   PetscInt        local_size, coarse_size = 0;
9169   PetscInt       *local_primal_indices = NULL;
9170   const PetscInt *t_local_primal_indices;
9171 
9172   PetscFunctionBegin;
9173   /* Compute global number of coarse dofs */
9174   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9175   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9176   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9177   PetscCall(ISDestroy(&subset_n));
9178   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9179   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9180   PetscCall(ISDestroy(&subset));
9181   PetscCall(ISDestroy(&subset_mult));
9182   PetscCall(ISGetLocalSize(subset_n, &local_size));
9183   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9184   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9185   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9186   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9187   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9188   PetscCall(ISDestroy(&subset_n));
9189 
9190   if (pcbddc->dbg_flag) {
9191     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9192     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9193     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9194     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9195   }
9196 
9197   /* get back data */
9198   *coarse_size_n          = coarse_size;
9199   *local_primal_indices_n = local_primal_indices;
9200   PetscFunctionReturn(PETSC_SUCCESS);
9201 }
9202 
9203 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9204 {
9205   IS           localis_t;
9206   PetscInt     i, lsize, *idxs, n;
9207   PetscScalar *vals;
9208 
9209   PetscFunctionBegin;
9210   /* get indices in local ordering exploiting local to global map */
9211   PetscCall(ISGetLocalSize(globalis, &lsize));
9212   PetscCall(PetscMalloc1(lsize, &vals));
9213   for (i = 0; i < lsize; i++) vals[i] = 1.0;
9214   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9215   PetscCall(VecSet(gwork, 0.0));
9216   PetscCall(VecSet(lwork, 0.0));
9217   if (idxs) { /* multilevel guard */
9218     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9219     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9220   }
9221   PetscCall(VecAssemblyBegin(gwork));
9222   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9223   PetscCall(PetscFree(vals));
9224   PetscCall(VecAssemblyEnd(gwork));
9225   /* now compute set in local ordering */
9226   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9227   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9228   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9229   PetscCall(VecGetSize(lwork, &n));
9230   for (i = 0, lsize = 0; i < n; i++) {
9231     if (PetscRealPart(vals[i]) > 0.5) lsize++;
9232   }
9233   PetscCall(PetscMalloc1(lsize, &idxs));
9234   for (i = 0, lsize = 0; i < n; i++) {
9235     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9236   }
9237   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9238   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9239   *localis = localis_t;
9240   PetscFunctionReturn(PETSC_SUCCESS);
9241 }
9242 
9243 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9244 {
9245   PC_IS   *pcis   = (PC_IS *)pc->data;
9246   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9247   PC_IS   *pcisf;
9248   PC_BDDC *pcbddcf;
9249   PC       pcf;
9250 
9251   PetscFunctionBegin;
9252   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9253   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9254   PetscCall(PCSetType(pcf, PCBDDC));
9255 
9256   pcisf   = (PC_IS *)pcf->data;
9257   pcbddcf = (PC_BDDC *)pcf->data;
9258 
9259   pcisf->is_B_local = pcis->is_B_local;
9260   pcisf->vec1_N     = pcis->vec1_N;
9261   pcisf->BtoNmap    = pcis->BtoNmap;
9262   pcisf->n          = pcis->n;
9263   pcisf->n_B        = pcis->n_B;
9264 
9265   PetscCall(PetscFree(pcbddcf->mat_graph));
9266   PetscCall(PetscFree(pcbddcf->sub_schurs));
9267   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
9268   pcbddcf->sub_schurs            = schurs;
9269   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
9270   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9271   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9272   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
9273   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
9274   pcbddcf->use_faces             = PETSC_TRUE;
9275   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
9276   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
9277   pcbddcf->use_qr_single         = (PetscBool)!constraints;
9278   pcbddcf->fake_change           = PETSC_TRUE;
9279   pcbddcf->dbg_flag              = pcbddc->dbg_flag;
9280 
9281   PetscCall(PCBDDCAdaptiveSelection(pcf));
9282   PetscCall(PCBDDCConstraintsSetUp(pcf));
9283 
9284   *change = pcbddcf->ConstraintMatrix;
9285   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9286   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9287   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9288 
9289   if (schurs) pcbddcf->sub_schurs = NULL;
9290   pcbddcf->ConstraintMatrix = NULL;
9291   pcbddcf->mat_graph        = NULL;
9292   pcisf->is_B_local         = NULL;
9293   pcisf->vec1_N             = NULL;
9294   pcisf->BtoNmap            = NULL;
9295   PetscCall(PCDestroy(&pcf));
9296   PetscFunctionReturn(PETSC_SUCCESS);
9297 }
9298 
9299 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9300 {
9301   PC_IS          *pcis       = (PC_IS *)pc->data;
9302   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
9303   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9304   Mat             S_j;
9305   PetscInt       *used_xadj, *used_adjncy;
9306   PetscBool       free_used_adj;
9307 
9308   PetscFunctionBegin;
9309   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9310   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9311   free_used_adj = PETSC_FALSE;
9312   if (pcbddc->sub_schurs_layers == -1) {
9313     used_xadj   = NULL;
9314     used_adjncy = NULL;
9315   } else {
9316     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9317       used_xadj   = pcbddc->mat_graph->xadj;
9318       used_adjncy = pcbddc->mat_graph->adjncy;
9319     } else if (pcbddc->computed_rowadj) {
9320       used_xadj   = pcbddc->mat_graph->xadj;
9321       used_adjncy = pcbddc->mat_graph->adjncy;
9322     } else {
9323       PetscBool       flg_row = PETSC_FALSE;
9324       const PetscInt *xadj, *adjncy;
9325       PetscInt        nvtxs;
9326 
9327       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9328       if (flg_row) {
9329         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9330         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9331         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9332         free_used_adj = PETSC_TRUE;
9333       } else {
9334         pcbddc->sub_schurs_layers = -1;
9335         used_xadj                 = NULL;
9336         used_adjncy               = NULL;
9337       }
9338       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9339     }
9340   }
9341 
9342   /* setup sub_schurs data */
9343   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9344   if (!sub_schurs->schur_explicit) {
9345     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9346     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9347     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9348   } else {
9349     Mat       change        = NULL;
9350     Vec       scaling       = NULL;
9351     IS        change_primal = NULL, iP;
9352     PetscInt  benign_n;
9353     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
9354     PetscBool need_change       = PETSC_FALSE;
9355     PetscBool discrete_harmonic = PETSC_FALSE;
9356 
9357     if (!pcbddc->use_vertices && reuse_solvers) {
9358       PetscInt n_vertices;
9359 
9360       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9361       reuse_solvers = (PetscBool)!n_vertices;
9362     }
9363     if (!pcbddc->benign_change_explicit) {
9364       benign_n = pcbddc->benign_n;
9365     } else {
9366       benign_n = 0;
9367     }
9368     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9369        We need a global reduction to avoid possible deadlocks.
9370        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9371     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9372       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9373       PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9374       need_change = (PetscBool)(!need_change);
9375     }
9376     /* If the user defines additional constraints, we import them here */
9377     if (need_change) {
9378       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9379       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9380     }
9381     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9382 
9383     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9384     if (iP) {
9385       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9386       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9387       PetscOptionsEnd();
9388     }
9389     if (discrete_harmonic) {
9390       Mat A;
9391       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9392       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9393       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9394       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9395                                      pcbddc->benign_zerodiag_subs, change, change_primal));
9396       PetscCall(MatDestroy(&A));
9397     } else {
9398       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9399                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9400     }
9401     PetscCall(MatDestroy(&change));
9402     PetscCall(ISDestroy(&change_primal));
9403   }
9404   PetscCall(MatDestroy(&S_j));
9405 
9406   /* free adjacency */
9407   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9408   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9409   PetscFunctionReturn(PETSC_SUCCESS);
9410 }
9411 
9412 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9413 {
9414   PC_IS      *pcis   = (PC_IS *)pc->data;
9415   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9416   PCBDDCGraph graph;
9417 
9418   PetscFunctionBegin;
9419   /* attach interface graph for determining subsets */
9420   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9421     IS       verticesIS, verticescomm;
9422     PetscInt vsize, *idxs;
9423 
9424     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9425     PetscCall(ISGetSize(verticesIS, &vsize));
9426     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9427     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9428     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9429     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9430     PetscCall(PCBDDCGraphCreate(&graph));
9431     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9432     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9433     PetscCall(ISDestroy(&verticescomm));
9434     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9435   } else {
9436     graph = pcbddc->mat_graph;
9437   }
9438   /* print some info */
9439   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9440     IS       vertices;
9441     PetscInt nv, nedges, nfaces;
9442     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9443     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9444     PetscCall(ISGetSize(vertices, &nv));
9445     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9446     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9447     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9448     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9449     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9450     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9451     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9452     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9453   }
9454 
9455   /* sub_schurs init */
9456   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9457   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9458 
9459   /* free graph struct */
9460   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9461   PetscFunctionReturn(PETSC_SUCCESS);
9462 }
9463 
9464 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9465 {
9466   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
9467   PetscInt        n     = pc->pmat->rmap->n, ln, ni, st;
9468   const PetscInt *idxs;
9469   IS              gis;
9470 
9471   PetscFunctionBegin;
9472   if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9473   PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9474   PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9475   PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9476   PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9477   PetscCall(ISGetLocalSize(is, &ni));
9478   PetscCall(ISGetIndices(is, &idxs));
9479   for (PetscInt i = 0; i < ni; i++) {
9480     if (idxs[i] < 0 || idxs[i] >= ln) continue;
9481     matis->sf_leafdata[idxs[i]] = 1;
9482   }
9483   PetscCall(ISRestoreIndices(is, &idxs));
9484   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9485   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9486   ln = 0;
9487   for (PetscInt i = 0; i < n; i++) {
9488     if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9489   }
9490   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9491   PetscCall(ISView(gis, viewer));
9492   PetscCall(ISDestroy(&gis));
9493   PetscFunctionReturn(PETSC_SUCCESS);
9494 }
9495 
9496 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9497 {
9498   PetscInt    header[11];
9499   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9500   PetscViewer viewer;
9501   MPI_Comm    comm = PetscObjectComm((PetscObject)pc);
9502 
9503   PetscFunctionBegin;
9504   PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9505   if (load) {
9506     IS  is;
9507     Mat A;
9508 
9509     PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9510     PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9511     PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9512     PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9513     PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9514     PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9515     PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9516     PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9517     PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9518     PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9519     PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9520     if (header[0]) {
9521       PetscCall(ISCreate(comm, &is));
9522       PetscCall(ISLoad(is, viewer));
9523       PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9524       PetscCall(ISDestroy(&is));
9525     }
9526     if (header[1]) {
9527       PetscCall(ISCreate(comm, &is));
9528       PetscCall(ISLoad(is, viewer));
9529       PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9530       PetscCall(ISDestroy(&is));
9531     }
9532     if (header[2]) {
9533       IS *isarray;
9534 
9535       PetscCall(PetscMalloc1(header[2], &isarray));
9536       for (PetscInt i = 0; i < header[2]; i++) {
9537         PetscCall(ISCreate(comm, &isarray[i]));
9538         PetscCall(ISLoad(isarray[i], viewer));
9539       }
9540       PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9541       for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9542       PetscCall(PetscFree(isarray));
9543     }
9544     if (header[3]) {
9545       PetscCall(ISCreate(comm, &is));
9546       PetscCall(ISLoad(is, viewer));
9547       PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9548       PetscCall(ISDestroy(&is));
9549     }
9550     if (header[4]) {
9551       PetscCall(MatCreate(comm, &A));
9552       PetscCall(MatSetType(A, MATAIJ));
9553       PetscCall(MatLoad(A, viewer));
9554       PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9555       PetscCall(MatDestroy(&A));
9556     }
9557     if (header[9]) {
9558       PetscCall(MatCreate(comm, &A));
9559       PetscCall(MatSetType(A, MATIS));
9560       PetscCall(MatLoad(A, viewer));
9561       PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9562       PetscCall(MatDestroy(&A));
9563     }
9564   } else {
9565     header[0]  = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9566     header[1]  = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9567     header[2]  = pcbddc->n_ISForDofsLocal;
9568     header[3]  = (PetscInt)!!pcbddc->user_primal_vertices_local;
9569     header[4]  = (PetscInt)!!pcbddc->discretegradient;
9570     header[5]  = pcbddc->nedorder;
9571     header[6]  = pcbddc->nedfield;
9572     header[7]  = (PetscInt)pcbddc->nedglobal;
9573     header[8]  = (PetscInt)pcbddc->conforming;
9574     header[9]  = (PetscInt)!!pcbddc->divudotp;
9575     header[10] = (PetscInt)pcbddc->divudotp_trans;
9576     if (header[4]) header[3] = 0;
9577 
9578     PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9579     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9580     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9581     for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9582     if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9583     if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9584     if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9585   }
9586   PetscCall(PetscViewerDestroy(&viewer));
9587   PetscFunctionReturn(PETSC_SUCCESS);
9588 }
9589 
9590 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9591 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9592 {
9593   Mat         At;
9594   IS          rows;
9595   PetscInt    rst, ren;
9596   PetscLayout rmap;
9597 
9598   PetscFunctionBegin;
9599   rst = ren = 0;
9600   if (ccomm != MPI_COMM_NULL) {
9601     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9602     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9603     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9604     PetscCall(PetscLayoutSetUp(rmap));
9605     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9606   }
9607   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9608   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9609   PetscCall(ISDestroy(&rows));
9610 
9611   if (ccomm != MPI_COMM_NULL) {
9612     Mat_MPIAIJ *a, *b;
9613     IS          from, to;
9614     Vec         gvec;
9615     PetscInt    lsize;
9616 
9617     PetscCall(MatCreate(ccomm, B));
9618     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9619     PetscCall(MatSetType(*B, MATAIJ));
9620     PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9621     PetscCall(PetscLayoutSetUp((*B)->cmap));
9622     a = (Mat_MPIAIJ *)At->data;
9623     b = (Mat_MPIAIJ *)(*B)->data;
9624     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9625     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9626     PetscCall(PetscObjectReference((PetscObject)a->A));
9627     PetscCall(PetscObjectReference((PetscObject)a->B));
9628     b->A = a->A;
9629     b->B = a->B;
9630 
9631     b->donotstash   = a->donotstash;
9632     b->roworiented  = a->roworiented;
9633     b->rowindices   = NULL;
9634     b->rowvalues    = NULL;
9635     b->getrowactive = PETSC_FALSE;
9636 
9637     (*B)->rmap         = rmap;
9638     (*B)->factortype   = A->factortype;
9639     (*B)->assembled    = PETSC_TRUE;
9640     (*B)->insertmode   = NOT_SET_VALUES;
9641     (*B)->preallocated = PETSC_TRUE;
9642 
9643     if (a->colmap) {
9644 #if defined(PETSC_USE_CTABLE)
9645       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9646 #else
9647       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9648       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9649 #endif
9650     } else b->colmap = NULL;
9651     if (a->garray) {
9652       PetscInt len;
9653       len = a->B->cmap->n;
9654       PetscCall(PetscMalloc1(len + 1, &b->garray));
9655       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9656     } else b->garray = NULL;
9657 
9658     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9659     b->lvec = a->lvec;
9660 
9661     /* cannot use VecScatterCopy */
9662     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9663     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9664     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9665     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9666     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9667     PetscCall(ISDestroy(&from));
9668     PetscCall(ISDestroy(&to));
9669     PetscCall(VecDestroy(&gvec));
9670   }
9671   PetscCall(MatDestroy(&At));
9672   PetscFunctionReturn(PETSC_SUCCESS);
9673 }
9674 
9675 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
9676 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9677 {
9678   PetscBool isaij;
9679   MPI_Comm  comm;
9680 
9681   PetscFunctionBegin;
9682   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9683   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9684   PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9685   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9686   if (isaij) { /* SeqAIJ supports repeated rows */
9687     PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9688   } else {
9689     Mat                A_loc;
9690     Mat_SeqAIJ        *da;
9691     PetscSF            sf;
9692     PetscInt           ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9693     PetscScalar       *daa;
9694     const PetscInt    *idxs;
9695     const PetscSFNode *iremotes;
9696     PetscSFNode       *remotes;
9697 
9698     /* SF for incoming rows */
9699     PetscCall(PetscSFCreate(comm, &sf));
9700     PetscCall(ISGetLocalSize(rows, &ni));
9701     PetscCall(ISGetIndices(rows, &idxs));
9702     PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9703     PetscCall(ISRestoreIndices(rows, &idxs));
9704 
9705     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9706     da = (Mat_SeqAIJ *)A_loc->data;
9707     PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9708     for (PetscInt i = 0; i < m; i++) {
9709       rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9710       rdata[2 * i + 1] = da->i[i];
9711     }
9712     PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9713     PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9714     PetscCall(PetscMalloc1(ni + 1, &di));
9715     di[0] = 0;
9716     for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9717     PetscCall(PetscMalloc1(di[ni], &dj));
9718     PetscCall(PetscMalloc1(di[ni], &daa));
9719     PetscCall(PetscMalloc1(di[ni], &remotes));
9720 
9721     PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9722 
9723     /* SF graph for nonzeros */
9724     c = 0;
9725     for (PetscInt i = 0; i < ni; i++) {
9726       const PetscMPIInt rank  = (PetscMPIInt)iremotes[i].rank;
9727       const PetscInt    rsize = ldata[2 * i];
9728       for (PetscInt j = 0; j < rsize; j++) {
9729         remotes[c].rank  = rank;
9730         remotes[c].index = ldata[2 * i + 1] + j;
9731         c++;
9732       }
9733     }
9734     PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9735     PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9736     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9737     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9738     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9739     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9740 
9741     PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9742     PetscCall(MatDestroy(&A_loc));
9743     PetscCall(PetscSFDestroy(&sf));
9744     PetscCall(PetscFree(di));
9745     PetscCall(PetscFree(dj));
9746     PetscCall(PetscFree(daa));
9747     PetscCall(PetscFree(remotes));
9748     PetscCall(PetscFree2(ldata, rdata));
9749   }
9750   PetscFunctionReturn(PETSC_SUCCESS);
9751 }
9752