xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 010bec48e404682eb1850b482d20a94ac5ea46b4)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <../src/mat/impls/dense/seq/dense.h>
6 #include <petscdmplex.h>
7 #include <petscblaslapack.h>
8 #include <petsc/private/sfimpl.h>
9 #include <petsc/private/dmpleximpl.h>
10 #include <petscdmda.h>
11 
12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13 
14 /* if range is true,  it returns B s.t. span{B} = range(A)
15    if range is false, it returns B s.t. range(B) _|_ range(A) */
16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17 {
18   PetscScalar *uwork, *data, *U, ds = 0.;
19   PetscReal   *sing;
20   PetscBLASInt bM, bN, lwork, lierr, di = 1;
21   PetscInt     ulw, i, nr, nc, n;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal *rwork2;
24 #endif
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &nr, &nc));
28   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29 
30   /* workspace */
31   if (!work) {
32     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33     PetscCall(PetscMalloc1(ulw, &uwork));
34   } else {
35     ulw   = lw;
36     uwork = work;
37   }
38   n = PetscMin(nr, nc);
39   if (!rwork) {
40     PetscCall(PetscMalloc1(n, &sing));
41   } else {
42     sing = rwork;
43   }
44 
45   /* SVD */
46   PetscCall(PetscMalloc1(nr * nr, &U));
47   PetscCall(PetscBLASIntCast(nr, &bM));
48   PetscCall(PetscBLASIntCast(nc, &bN));
49   PetscCall(PetscBLASIntCast(ulw, &lwork));
50   PetscCall(MatDenseGetArray(A, &data));
51   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52 #if !defined(PETSC_USE_COMPLEX)
53   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54 #else
55   PetscCall(PetscMalloc1(5 * n, &rwork2));
56   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57   PetscCall(PetscFree(rwork2));
58 #endif
59   PetscCall(PetscFPTrapPop());
60   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
61   PetscCall(MatDenseRestoreArray(A, &data));
62   for (i = 0; i < n; i++)
63     if (sing[i] < PETSC_SMALL) break;
64   if (!rwork) PetscCall(PetscFree(sing));
65   if (!work) PetscCall(PetscFree(uwork));
66   /* create B */
67   if (!range) {
68     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69     PetscCall(MatDenseGetArray(*B, &data));
70     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71   } else {
72     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73     PetscCall(MatDenseGetArray(*B, &data));
74     PetscCall(PetscArraycpy(data, U, i * nr));
75   }
76   PetscCall(MatDenseRestoreArray(*B, &data));
77   PetscCall(PetscFree(U));
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* TODO REMOVE */
82 #if defined(PRINT_GDET)
83 static int inc = 0;
84 static int lev = 0;
85 #endif
86 
87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88 {
89   Mat          GE, GEd;
90   PetscInt     rsize, csize, esize;
91   PetscScalar *ptr;
92 
93   PetscFunctionBegin;
94   PetscCall(ISGetSize(edge, &esize));
95   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96   PetscCall(ISGetSize(extrow, &rsize));
97   PetscCall(ISGetSize(extcol, &csize));
98 
99   /* gradients */
100   ptr = work + 5 * esize;
101   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104   PetscCall(MatDestroy(&GE));
105 
106   /* constants */
107   ptr += rsize * csize;
108   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111   PetscCall(MatDestroy(&GE));
112   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113   PetscCall(MatDestroy(&GEd));
114 
115   if (corners) {
116     Mat                GEc;
117     const PetscScalar *vals;
118     PetscScalar        v;
119 
120     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122     PetscCall(MatDenseGetArrayRead(GEd, &vals));
123     /* v       = PetscAbsScalar(vals[0]); */
124     v        = 1.;
125     cvals[0] = vals[0] / v;
126     cvals[1] = vals[1] / v;
127     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128     PetscCall(MatScale(*GKins, 1. / v));
129 #if defined(PRINT_GDET)
130     {
131       PetscViewer viewer;
132       char        filename[256];
133       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137       PetscCall(MatView(GEc, viewer));
138       PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139       PetscCall(MatView(*GKins, viewer));
140       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141       PetscCall(MatView(GEd, viewer));
142       PetscCall(PetscViewerDestroy(&viewer));
143     }
144 #endif
145     PetscCall(MatDestroy(&GEd));
146     PetscCall(MatDestroy(&GEc));
147   }
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
152 
153 PetscErrorCode PCBDDCNedelecSupport(PC pc)
154 {
155   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
156   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
157   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158   PetscSF                sfv;
159   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160   MPI_Comm               comm;
161   IS                     lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162   IS                    *eedges, *extrows, *extcols, *alleedges;
163   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164   PetscScalar           *vals, *work;
165   PetscReal             *rwork;
166   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
167   PetscInt               ne, nv, Lv, order, n, field;
168   PetscInt               i, j, extmem, cum, maxsize, nee;
169   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170   PetscInt              *sfvleaves, *sfvroots;
171   PetscInt              *corners, *cedges;
172   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173   PetscInt              *emarks;
174   PetscBool              print, eerr, done, lrc[2], conforming, global, setprimal;
175 
176   PetscFunctionBegin;
177   /* If the discrete gradient is defined for a subset of dofs and global is true,
178      it assumes G is given in global ordering for all the dofs.
179      Otherwise, the ordering is global for the Nedelec field */
180   order      = pcbddc->nedorder;
181   conforming = pcbddc->conforming;
182   field      = pcbddc->nedfield;
183   global     = pcbddc->nedglobal;
184   setprimal  = PETSC_FALSE;
185   print      = PETSC_FALSE;
186 
187   /* Command line customization */
188   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190   /* print debug info and adaptive order TODO: to be removed */
191   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193   PetscOptionsEnd();
194 
195   /* Return if there are no edges in the decomposition */
196   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199   PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200   lrc[0] = PETSC_FALSE;
201   for (i = 0; i < n; i++) {
202     if (PetscRealPart(vals[i]) > 2.) {
203       lrc[0] = PETSC_TRUE;
204       break;
205     }
206   }
207   PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208   PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
209   if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
210 
211   /* Get Nedelec field */
212   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213   if (pcbddc->n_ISForDofsLocal && field >= 0) {
214     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215     nedfieldlocal = pcbddc->ISForDofsLocal[field];
216     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218     ne            = n;
219     nedfieldlocal = NULL;
220     global        = PETSC_TRUE;
221   } else if (field == PETSC_DECIDE) {
222     PetscInt rst, ren, *idx;
223 
224     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227     for (i = rst; i < ren; i++) {
228       PetscInt nc;
229 
230       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233     }
234     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236     PetscCall(PetscMalloc1(n, &idx));
237     for (i = 0, ne = 0; i < n; i++)
238       if (matis->sf_leafdata[i]) idx[ne++] = i;
239     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240   } else {
241     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242   }
243 
244   /* Sanity checks */
245   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
248 
249   /* Just set primal dofs and return */
250   if (setprimal) {
251     IS        enedfieldlocal;
252     PetscInt *eidxs;
253 
254     PetscCall(PetscMalloc1(ne, &eidxs));
255     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256     if (nedfieldlocal) {
257       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258       for (i = 0, cum = 0; i < ne; i++) {
259         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260       }
261       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262     } else {
263       for (i = 0, cum = 0; i < ne; i++) {
264         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265       }
266     }
267     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270     PetscCall(PetscFree(eidxs));
271     PetscCall(ISDestroy(&nedfieldlocal));
272     PetscCall(ISDestroy(&enedfieldlocal));
273     PetscFunctionReturn(PETSC_SUCCESS);
274   }
275 
276   /* Compute some l2g maps */
277   if (nedfieldlocal) {
278     IS is;
279 
280     /* need to map from the local Nedelec field to local numbering */
281     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286     if (global) {
287       PetscCall(PetscObjectReference((PetscObject)al2g));
288       el2g = al2g;
289     } else {
290       IS gis;
291 
292       PetscCall(ISRenumber(is, NULL, NULL, &gis));
293       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294       PetscCall(ISDestroy(&gis));
295     }
296     PetscCall(ISDestroy(&is));
297   } else {
298     /* one ref for the destruction of al2g, one for el2g */
299     PetscCall(PetscObjectReference((PetscObject)al2g));
300     PetscCall(PetscObjectReference((PetscObject)al2g));
301     el2g = al2g;
302     fl2g = NULL;
303   }
304 
305   /* Start communication to drop connections for interior edges (for cc analysis only) */
306   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308   if (nedfieldlocal) {
309     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312   } else {
313     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314   }
315   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
317 
318   /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319      Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320   if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
321 
322   /* drop connections with interior edges to avoid unneeded communications and memory movements */
323   PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324   PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325   PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326   if (global) {
327     PetscInt rst;
328 
329     PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331       if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332     }
333     PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334     PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335   } else {
336     PetscInt *tbz;
337 
338     PetscCall(PetscMalloc1(ne, &tbz));
339     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342     for (i = 0, cum = 0; i < ne; i++)
343       if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345     PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346     PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347     PetscCall(PetscFree(tbz));
348   }
349 
350   /* Extract subdomain relevant rows of G  */
351   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353   PetscCall(MatAIJExtractRows(G, lned, &lGall));
354   /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356   PetscCall(ISDestroy(&lned));
357   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358   PetscCall(MatDestroy(&lGall));
359   PetscCall(MatISGetLocalMat(lGis, &lG));
360   if (matis->allow_repeated) { /* multi-element support */
361     Mat                   *lGn, B;
362     IS                    *is_rows, *tcols, tmap, nmap;
363     PetscInt               subnv;
364     const PetscInt        *subvidxs;
365     ISLocalToGlobalMapping mapn;
366 
367     PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371       if (fl2g) {
372         PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373       } else {
374         PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375         is_rows[i] = pcbddc->local_subs[i];
376       }
377       PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379       PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380       PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382       PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383       PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384     }
385 
386     /* Create new MATIS with repeated vertices */
387     PetscCall(MatCreate(comm, &B));
388     PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389     PetscCall(MatSetType(B, MATIS));
390     PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391     PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392     PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393     PetscCall(ISDestroy(&tmap));
394     PetscCall(ISGetLocalSize(nmap, &subnv));
395     PetscCall(ISGetIndices(nmap, &subvidxs));
396     PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397     PetscCall(ISRestoreIndices(nmap, &subvidxs));
398     PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399     PetscCall(ISDestroy(&tmap));
400     PetscCall(ISDestroy(&nmap));
401     PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402     PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403     PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405       PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406       PetscCall(ISDestroy(&is_rows[i]));
407       PetscCall(ISDestroy(&tcols[i]));
408     }
409     PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410     PetscCall(PetscFree(lGn));
411     PetscCall(PetscFree(is_rows));
412     PetscCall(PetscFree(tcols));
413     PetscCall(MatISSetLocalMat(B, lG));
414     PetscCall(MatDestroy(&lG));
415 
416     PetscCall(MatDestroy(&lGis));
417     lGis = B;
418 
419     lGis->assembled = PETSC_TRUE;
420   }
421   PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
422 
423   /* SF for nodal dofs communications */
424   PetscCall(MatGetLocalSize(G, NULL, &Lv));
425   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426   PetscCall(PetscObjectReference((PetscObject)vl2g));
427   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428   PetscCall(PetscSFCreate(comm, &sfv));
429   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
432 
433   if (elements_corners) {
434     IS      tmp;
435     Vec     global, local;
436     Mat_IS *tGis = (Mat_IS *)lGis->data;
437 
438     PetscCall(MatCreateVecs(lGis, &global, NULL));
439     PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440     PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441     PetscCall(VecDestroy(&global));
442     PetscCall(VecDestroy(&local));
443     elements_corners = tmp;
444   }
445 
446   /* Destroy temporary G */
447   PetscCall(MatISGetLocalMat(lGis, &lG));
448   PetscCall(PetscObjectReference((PetscObject)lG));
449   PetscCall(MatDestroy(&G));
450   PetscCall(MatDestroy(&lGis));
451 
452   if (print) {
453     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454     PetscCall(MatView(lG, NULL));
455   }
456 
457   /* Save lG for values insertion in change of basis */
458   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
459 
460   /* Analyze the edge-nodes connections (duplicate lG) */
461   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463   PetscCall(PetscBTCreate(nv, &btv));
464   PetscCall(PetscBTCreate(ne, &bte));
465   PetscCall(PetscBTCreate(ne, &btb));
466   PetscCall(PetscBTCreate(ne, &btbd));
467   /* need to import the boundary specification to ensure the
468      proper detection of coarse edges' endpoints */
469   if (pcbddc->DirichletBoundariesLocal) {
470     IS is;
471 
472     if (fl2g) {
473       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474     } else {
475       is = pcbddc->DirichletBoundariesLocal;
476     }
477     PetscCall(ISGetLocalSize(is, &cum));
478     PetscCall(ISGetIndices(is, &idxs));
479     for (i = 0; i < cum; i++) {
480       if (idxs[i] >= 0 && idxs[i] < ne) {
481         PetscCall(PetscBTSet(btb, idxs[i]));
482         PetscCall(PetscBTSet(btbd, idxs[i]));
483       }
484     }
485     PetscCall(ISRestoreIndices(is, &idxs));
486     if (fl2g) PetscCall(ISDestroy(&is));
487   }
488   if (pcbddc->NeumannBoundariesLocal) {
489     IS is;
490 
491     if (fl2g) {
492       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493     } else {
494       is = pcbddc->NeumannBoundariesLocal;
495     }
496     PetscCall(ISGetLocalSize(is, &cum));
497     PetscCall(ISGetIndices(is, &idxs));
498     for (i = 0; i < cum; i++) {
499       if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500     }
501     PetscCall(ISRestoreIndices(is, &idxs));
502     if (fl2g) PetscCall(ISDestroy(&is));
503   }
504 
505   /* Count neighs per dof */
506   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
508 
509   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510      for proper detection of coarse edges' endpoints */
511   PetscCall(PetscBTCreate(ne, &btee));
512   for (i = 0; i < ne; i++) {
513     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514   }
515   PetscCall(PetscMalloc1(ne, &marks));
516   if (!conforming) {
517     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519   }
520   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521   PetscCall(MatSeqAIJGetArray(lGe, &vals));
522   cum = 0;
523   for (i = 0; i < ne; i++) {
524     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525     if (!PetscBTLookup(btee, i)) {
526       marks[cum++] = i;
527       continue;
528     }
529     /* set badly connected edge dofs as primal */
530     if (!conforming) {
531       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532         marks[cum++] = i;
533         PetscCall(PetscBTSet(bte, i));
534         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535       } else {
536         /* every edge dofs should be connected through a certain number of nodal dofs
537            to other edge dofs belonging to coarse edges
538            - at most 2 endpoints
539            - order-1 interior nodal dofs
540            - no undefined nodal dofs (nconn < order)
541         */
542         PetscInt ends = 0, ints = 0, undef = 0;
543         for (j = ii[i]; j < ii[i + 1]; j++) {
544           PetscInt v     = jj[j], k;
545           PetscInt nconn = iit[v + 1] - iit[v];
546           for (k = iit[v]; k < iit[v + 1]; k++)
547             if (!PetscBTLookup(btee, jjt[k])) nconn--;
548           if (nconn > order) ends++;
549           else if (nconn == order) ints++;
550           else undef++;
551         }
552         if (undef || ends > 2 || ints != order - 1) {
553           marks[cum++] = i;
554           PetscCall(PetscBTSet(bte, i));
555           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556         }
557       }
558     }
559     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560     if (!order && ii[i + 1] != ii[i]) {
561       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563     }
564   }
565   PetscCall(PetscBTDestroy(&btee));
566   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568   if (!conforming) {
569     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570     PetscCall(MatDestroy(&lGt));
571   }
572   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
573 
574   /* identify splitpoints and corner candidates */
575   PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576   PetscCall(PetscBTCreate(nv, &btvcand));
577   if (elements_corners) {
578     PetscCall(ISGetLocalSize(elements_corners, &cum));
579     PetscCall(ISGetIndices(elements_corners, &idxs));
580     for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581     PetscCall(ISRestoreIndices(elements_corners, &idxs));
582   }
583 
584   if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585     PetscSF   emlsf, vmlsf;
586     PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587     PetscInt  cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
588 
589     PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591     PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
593 
594     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
596 
597     PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598     for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599     PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600     PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
601 
602     PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603     for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604     PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605     PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
606 
607     PetscCall(PetscMalloc1(ne, &eleaves));
608     PetscCall(PetscMalloc1(nv, &vleaves));
609     for (i = 0; i < ne; i++) eleaves[i] = PETSC_MAX_INT;
610     for (i = 0; i < nv; i++) vleaves[i] = PETSC_MAX_INT;
611     PetscCall(PetscMalloc1(emnl, &meleaves));
612     PetscCall(PetscMalloc1(vmnl, &mvleaves));
613 
614     PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615     PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616     for (i = 0; i < n_subs; i++) {
617       const PetscInt *idxs;
618       const PetscInt  subid = cum_subs + i;
619       PetscInt        ns;
620 
621       PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622       PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623       for (j = 0; j < ns; j++) {
624         const PetscInt e = idxs[j];
625 
626         eleaves[e] = subid;
627         for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628       }
629       PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630     }
631     PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632     PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633     PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634     PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635     PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636     PetscCall(PetscFree(eleaves));
637     PetscCall(PetscFree(vleaves));
638 
639     PetscCall(PetscMalloc1(ne + 1, &eneighs));
640     eneighs[0] = meleaves;
641     for (i = 0; i < ne; i++) {
642       PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643       eneighs[i + 1] = eneighs[i] + ecount[i];
644     }
645     PetscCall(PetscMalloc1(nv + 1, &vneighs));
646     vneighs[0] = mvleaves;
647     for (i = 0; i < nv; i++) {
648       PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649       vneighs[i + 1] = vneighs[i] + vcount[i];
650     }
651   } else {
652     PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653     PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654   }
655 
656   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657   if (print) {
658     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659     PetscCall(MatView(lGe, NULL));
660     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661     PetscCall(MatView(lGt, NULL));
662   }
663   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664   PetscCall(MatSeqAIJGetArray(lGt, &vals));
665   for (i = 0; i < nv; i++) {
666     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668     if (!order) { /* variable order */
669       PetscReal vorder = 0.;
670 
671       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674       ord = 1;
675     }
676     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677       const PetscInt e = jj[j];
678 
679       if (PetscBTLookup(btbd, e)) {
680         bdir = PETSC_TRUE;
681         break;
682       }
683       if (vc != ecount[e]) {
684         sneighs = PETSC_FALSE;
685       } else {
686         const PetscInt *vn = vneighs[i], *en = eneighs[e];
687 
688         for (PetscInt k = 0; k < vc; k++) {
689           if (vn[k] != en[k]) {
690             sneighs = PETSC_FALSE;
691             break;
692           }
693         }
694       }
695     }
696     if (elements_corners) test = 0;
697     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699       PetscCall(PetscBTSet(btv, i));
700     } else if (test == ord) {
701       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703         PetscCall(PetscBTSet(btv, i));
704       } else if (!elements_corners) {
705         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706         PetscCall(PetscBTSet(btvcand, i));
707       }
708     }
709   }
710   PetscCall(PetscBTDestroy(&btbd));
711 
712   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713   if (order != 1) {
714     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716     for (i = 0; i < nv; i++) {
717       if (PetscBTLookup(btvcand, i)) {
718         PetscBool found = PETSC_FALSE;
719         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720           PetscInt k, e = jj[j];
721           if (PetscBTLookup(bte, e)) continue;
722           for (k = iit[e]; k < iit[e + 1]; k++) {
723             PetscInt v = jjt[k];
724             if (v != i && PetscBTLookup(btvcand, v)) {
725               found = PETSC_TRUE;
726               break;
727             }
728           }
729         }
730         if (!found) {
731           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732           PetscCall(PetscBTClear(btvcand, i));
733         } else {
734           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735         }
736       }
737     }
738     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739   }
740   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742   PetscCall(MatDestroy(&lGe));
743 
744   /* Get the local G^T explicitly */
745   PetscCall(MatDestroy(&lGt));
746   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
748 
749   /* Mark shared nodal dofs */
750   PetscCall(PetscBTCreate(nv, &btvi));
751   for (i = 0; i < nv; i++) {
752     if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753   }
754 
755   if (matis->allow_repeated) {
756     PetscCall(PetscFree(eneighs[0]));
757     PetscCall(PetscFree(vneighs[0]));
758     PetscCall(PetscFree(eneighs));
759     PetscCall(PetscFree(vneighs));
760   }
761   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
763 
764   /* communicate corners and splitpoints */
765   PetscCall(PetscMalloc1(nv, &vmarks));
766   PetscCall(PetscArrayzero(sfvleaves, nv));
767   PetscCall(PetscArrayzero(sfvroots, Lv));
768   for (i = 0; i < nv; i++)
769     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
770 
771   if (print) {
772     IS tbz;
773 
774     cum = 0;
775     for (i = 0; i < nv; i++)
776       if (sfvleaves[i]) vmarks[cum++] = i;
777 
778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780     PetscCall(ISView(tbz, NULL));
781     PetscCall(ISDestroy(&tbz));
782   }
783 
784   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
788 
789   /* Zero rows of lGt corresponding to identified corners
790      and interior nodal dofs */
791   cum = 0;
792   for (i = 0; i < nv; i++) {
793     if (sfvleaves[i]) {
794       vmarks[cum++] = i;
795       PetscCall(PetscBTSet(btv, i));
796     } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797   }
798   PetscCall(PetscBTDestroy(&btvi));
799   if (print) {
800     IS tbz;
801 
802     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804     PetscCall(ISView(tbz, NULL));
805     PetscCall(ISDestroy(&tbz));
806   }
807   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808   PetscCall(PetscFree(vmarks));
809   PetscCall(PetscSFDestroy(&sfv));
810   PetscCall(PetscFree2(sfvleaves, sfvroots));
811 
812   /* Recompute G */
813   PetscCall(MatDestroy(&lG));
814   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815   if (print) {
816     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817     PetscCall(MatView(lG, NULL));
818     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819     PetscCall(MatView(lGt, NULL));
820   }
821 
822   /* Get primal dofs (if any) */
823   cum = 0;
824   for (i = 0; i < ne; i++) {
825     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826   }
827   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829   if (print) {
830     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831     PetscCall(ISView(primals, NULL));
832   }
833   PetscCall(PetscBTDestroy(&bte));
834   /* TODO: what if the user passed in some of them ?  */
835   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836   PetscCall(ISDestroy(&primals));
837 
838   /* Compute edge connectivity */
839   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
840 
841   /* Symbolic conn = lG*lGt */
842   if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843     PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844     PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845     PetscCall(MatProductSetAlgorithm(conn, "default"));
846     PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848     PetscCall(MatProductSetFromOptions(conn));
849     PetscCall(MatProductSymbolic(conn));
850     PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851     if (fl2g) {
852       PetscBT   btf;
853       PetscInt *iia, *jja, *iiu, *jju;
854       PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
855 
856       /* create CSR for all local dofs */
857       PetscCall(PetscMalloc1(n + 1, &iia));
858       if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859         PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860         iiu = pcbddc->mat_graph->xadj;
861         jju = pcbddc->mat_graph->adjncy;
862       } else if (pcbddc->use_local_adj) {
863         rest = PETSC_TRUE;
864         PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865       } else {
866         free = PETSC_TRUE;
867         PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868         iiu[0] = 0;
869         for (i = 0; i < n; i++) {
870           iiu[i + 1] = i + 1;
871           jju[i]     = -1;
872         }
873       }
874 
875       /* import sizes of CSR */
876       iia[0] = 0;
877       for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
878 
879       /* overwrite entries corresponding to the Nedelec field */
880       PetscCall(PetscBTCreate(n, &btf));
881       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882       for (i = 0; i < ne; i++) {
883         PetscCall(PetscBTSet(btf, idxs[i]));
884         iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885       }
886 
887       /* iia in CSR */
888       for (i = 0; i < n; i++) iia[i + 1] += iia[i];
889 
890       /* jja in CSR */
891       PetscCall(PetscMalloc1(iia[n], &jja));
892       for (i = 0; i < n; i++)
893         if (!PetscBTLookup(btf, i))
894           for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
895 
896       /* map edge dofs connectivity */
897       if (jj) {
898         PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899         for (i = 0; i < ne; i++) {
900           PetscInt e = idxs[i];
901           for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902         }
903       }
904       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906       if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907       if (free) PetscCall(PetscFree2(iiu, jju));
908       PetscCall(PetscBTDestroy(&btf));
909     } else {
910       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911     }
912     PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913     PetscCall(MatDestroy(&conn));
914   }
915 
916   /* Analyze interface for edge dofs */
917   PetscCall(PCBDDCAnalyzeInterface(pc));
918   pcbddc->mat_graph->twodim = PETSC_FALSE;
919 
920   /* Get coarse edges in the edge space */
921   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
922 
923   if (fl2g) {
924     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925     PetscCall(PetscMalloc1(nee, &eedges));
926     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927   } else {
928     eedges  = alleedges;
929     primals = allprimals;
930   }
931 
932   /* Mark fine edge dofs with their coarse edge id */
933   PetscCall(PetscArrayzero(marks, ne));
934   PetscCall(ISGetLocalSize(primals, &cum));
935   PetscCall(ISGetIndices(primals, &idxs));
936   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937   PetscCall(ISRestoreIndices(primals, &idxs));
938   if (print) {
939     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940     PetscCall(ISView(primals, NULL));
941   }
942 
943   maxsize = 0;
944   for (i = 0; i < nee; i++) {
945     PetscInt size, mark = i + 1;
946 
947     PetscCall(ISGetLocalSize(eedges[i], &size));
948     PetscCall(ISGetIndices(eedges[i], &idxs));
949     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950     PetscCall(ISRestoreIndices(eedges[i], &idxs));
951     maxsize = PetscMax(maxsize, size);
952   }
953 
954   /* Find coarse edge endpoints */
955   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957   for (i = 0; i < nee; i++) {
958     PetscInt mark = i + 1, size;
959 
960     PetscCall(ISGetLocalSize(eedges[i], &size));
961     if (!size && nedfieldlocal) continue;
962     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963     PetscCall(ISGetIndices(eedges[i], &idxs));
964     if (print) {
965       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966       PetscCall(ISView(eedges[i], NULL));
967     }
968     for (j = 0; j < size; j++) {
969       PetscInt k, ee = idxs[j];
970       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
971       for (k = ii[ee]; k < ii[ee + 1]; k++) {
972         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
973         if (PetscBTLookup(btv, jj[k])) {
974           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976           PetscInt  k2;
977           PetscBool corner = PETSC_FALSE;
978           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980             /* it's a corner if either is connected with an edge dof belonging to a different cc or
981                if the edge dof lie on the natural part of the boundary */
982             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983               corner = PETSC_TRUE;
984               break;
985             }
986           }
987           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
989             PetscCall(PetscBTSet(btv, jj[k]));
990           } else {
991             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
992           }
993         }
994       }
995     }
996     PetscCall(ISRestoreIndices(eedges[i], &idxs));
997   }
998   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000   PetscCall(PetscBTDestroy(&btb));
1001 
1002   /* Reset marked primal dofs */
1003   PetscCall(ISGetLocalSize(primals, &cum));
1004   PetscCall(ISGetIndices(primals, &idxs));
1005   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006   PetscCall(ISRestoreIndices(primals, &idxs));
1007 
1008   /* Now use the initial lG */
1009   PetscCall(MatDestroy(&lG));
1010   PetscCall(MatDestroy(&lGt));
1011   lG = lGinit;
1012   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1013 
1014   /* Compute extended cols indices */
1015   PetscCall(PetscBTCreate(nv, &btvc));
1016   PetscCall(PetscBTCreate(nee, &bter));
1017   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019   i *= maxsize;
1020   PetscCall(PetscCalloc1(nee, &extcols));
1021   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022   eerr = PETSC_FALSE;
1023   for (i = 0; i < nee; i++) {
1024     PetscInt size, found = 0;
1025 
1026     cum = 0;
1027     PetscCall(ISGetLocalSize(eedges[i], &size));
1028     if (!size && nedfieldlocal) continue;
1029     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030     PetscCall(ISGetIndices(eedges[i], &idxs));
1031     PetscCall(PetscBTMemzero(nv, btvc));
1032     for (j = 0; j < size; j++) {
1033       PetscInt k, ee = idxs[j];
1034       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035         PetscInt vv = jj[k];
1036         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037         else if (!PetscBTLookupSet(btvc, vv)) found++;
1038       }
1039     }
1040     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045     /* it may happen that endpoints are not defined at this point
1046        if it is the case, mark this edge for a second pass */
1047     if (cum != size - 1 || found != 2) {
1048       PetscCall(PetscBTSet(bter, i));
1049       if (print) {
1050         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051         PetscCall(ISView(eedges[i], NULL));
1052         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053         PetscCall(ISView(extcols[i], NULL));
1054       }
1055       eerr = PETSC_TRUE;
1056     }
1057   }
1058   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059   PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
1060   if (done) {
1061     PetscInt *newprimals;
1062 
1063     PetscCall(PetscMalloc1(ne, &newprimals));
1064     PetscCall(ISGetLocalSize(primals, &cum));
1065     PetscCall(ISGetIndices(primals, &idxs));
1066     PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067     PetscCall(ISRestoreIndices(primals, &idxs));
1068     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070     for (i = 0; i < nee; i++) {
1071       PetscBool has_candidates = PETSC_FALSE;
1072       if (PetscBTLookup(bter, i)) {
1073         PetscInt size, mark = i + 1;
1074 
1075         PetscCall(ISGetLocalSize(eedges[i], &size));
1076         PetscCall(ISGetIndices(eedges[i], &idxs));
1077         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078         for (j = 0; j < size; j++) {
1079           PetscInt k, ee = idxs[j];
1080           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082             /* set all candidates located on the edge as corners */
1083             if (PetscBTLookup(btvcand, jj[k])) {
1084               PetscInt k2, vv = jj[k];
1085               has_candidates = PETSC_TRUE;
1086               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087               PetscCall(PetscBTSet(btv, vv));
1088               /* set all edge dofs connected to candidate as primals */
1089               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090                 if (marks[jjt[k2]] == mark) {
1091                   PetscInt k3, ee2 = jjt[k2];
1092                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093                   newprimals[cum++] = ee2;
1094                   /* finally set the new corners */
1095                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097                     PetscCall(PetscBTSet(btv, jj[k3]));
1098                   }
1099                 }
1100               }
1101             } else {
1102               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103             }
1104           }
1105         }
1106         if (!has_candidates) { /* circular edge */
1107           PetscInt k, ee = idxs[0], *tmarks;
1108 
1109           PetscCall(PetscCalloc1(ne, &tmarks));
1110           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
1111           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112             PetscInt k2;
1113             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
1114             PetscCall(PetscBTSet(btv, jj[k]));
1115             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116           }
1117           for (j = 0; j < size; j++) {
1118             if (tmarks[idxs[j]] > 1) {
1119               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120               newprimals[cum++] = idxs[j];
1121             }
1122           }
1123           PetscCall(PetscFree(tmarks));
1124         }
1125         PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126       }
1127       PetscCall(ISDestroy(&extcols[i]));
1128     }
1129     PetscCall(PetscFree(extcols));
1130     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132     if (fl2g) {
1133       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134       PetscCall(ISDestroy(&primals));
1135       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136       PetscCall(PetscFree(eedges));
1137     }
1138     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140     PetscCall(PetscFree(newprimals));
1141     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142     PetscCall(ISDestroy(&primals));
1143     PetscCall(PCBDDCAnalyzeInterface(pc));
1144     pcbddc->mat_graph->twodim = PETSC_FALSE;
1145     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146     if (fl2g) {
1147       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148       PetscCall(PetscMalloc1(nee, &eedges));
1149       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150     } else {
1151       eedges  = alleedges;
1152       primals = allprimals;
1153     }
1154     PetscCall(PetscCalloc1(nee, &extcols));
1155 
1156     /* Mark again */
1157     PetscCall(PetscArrayzero(marks, ne));
1158     for (i = 0; i < nee; i++) {
1159       PetscInt size, mark = i + 1;
1160 
1161       PetscCall(ISGetLocalSize(eedges[i], &size));
1162       PetscCall(ISGetIndices(eedges[i], &idxs));
1163       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165     }
1166     if (print) {
1167       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168       PetscCall(ISView(primals, NULL));
1169     }
1170 
1171     /* Recompute extended cols */
1172     eerr = PETSC_FALSE;
1173     for (i = 0; i < nee; i++) {
1174       PetscInt size;
1175 
1176       cum = 0;
1177       PetscCall(ISGetLocalSize(eedges[i], &size));
1178       if (!size && nedfieldlocal) continue;
1179       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180       PetscCall(ISGetIndices(eedges[i], &idxs));
1181       for (j = 0; j < size; j++) {
1182         PetscInt k, ee = idxs[j];
1183         for (k = ii[ee]; k < ii[ee + 1]; k++)
1184           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185       }
1186       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191       if (cum != size - 1) {
1192         if (print) {
1193           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194           PetscCall(ISView(eedges[i], NULL));
1195           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196           PetscCall(ISView(extcols[i], NULL));
1197         }
1198         eerr = PETSC_TRUE;
1199       }
1200     }
1201   }
1202   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203   PetscCall(PetscFree2(extrow, gidxs));
1204   PetscCall(PetscBTDestroy(&bter));
1205   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206   /* an error should not occur at this point */
1207   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1208 
1209   /* Check the number of endpoints */
1210   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211   PetscCall(PetscMalloc1(2 * nee, &corners));
1212   PetscCall(PetscMalloc1(nee, &cedges));
1213   for (i = 0; i < nee; i++) {
1214     PetscInt size, found = 0, gc[2];
1215 
1216     /* init with defaults */
1217     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218     PetscCall(ISGetLocalSize(eedges[i], &size));
1219     if (!size && nedfieldlocal) continue;
1220     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221     PetscCall(ISGetIndices(eedges[i], &idxs));
1222     PetscCall(PetscBTMemzero(nv, btvc));
1223     for (j = 0; j < size; j++) {
1224       PetscInt k, ee = idxs[j];
1225       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226         PetscInt vv = jj[k];
1227         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229           corners[i * 2 + found++] = vv;
1230         }
1231       }
1232     }
1233     if (found != 2) {
1234       PetscInt e;
1235       if (fl2g) {
1236         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237       } else {
1238         e = idxs[0];
1239       }
1240       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241     }
1242 
1243     /* get primal dof index on this coarse edge */
1244     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245     if (gc[0] > gc[1]) {
1246       PetscInt swap      = corners[2 * i];
1247       corners[2 * i]     = corners[2 * i + 1];
1248       corners[2 * i + 1] = swap;
1249     }
1250     cedges[i] = idxs[size - 1];
1251     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253   }
1254   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255   PetscCall(PetscBTDestroy(&btvc));
1256 
1257   if (PetscDefined(USE_DEBUG)) {
1258     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259      not interfere with neighbouring coarse edges */
1260     PetscCall(PetscMalloc1(nee + 1, &emarks));
1261     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262     for (i = 0; i < nv; i++) {
1263       PetscInt emax = 0, eemax = 0;
1264 
1265       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266       PetscCall(PetscArrayzero(emarks, nee + 1));
1267       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268       for (j = 1; j < nee + 1; j++) {
1269         if (emax < emarks[j]) {
1270           emax  = emarks[j];
1271           eemax = j;
1272         }
1273       }
1274       /* not relevant for edges */
1275       if (!eemax) continue;
1276 
1277       for (j = ii[i]; j < ii[i + 1]; j++) {
1278         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279       }
1280     }
1281     PetscCall(PetscFree(emarks));
1282     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283   }
1284 
1285   /* Compute extended rows indices for edge blocks of the change of basis */
1286   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288   extmem *= maxsize;
1289   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290   PetscCall(PetscMalloc1(nee, &extrows));
1291   PetscCall(PetscCalloc1(nee, &extrowcum));
1292   for (i = 0; i < nv; i++) {
1293     PetscInt mark = 0, size, start;
1294 
1295     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296     for (j = ii[i]; j < ii[i + 1]; j++)
1297       if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1298 
1299     /* not relevant */
1300     if (!mark) continue;
1301 
1302     /* import extended row */
1303     mark--;
1304     start = mark * extmem + extrowcum[mark];
1305     size  = ii[i + 1] - ii[i];
1306     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308     extrowcum[mark] += size;
1309   }
1310   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311   PetscCall(MatDestroy(&lGt));
1312   PetscCall(PetscFree(marks));
1313 
1314   /* Compress extrows */
1315   cum = 0;
1316   for (i = 0; i < nee; i++) {
1317     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318     PetscCall(PetscSortRemoveDupsInt(&size, start));
1319     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320     cum = PetscMax(cum, size);
1321   }
1322   PetscCall(PetscFree(extrowcum));
1323   PetscCall(PetscBTDestroy(&btv));
1324   PetscCall(PetscBTDestroy(&btvcand));
1325 
1326   /* Workspace for lapack inner calls and VecSetValues */
1327   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1328 
1329   /* Create change of basis matrix (preallocation can be improved) */
1330   PetscCall(MatCreate(comm, &T));
1331   PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332   PetscCall(MatSetType(T, MATAIJ));
1333   PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1334   PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1335   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1336   PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1337   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1338   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1339 
1340   /* Defaults to identity */
1341   for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1342 
1343   /* Create discrete gradient for the coarser level if needed */
1344   PetscCall(MatDestroy(&pcbddc->nedcG));
1345   PetscCall(ISDestroy(&pcbddc->nedclocal));
1346   if (pcbddc->current_level < pcbddc->max_levels) {
1347     ISLocalToGlobalMapping cel2g, cvl2g;
1348     IS                     wis, gwis;
1349     PetscInt               cnv, cne;
1350 
1351     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1352     if (fl2g) {
1353       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1354     } else {
1355       PetscCall(PetscObjectReference((PetscObject)wis));
1356       pcbddc->nedclocal = wis;
1357     }
1358     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1359     PetscCall(ISDestroy(&wis));
1360     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1361     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1362     PetscCall(ISDestroy(&wis));
1363     PetscCall(ISDestroy(&gwis));
1364 
1365     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1366     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1367     PetscCall(ISDestroy(&wis));
1368     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1369     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1370     PetscCall(ISDestroy(&wis));
1371     PetscCall(ISDestroy(&gwis));
1372 
1373     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1374     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1375     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1376     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1377     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1378     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1379     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1380     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1381   }
1382   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1383 
1384 #if defined(PRINT_GDET)
1385   inc = 0;
1386   lev = pcbddc->current_level;
1387 #endif
1388 
1389   /* Insert values in the change of basis matrix */
1390   for (i = 0; i < nee; i++) {
1391     Mat         Gins = NULL, GKins = NULL;
1392     IS          cornersis = NULL;
1393     PetscScalar cvals[2];
1394 
1395     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1396     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1397     if (Gins && GKins) {
1398       const PetscScalar *data;
1399       const PetscInt    *rows, *cols;
1400       PetscInt           nrh, nch, nrc, ncc;
1401 
1402       PetscCall(ISGetIndices(eedges[i], &cols));
1403       /* H1 */
1404       PetscCall(ISGetIndices(extrows[i], &rows));
1405       PetscCall(MatGetSize(Gins, &nrh, &nch));
1406       PetscCall(MatDenseGetArrayRead(Gins, &data));
1407       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1408       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1409       PetscCall(ISRestoreIndices(extrows[i], &rows));
1410       /* complement */
1411       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1412       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1413       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1414       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1415       PetscCall(MatDenseGetArrayRead(GKins, &data));
1416       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1417       PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1418 
1419       /* coarse discrete gradient */
1420       if (pcbddc->nedcG) {
1421         PetscInt cols[2];
1422 
1423         cols[0] = 2 * i;
1424         cols[1] = 2 * i + 1;
1425         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1426       }
1427       PetscCall(ISRestoreIndices(eedges[i], &cols));
1428     }
1429     PetscCall(ISDestroy(&extrows[i]));
1430     PetscCall(ISDestroy(&extcols[i]));
1431     PetscCall(ISDestroy(&cornersis));
1432     PetscCall(MatDestroy(&Gins));
1433     PetscCall(MatDestroy(&GKins));
1434   }
1435 
1436   /* for FDM element-by-element: first dof on the edge only constraint. Why? */
1437   if (elements_corners && pcbddc->mat_graph->multi_element) {
1438     ISLocalToGlobalMapping map;
1439     MatNullSpace           nnsp;
1440     Vec                    quad_vec;
1441 
1442     PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL));
1443     PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp));
1444     PetscCall(VecLockReadPop(quad_vec));
1445     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
1446     PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1447     for (i = 0; i < nee; i++) {
1448       const PetscInt *idxs;
1449       PetscScalar     one = 1.0;
1450 
1451       PetscCall(ISGetLocalSize(alleedges[i], &cum));
1452       if (!cum) continue;
1453       PetscCall(ISGetIndices(alleedges[i], &idxs));
1454       PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES));
1455       PetscCall(ISRestoreIndices(alleedges[i], &idxs));
1456     }
1457     PetscCall(VecLockReadPush(quad_vec));
1458     PetscCall(VecDestroy(&quad_vec));
1459     PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1460     PetscCall(MatNullSpaceDestroy(&nnsp));
1461   }
1462   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1463 
1464   /* Start assembling */
1465   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1466   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1467 
1468   /* Free */
1469   if (fl2g) {
1470     PetscCall(ISDestroy(&primals));
1471     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1472     PetscCall(PetscFree(eedges));
1473   }
1474 
1475   /* hack mat_graph with primal dofs on the coarse edges */
1476   {
1477     PCBDDCGraph graph  = pcbddc->mat_graph;
1478     PetscInt   *oqueue = graph->queue;
1479     PetscInt   *ocptr  = graph->cptr;
1480     PetscInt    ncc, *idxs;
1481 
1482     /* find first primal edge */
1483     if (pcbddc->nedclocal) {
1484       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1485     } else {
1486       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1487       idxs = cedges;
1488     }
1489     cum = 0;
1490     while (cum < nee && cedges[cum] < 0) cum++;
1491 
1492     /* adapt connected components */
1493     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1494     graph->cptr[0] = 0;
1495     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1496       PetscInt lc = ocptr[i + 1] - ocptr[i];
1497       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1498         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1499         graph->queue[graph->cptr[ncc]] = cedges[cum];
1500         ncc++;
1501         lc--;
1502         cum++;
1503         while (cum < nee && cedges[cum] < 0) cum++;
1504       }
1505       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1506       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1507       ncc++;
1508     }
1509     graph->ncc = ncc;
1510     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1511     PetscCall(PetscFree2(ocptr, oqueue));
1512   }
1513   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1514   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1515   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1516 
1517   PetscCall(ISDestroy(&nedfieldlocal));
1518   PetscCall(PetscFree(extrow));
1519   PetscCall(PetscFree2(work, rwork));
1520   PetscCall(PetscFree(corners));
1521   PetscCall(PetscFree(cedges));
1522   PetscCall(PetscFree(extrows));
1523   PetscCall(PetscFree(extcols));
1524   PetscCall(MatDestroy(&lG));
1525 
1526   /* Complete assembling */
1527   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1528   PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1529   if (pcbddc->nedcG) {
1530     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1531     PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1532   }
1533 
1534   PetscCall(ISDestroy(&elements_corners));
1535 
1536   /* set change of basis */
1537   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1538   PetscCall(MatDestroy(&T));
1539   PetscFunctionReturn(PETSC_SUCCESS);
1540 }
1541 
1542 /* the near-null space of BDDC carries information on quadrature weights,
1543    and these can be collinear -> so cheat with MatNullSpaceCreate
1544    and create a suitable set of basis vectors first */
1545 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1546 {
1547   PetscInt i;
1548 
1549   PetscFunctionBegin;
1550   for (i = 0; i < nvecs; i++) {
1551     PetscInt first, last;
1552 
1553     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1554     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1555     if (i >= first && i < last) {
1556       PetscScalar *data;
1557       PetscCall(VecGetArray(quad_vecs[i], &data));
1558       if (!has_const) {
1559         data[i - first] = 1.;
1560       } else {
1561         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1562         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1563       }
1564       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1565     }
1566     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1567   }
1568   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1569   for (i = 0; i < nvecs; i++) { /* reset vectors */
1570     PetscInt first, last;
1571     PetscCall(VecLockReadPop(quad_vecs[i]));
1572     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1573     if (i >= first && i < last) {
1574       PetscScalar *data;
1575       PetscCall(VecGetArray(quad_vecs[i], &data));
1576       if (!has_const) {
1577         data[i - first] = 0.;
1578       } else {
1579         data[2 * i - first]     = 0.;
1580         data[2 * i - first + 1] = 0.;
1581       }
1582       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1583     }
1584     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1585     PetscCall(VecLockReadPush(quad_vecs[i]));
1586   }
1587   PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589 
1590 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1591 {
1592   Mat                    loc_divudotp;
1593   Vec                    p, v, quad_vec;
1594   ISLocalToGlobalMapping map;
1595   PetscScalar           *array;
1596 
1597   PetscFunctionBegin;
1598   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1599   if (!transpose) {
1600     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1601   } else {
1602     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1603   }
1604   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1605   PetscCall(VecLockReadPop(quad_vec));
1606   PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1607 
1608   /* compute local quad vec */
1609   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1610   if (!transpose) {
1611     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1612   } else {
1613     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1614   }
1615   /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1616   PetscCall(VecSet(p, 1.));
1617   if (!transpose) {
1618     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1619   } else {
1620     PetscCall(MatMult(loc_divudotp, p, v));
1621   }
1622   PetscCall(VecDestroy(&p));
1623   if (vl2l) {
1624     Mat        lA;
1625     VecScatter sc;
1626     Vec        vins;
1627 
1628     PetscCall(MatISGetLocalMat(A, &lA));
1629     PetscCall(MatCreateVecs(lA, &vins, NULL));
1630     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1631     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1632     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1633     PetscCall(VecScatterDestroy(&sc));
1634     PetscCall(VecDestroy(&v));
1635     v = vins;
1636   }
1637 
1638   /* mask summation of interface values */
1639   PetscInt        n, *mmask, *mask, *idxs, nmr, nr;
1640   const PetscInt *degree;
1641   PetscSF         msf;
1642 
1643   PetscCall(VecGetLocalSize(v, &n));
1644   PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1645   PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1646   PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1647   PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1648   PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, &degree));
1649   PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, &degree));
1650   for (PetscInt i = 0, c = 0; i < nr; i++) {
1651     mmask[c] = 1;
1652     c += degree[i];
1653   }
1654   PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1655   PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1656   PetscCall(VecGetArray(v, &array));
1657   for (PetscInt i = 0; i < n; i++) {
1658     array[i] *= mask[i];
1659     idxs[i] = i;
1660   }
1661   PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1662   PetscCall(VecRestoreArray(v, &array));
1663   PetscCall(PetscFree3(mmask, mask, idxs));
1664   PetscCall(VecDestroy(&v));
1665   PetscCall(VecAssemblyBegin(quad_vec));
1666   PetscCall(VecAssemblyEnd(quad_vec));
1667   PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1668   PetscCall(VecLockReadPush(quad_vec));
1669   PetscCall(VecDestroy(&quad_vec));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1674 {
1675   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1676 
1677   PetscFunctionBegin;
1678   if (primalv) {
1679     if (pcbddc->user_primal_vertices_local) {
1680       IS list[2], newp;
1681 
1682       list[0] = primalv;
1683       list[1] = pcbddc->user_primal_vertices_local;
1684       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1685       PetscCall(ISSortRemoveDups(newp));
1686       PetscCall(ISDestroy(&list[1]));
1687       pcbddc->user_primal_vertices_local = newp;
1688     } else {
1689       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1690     }
1691   }
1692   PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694 
1695 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1696 {
1697   PetscInt f, *comp = (PetscInt *)ctx;
1698 
1699   PetscFunctionBegin;
1700   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1701   PetscFunctionReturn(PETSC_SUCCESS);
1702 }
1703 
1704 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1705 {
1706   Vec       local, global;
1707   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1708   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1709   PetscBool monolithic = PETSC_FALSE;
1710 
1711   PetscFunctionBegin;
1712   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1713   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1714   PetscOptionsEnd();
1715   /* need to convert from global to local topology information and remove references to information in global ordering */
1716   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1717   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1718   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1719   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1720   if (monolithic) { /* just get block size to properly compute vertices */
1721     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1722     goto boundary;
1723   }
1724 
1725   if (pcbddc->user_provided_isfordofs) {
1726     if (pcbddc->n_ISForDofs) {
1727       PetscInt i;
1728 
1729       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1730       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1731         PetscInt bs;
1732 
1733         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1734         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1735         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1736         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1737       }
1738       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1739       pcbddc->n_ISForDofs      = 0;
1740       PetscCall(PetscFree(pcbddc->ISForDofs));
1741     }
1742   } else {
1743     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1744       DM dm;
1745 
1746       PetscCall(MatGetDM(pc->pmat, &dm));
1747       if (!dm) PetscCall(PCGetDM(pc, &dm));
1748       if (dm) {
1749         IS      *fields;
1750         PetscInt nf, i;
1751 
1752         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1753         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1754         for (i = 0; i < nf; i++) {
1755           PetscInt bs;
1756 
1757           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1758           PetscCall(ISGetBlockSize(fields[i], &bs));
1759           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1760           PetscCall(ISDestroy(&fields[i]));
1761         }
1762         PetscCall(PetscFree(fields));
1763         pcbddc->n_ISForDofsLocal = nf;
1764       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1765         PetscContainer c;
1766 
1767         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1768         if (c) {
1769           MatISLocalFields lf;
1770           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1771           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1772         } else { /* fallback, create the default fields if bs > 1 */
1773           PetscInt i, n = matis->A->rmap->n;
1774           PetscCall(MatGetBlockSize(pc->pmat, &i));
1775           if (i > 1) {
1776             pcbddc->n_ISForDofsLocal = i;
1777             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1778             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1779           }
1780         }
1781       }
1782     } else {
1783       PetscInt i;
1784       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1785     }
1786   }
1787 
1788 boundary:
1789   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1790     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1791   } else if (pcbddc->DirichletBoundariesLocal) {
1792     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1793   }
1794   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1795     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1796   } else if (pcbddc->NeumannBoundariesLocal) {
1797     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1798   }
1799   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1800   PetscCall(VecDestroy(&global));
1801   PetscCall(VecDestroy(&local));
1802   /* detect local disconnected subdomains if requested or needed */
1803   if (pcbddc->detect_disconnected || matis->allow_repeated) {
1804     IS        primalv = NULL;
1805     PetscInt  nel;
1806     PetscBool filter = pcbddc->detect_disconnected_filter;
1807 
1808     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1809     PetscCall(PetscFree(pcbddc->local_subs));
1810     PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1811     if (matis->allow_repeated && nel) {
1812       const PetscInt *elsizes;
1813 
1814       pcbddc->n_local_subs = nel;
1815       PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1816       PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1817       for (PetscInt i = 0, c = 0; i < nel; i++) {
1818         PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1819         c += elsizes[i];
1820       }
1821     } else {
1822       PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1823     }
1824     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1825     PetscCall(ISDestroy(&primalv));
1826   }
1827   /* early stage corner detection */
1828   {
1829     DM dm;
1830 
1831     PetscCall(MatGetDM(pc->pmat, &dm));
1832     if (!dm) PetscCall(PCGetDM(pc, &dm));
1833     if (dm) {
1834       PetscBool isda;
1835 
1836       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1837       if (isda) {
1838         ISLocalToGlobalMapping l2l;
1839         IS                     corners;
1840         Mat                    lA;
1841         PetscBool              gl, lo;
1842 
1843         {
1844           Vec                cvec;
1845           const PetscScalar *coords;
1846           PetscInt           dof, n, cdim;
1847           PetscBool          memc = PETSC_TRUE;
1848 
1849           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1850           PetscCall(DMGetCoordinates(dm, &cvec));
1851           PetscCall(VecGetLocalSize(cvec, &n));
1852           PetscCall(VecGetBlockSize(cvec, &cdim));
1853           n /= cdim;
1854           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1855           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1856           PetscCall(VecGetArrayRead(cvec, &coords));
1857 #if defined(PETSC_USE_COMPLEX)
1858           memc = PETSC_FALSE;
1859 #endif
1860           if (dof != 1) memc = PETSC_FALSE;
1861           if (memc) {
1862             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1863           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1864             PetscReal *bcoords = pcbddc->mat_graph->coords;
1865             PetscInt   i, b, d;
1866 
1867             for (i = 0; i < n; i++) {
1868               for (b = 0; b < dof; b++) {
1869                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1870               }
1871             }
1872           }
1873           PetscCall(VecRestoreArrayRead(cvec, &coords));
1874           pcbddc->mat_graph->cdim  = cdim;
1875           pcbddc->mat_graph->cnloc = dof * n;
1876           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1877         }
1878         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1879         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1880         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1881         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1882         lo = (PetscBool)(l2l && corners);
1883         PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1884         if (gl) { /* From PETSc's DMDA */
1885           const PetscInt *idx;
1886           PetscInt        dof, bs, *idxout, n;
1887 
1888           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1889           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1890           PetscCall(ISGetLocalSize(corners, &n));
1891           PetscCall(ISGetIndices(corners, &idx));
1892           if (bs == dof) {
1893             PetscCall(PetscMalloc1(n, &idxout));
1894             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1895           } else { /* the original DMDA local-to-local map have been modified */
1896             PetscInt i, d;
1897 
1898             PetscCall(PetscMalloc1(dof * n, &idxout));
1899             for (i = 0; i < n; i++)
1900               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1901             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
1902 
1903             bs = 1;
1904             n *= dof;
1905           }
1906           PetscCall(ISRestoreIndices(corners, &idx));
1907           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1908           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1909           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1910           PetscCall(ISDestroy(&corners));
1911           pcbddc->corner_selected  = PETSC_TRUE;
1912           pcbddc->corner_selection = PETSC_TRUE;
1913         }
1914         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1915       }
1916     }
1917   }
1918   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1919     DM dm;
1920 
1921     PetscCall(MatGetDM(pc->pmat, &dm));
1922     if (!dm) PetscCall(PCGetDM(pc, &dm));
1923     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1924       Vec          vcoords;
1925       PetscSection section;
1926       PetscReal   *coords;
1927       PetscInt     d, cdim, nl, nf, **ctxs;
1928       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1929       /* debug coordinates */
1930       PetscViewer       viewer;
1931       PetscBool         flg;
1932       PetscViewerFormat format;
1933       const char       *prefix;
1934 
1935       PetscCall(DMGetCoordinateDim(dm, &cdim));
1936       PetscCall(DMGetLocalSection(dm, &section));
1937       PetscCall(PetscSectionGetNumFields(section, &nf));
1938       PetscCall(DMCreateGlobalVector(dm, &vcoords));
1939       PetscCall(VecGetLocalSize(vcoords, &nl));
1940       PetscCall(PetscMalloc1(nl * cdim, &coords));
1941       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1942       PetscCall(PetscMalloc1(nf, &ctxs[0]));
1943       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1944       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1945 
1946       /* debug coordinates */
1947       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1948       PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1949       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1950       for (d = 0; d < cdim; d++) {
1951         PetscInt           i;
1952         const PetscScalar *v;
1953         char               name[16];
1954 
1955         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1956         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d));
1957         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1958         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1959         if (flg) PetscCall(VecView(vcoords, viewer));
1960         PetscCall(VecGetArrayRead(vcoords, &v));
1961         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1962         PetscCall(VecRestoreArrayRead(vcoords, &v));
1963       }
1964       PetscCall(VecDestroy(&vcoords));
1965       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1966       PetscCall(PetscFree(coords));
1967       PetscCall(PetscFree(ctxs[0]));
1968       PetscCall(PetscFree2(funcs, ctxs));
1969       if (flg) {
1970         PetscCall(PetscViewerPopFormat(viewer));
1971         PetscCall(PetscOptionsRestoreViewer(&viewer));
1972       }
1973     }
1974   }
1975   PetscFunctionReturn(PETSC_SUCCESS);
1976 }
1977 
1978 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1979 {
1980   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
1981   IS              nis;
1982   const PetscInt *idxs;
1983   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;
1984 
1985   PetscFunctionBegin;
1986   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
1987   if (mop == MPI_LAND) {
1988     /* init rootdata with true */
1989     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1990   } else {
1991     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
1992   }
1993   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
1994   PetscCall(ISGetLocalSize(*is, &nd));
1995   PetscCall(ISGetIndices(*is, &idxs));
1996   for (i = 0; i < nd; i++)
1997     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1998   PetscCall(ISRestoreIndices(*is, &idxs));
1999   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2000   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2001   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2002   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2003   if (mop == MPI_LAND) {
2004     PetscCall(PetscMalloc1(nd, &nidxs));
2005   } else {
2006     PetscCall(PetscMalloc1(n, &nidxs));
2007   }
2008   for (i = 0, nnd = 0; i < n; i++)
2009     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2010   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2011   PetscCall(ISDestroy(is));
2012   *is = nis;
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2017 {
2018   PC_IS   *pcis   = (PC_IS *)pc->data;
2019   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2020 
2021   PetscFunctionBegin;
2022   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2023   if (pcbddc->ChangeOfBasisMatrix) {
2024     Vec swap;
2025 
2026     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2027     swap                = pcbddc->work_change;
2028     pcbddc->work_change = r;
2029     r                   = swap;
2030   }
2031   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2032   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2033   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2034   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2035   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2036   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2037   PetscCall(VecSet(z, 0.));
2038   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2039   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2040   if (pcbddc->ChangeOfBasisMatrix) {
2041     pcbddc->work_change = r;
2042     PetscCall(VecCopy(z, pcbddc->work_change));
2043     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2044   }
2045   PetscFunctionReturn(PETSC_SUCCESS);
2046 }
2047 
2048 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2049 {
2050   PCBDDCBenignMatMult_ctx ctx;
2051   PetscBool               apply_right, apply_left, reset_x;
2052 
2053   PetscFunctionBegin;
2054   PetscCall(MatShellGetContext(A, &ctx));
2055   if (transpose) {
2056     apply_right = ctx->apply_left;
2057     apply_left  = ctx->apply_right;
2058   } else {
2059     apply_right = ctx->apply_right;
2060     apply_left  = ctx->apply_left;
2061   }
2062   reset_x = PETSC_FALSE;
2063   if (apply_right) {
2064     const PetscScalar *ax;
2065     PetscInt           nl, i;
2066 
2067     PetscCall(VecGetLocalSize(x, &nl));
2068     PetscCall(VecGetArrayRead(x, &ax));
2069     PetscCall(PetscArraycpy(ctx->work, ax, nl));
2070     PetscCall(VecRestoreArrayRead(x, &ax));
2071     for (i = 0; i < ctx->benign_n; i++) {
2072       PetscScalar     sum, val;
2073       const PetscInt *idxs;
2074       PetscInt        nz, j;
2075       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2076       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2077       sum = 0.;
2078       if (ctx->apply_p0) {
2079         val = ctx->work[idxs[nz - 1]];
2080         for (j = 0; j < nz - 1; j++) {
2081           sum += ctx->work[idxs[j]];
2082           ctx->work[idxs[j]] += val;
2083         }
2084       } else {
2085         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2086       }
2087       ctx->work[idxs[nz - 1]] -= sum;
2088       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2089     }
2090     PetscCall(VecPlaceArray(x, ctx->work));
2091     reset_x = PETSC_TRUE;
2092   }
2093   if (transpose) {
2094     PetscCall(MatMultTranspose(ctx->A, x, y));
2095   } else {
2096     PetscCall(MatMult(ctx->A, x, y));
2097   }
2098   if (reset_x) PetscCall(VecResetArray(x));
2099   if (apply_left) {
2100     PetscScalar *ay;
2101     PetscInt     i;
2102 
2103     PetscCall(VecGetArray(y, &ay));
2104     for (i = 0; i < ctx->benign_n; i++) {
2105       PetscScalar     sum, val;
2106       const PetscInt *idxs;
2107       PetscInt        nz, j;
2108       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2109       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2110       val = -ay[idxs[nz - 1]];
2111       if (ctx->apply_p0) {
2112         sum = 0.;
2113         for (j = 0; j < nz - 1; j++) {
2114           sum += ay[idxs[j]];
2115           ay[idxs[j]] += val;
2116         }
2117         ay[idxs[nz - 1]] += sum;
2118       } else {
2119         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2120         ay[idxs[nz - 1]] = 0.;
2121       }
2122       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2123     }
2124     PetscCall(VecRestoreArray(y, &ay));
2125   }
2126   PetscFunctionReturn(PETSC_SUCCESS);
2127 }
2128 
2129 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2130 {
2131   PetscFunctionBegin;
2132   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2133   PetscFunctionReturn(PETSC_SUCCESS);
2134 }
2135 
2136 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2137 {
2138   PetscFunctionBegin;
2139   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2140   PetscFunctionReturn(PETSC_SUCCESS);
2141 }
2142 
2143 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2144 {
2145   PC_IS                  *pcis   = (PC_IS *)pc->data;
2146   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
2147   PCBDDCBenignMatMult_ctx ctx;
2148 
2149   PetscFunctionBegin;
2150   if (!restore) {
2151     Mat                A_IB, A_BI;
2152     PetscScalar       *work;
2153     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2154 
2155     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2156     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2157     PetscCall(PetscMalloc1(pcis->n, &work));
2158     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2159     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2160     PetscCall(MatSetType(A_IB, MATSHELL));
2161     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
2162     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
2163     PetscCall(PetscNew(&ctx));
2164     PetscCall(MatShellSetContext(A_IB, ctx));
2165     ctx->apply_left  = PETSC_TRUE;
2166     ctx->apply_right = PETSC_FALSE;
2167     ctx->apply_p0    = PETSC_FALSE;
2168     ctx->benign_n    = pcbddc->benign_n;
2169     if (reuse) {
2170       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2171       ctx->free                 = PETSC_FALSE;
2172     } else { /* TODO: could be optimized for successive solves */
2173       ISLocalToGlobalMapping N_to_D;
2174       PetscInt               i;
2175 
2176       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2177       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2178       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2179       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2180       ctx->free = PETSC_TRUE;
2181     }
2182     ctx->A    = pcis->A_IB;
2183     ctx->work = work;
2184     PetscCall(MatSetUp(A_IB));
2185     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2186     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2187     pcis->A_IB = A_IB;
2188 
2189     /* A_BI as A_IB^T */
2190     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2191     pcbddc->benign_original_mat = pcis->A_BI;
2192     pcis->A_BI                  = A_BI;
2193   } else {
2194     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2195     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2196     PetscCall(MatDestroy(&pcis->A_IB));
2197     pcis->A_IB = ctx->A;
2198     ctx->A     = NULL;
2199     PetscCall(MatDestroy(&pcis->A_BI));
2200     pcis->A_BI                  = pcbddc->benign_original_mat;
2201     pcbddc->benign_original_mat = NULL;
2202     if (ctx->free) {
2203       PetscInt i;
2204       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2205       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2206     }
2207     PetscCall(PetscFree(ctx->work));
2208     PetscCall(PetscFree(ctx));
2209   }
2210   PetscFunctionReturn(PETSC_SUCCESS);
2211 }
2212 
2213 /* used just in bddc debug mode */
2214 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2215 {
2216   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2217   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2218   Mat      An;
2219 
2220   PetscFunctionBegin;
2221   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2222   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2223   if (is1) {
2224     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2225     PetscCall(MatDestroy(&An));
2226   } else {
2227     *B = An;
2228   }
2229   PetscFunctionReturn(PETSC_SUCCESS);
2230 }
2231 
2232 /* TODO: add reuse flag */
2233 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2234 {
2235   Mat             Bt;
2236   PetscScalar    *a, *bdata;
2237   const PetscInt *ii, *ij;
2238   PetscInt        m, n, i, nnz, *bii, *bij;
2239   PetscBool       flg_row;
2240 
2241   PetscFunctionBegin;
2242   PetscCall(MatGetSize(A, &n, &m));
2243   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2244   PetscCall(MatSeqAIJGetArray(A, &a));
2245   nnz = n;
2246   for (i = 0; i < ii[n]; i++) {
2247     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2248   }
2249   PetscCall(PetscMalloc1(n + 1, &bii));
2250   PetscCall(PetscMalloc1(nnz, &bij));
2251   PetscCall(PetscMalloc1(nnz, &bdata));
2252   nnz    = 0;
2253   bii[0] = 0;
2254   for (i = 0; i < n; i++) {
2255     PetscInt j;
2256     for (j = ii[i]; j < ii[i + 1]; j++) {
2257       PetscScalar entry = a[j];
2258       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2259         bij[nnz]   = ij[j];
2260         bdata[nnz] = entry;
2261         nnz++;
2262       }
2263     }
2264     bii[i + 1] = nnz;
2265   }
2266   PetscCall(MatSeqAIJRestoreArray(A, &a));
2267   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2268   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2269   {
2270     Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2271     b->free_a     = PETSC_TRUE;
2272     b->free_ij    = PETSC_TRUE;
2273   }
2274   if (*B == A) PetscCall(MatDestroy(&A));
2275   *B = Bt;
2276   PetscFunctionReturn(PETSC_SUCCESS);
2277 }
2278 
2279 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2280 {
2281   Mat                    B = NULL;
2282   DM                     dm;
2283   IS                     is_dummy, *cc_n;
2284   ISLocalToGlobalMapping l2gmap_dummy;
2285   PCBDDCGraph            graph;
2286   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2287   PetscInt               i, n;
2288   PetscInt              *xadj, *adjncy;
2289   PetscBool              isplex = PETSC_FALSE;
2290 
2291   PetscFunctionBegin;
2292   if (ncc) *ncc = 0;
2293   if (cc) *cc = NULL;
2294   if (primalv) *primalv = NULL;
2295   PetscCall(PCBDDCGraphCreate(&graph));
2296   PetscCall(MatGetDM(pc->pmat, &dm));
2297   if (!dm) PetscCall(PCGetDM(pc, &dm));
2298   if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2299   if (filter) isplex = PETSC_FALSE;
2300 
2301   if (isplex) { /* this code has been modified from plexpartition.c */
2302     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2303     PetscInt       *adj = NULL;
2304     IS              cellNumbering;
2305     const PetscInt *cellNum;
2306     PetscBool       useCone, useClosure;
2307     PetscSection    section;
2308     PetscSegBuffer  adjBuffer;
2309     PetscSF         sfPoint;
2310 
2311     PetscCall(DMConvert(dm, DMPLEX, &dm));
2312     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2313     PetscCall(DMGetPointSF(dm, &sfPoint));
2314     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2315     /* Build adjacency graph via a section/segbuffer */
2316     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2317     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2318     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2319     /* Always use FVM adjacency to create partitioner graph */
2320     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2321     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2322     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2323     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2324     for (n = 0, p = pStart; p < pEnd; p++) {
2325       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2326       if (nroots > 0) {
2327         if (cellNum[p] < 0) continue;
2328       }
2329       adjSize = PETSC_DETERMINE;
2330       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2331       for (a = 0; a < adjSize; ++a) {
2332         const PetscInt point = adj[a];
2333         if (pStart <= point && point < pEnd) {
2334           PetscInt *PETSC_RESTRICT pBuf;
2335           PetscCall(PetscSectionAddDof(section, p, 1));
2336           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2337           *pBuf = point;
2338         }
2339       }
2340       n++;
2341     }
2342     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2343     /* Derive CSR graph from section/segbuffer */
2344     PetscCall(PetscSectionSetUp(section));
2345     PetscCall(PetscSectionGetStorageSize(section, &size));
2346     PetscCall(PetscMalloc1(n + 1, &xadj));
2347     for (idx = 0, p = pStart; p < pEnd; p++) {
2348       if (nroots > 0) {
2349         if (cellNum[p] < 0) continue;
2350       }
2351       PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2352     }
2353     xadj[n] = size;
2354     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2355     /* Clean up */
2356     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2357     PetscCall(PetscSectionDestroy(&section));
2358     PetscCall(PetscFree(adj));
2359     graph->xadj   = xadj;
2360     graph->adjncy = adjncy;
2361   } else {
2362     Mat       A;
2363     PetscBool isseqaij, flg_row;
2364 
2365     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2366     if (!A->rmap->N || !A->cmap->N) {
2367       PetscCall(PCBDDCGraphDestroy(&graph));
2368       PetscFunctionReturn(PETSC_SUCCESS);
2369     }
2370     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2371     if (!isseqaij && filter) {
2372       PetscBool isseqdense;
2373 
2374       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2375       if (!isseqdense) {
2376         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2377       } else { /* TODO: rectangular case and LDA */
2378         PetscScalar *array;
2379         PetscReal    chop = 1.e-6;
2380 
2381         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2382         PetscCall(MatDenseGetArray(B, &array));
2383         PetscCall(MatGetSize(B, &n, NULL));
2384         for (i = 0; i < n; i++) {
2385           PetscInt j;
2386           for (j = i + 1; j < n; j++) {
2387             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2388             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2389             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2390           }
2391         }
2392         PetscCall(MatDenseRestoreArray(B, &array));
2393         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2394       }
2395     } else {
2396       PetscCall(PetscObjectReference((PetscObject)A));
2397       B = A;
2398     }
2399     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2400 
2401     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2402     if (filter) {
2403       PetscScalar *data;
2404       PetscInt     j, cum;
2405 
2406       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2407       PetscCall(MatSeqAIJGetArray(B, &data));
2408       cum = 0;
2409       for (i = 0; i < n; i++) {
2410         PetscInt t;
2411 
2412         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2413           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2414           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2415         }
2416         t                = xadj_filtered[i];
2417         xadj_filtered[i] = cum;
2418         cum += t;
2419       }
2420       PetscCall(MatSeqAIJRestoreArray(B, &data));
2421       graph->xadj   = xadj_filtered;
2422       graph->adjncy = adjncy_filtered;
2423     } else {
2424       graph->xadj   = xadj;
2425       graph->adjncy = adjncy;
2426     }
2427   }
2428   /* compute local connected components using PCBDDCGraph */
2429   graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2430   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2431   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2432   PetscCall(ISDestroy(&is_dummy));
2433   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT));
2434   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2435   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2436   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2437 
2438   /* partial clean up */
2439   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2440   if (B) {
2441     PetscBool flg_row;
2442     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2443     PetscCall(MatDestroy(&B));
2444   }
2445   if (isplex) {
2446     PetscCall(PetscFree(xadj));
2447     PetscCall(PetscFree(adjncy));
2448   }
2449 
2450   /* get back data */
2451   if (isplex) {
2452     if (ncc) *ncc = graph->ncc;
2453     if (cc || primalv) {
2454       Mat          A;
2455       PetscBT      btv, btvt, btvc;
2456       PetscSection subSection;
2457       PetscInt    *ids, cum, cump, *cids, *pids;
2458       PetscInt     dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2459 
2460       PetscCall(DMGetDimension(dm, &dim));
2461       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2462       PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2463       PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2464       PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2465       PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2466       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2467       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2468       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2469       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2470       PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2471 
2472       /* First see if we find corners for the subdomains, i.e. a vertex
2473          shared by at least dim subdomain boundary faces. This does not
2474          cover all the possible cases with simplices but it is enough
2475          for tensor cells */
2476       if (vStart != fStart && dim <= 3) {
2477         for (PetscInt c = cStart; c < cEnd; c++) {
2478           PetscInt        nf, cnt = 0, mcnt = dim, *cfaces;
2479           const PetscInt *faces;
2480 
2481           PetscCall(DMPlexGetConeSize(dm, c, &nf));
2482           PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2483           PetscCall(DMPlexGetCone(dm, c, &faces));
2484           for (PetscInt f = 0; f < nf; f++) {
2485             PetscInt nc, ff;
2486 
2487             PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2488             PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2489             if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2490           }
2491           if (cnt >= mcnt) {
2492             PetscInt size, *closure = NULL;
2493 
2494             PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2495             for (PetscInt k = 0; k < 2 * size; k += 2) {
2496               PetscInt v = closure[k];
2497               if (v >= vStart && v < vEnd) {
2498                 PetscInt vsize, *vclosure = NULL;
2499 
2500                 cnt = 0;
2501                 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2502                 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2503                   PetscInt f = vclosure[vk];
2504                   if (f >= fStart && f < fEnd) {
2505                     PetscInt  nc, ff;
2506                     PetscBool valid = PETSC_FALSE;
2507 
2508                     for (PetscInt fk = 0; fk < nf; fk++)
2509                       if (f == cfaces[fk]) valid = PETSC_TRUE;
2510                     if (!valid) continue;
2511                     PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2512                     PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2513                     if (nc == 1 && f == ff) cnt++;
2514                   }
2515                 }
2516                 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2517                 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2518               }
2519             }
2520             PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2521           }
2522           PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2523         }
2524       }
2525 
2526       cids[0] = 0;
2527       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2528         PetscInt j;
2529 
2530         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2531         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2532           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2533 
2534           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2535           for (k = 0; k < 2 * size; k += 2) {
2536             PetscInt s, pp, p = closure[k], off, dof, cdof;
2537 
2538             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2539             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2540             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2541             for (s = 0; s < dof - cdof; s++) {
2542               if (PetscBTLookupSet(btvt, off + s)) continue;
2543               if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2544               else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2545               else pids[cump++] = off + s; /* cross-vertex */
2546             }
2547             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2548             if (pp != p) {
2549               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2550               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2551               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2552               for (s = 0; s < dof - cdof; s++) {
2553                 if (PetscBTLookupSet(btvt, off + s)) continue;
2554                 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2555                 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2556                 else pids[cump++] = off + s; /* cross-vertex */
2557               }
2558             }
2559           }
2560           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2561         }
2562         cids[i + 1] = cum;
2563         /* mark dofs as already assigned */
2564         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2565       }
2566       if (cc) {
2567         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2568         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2569         *cc = cc_n;
2570       }
2571       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2572       PetscCall(PetscFree3(ids, cids, pids));
2573       PetscCall(PetscBTDestroy(&btv));
2574       PetscCall(PetscBTDestroy(&btvt));
2575       PetscCall(PetscBTDestroy(&btvc));
2576       PetscCall(DMDestroy(&dm));
2577     }
2578   } else {
2579     if (ncc) *ncc = graph->ncc;
2580     if (cc) {
2581       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2582       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2583       *cc = cc_n;
2584     }
2585   }
2586   /* clean up graph */
2587   graph->xadj   = NULL;
2588   graph->adjncy = NULL;
2589   PetscCall(PCBDDCGraphDestroy(&graph));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2594 {
2595   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2596   PC_IS   *pcis   = (PC_IS *)pc->data;
2597   IS       dirIS  = NULL;
2598   PetscInt i;
2599 
2600   PetscFunctionBegin;
2601   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2602   if (zerodiag) {
2603     Mat             A;
2604     Vec             vec3_N;
2605     PetscScalar    *vals;
2606     const PetscInt *idxs;
2607     PetscInt        nz, *count;
2608 
2609     /* p0 */
2610     PetscCall(VecSet(pcis->vec1_N, 0.));
2611     PetscCall(PetscMalloc1(pcis->n, &vals));
2612     PetscCall(ISGetLocalSize(zerodiag, &nz));
2613     PetscCall(ISGetIndices(zerodiag, &idxs));
2614     for (i = 0; i < nz; i++) vals[i] = 1.;
2615     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2616     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2617     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2618     /* v_I */
2619     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2620     for (i = 0; i < nz; i++) vals[i] = 0.;
2621     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2622     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2623     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2624     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2625     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2626     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2627     if (dirIS) {
2628       PetscInt n;
2629 
2630       PetscCall(ISGetLocalSize(dirIS, &n));
2631       PetscCall(ISGetIndices(dirIS, &idxs));
2632       for (i = 0; i < n; i++) vals[i] = 0.;
2633       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2634       PetscCall(ISRestoreIndices(dirIS, &idxs));
2635     }
2636     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2637     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2638     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2639     PetscCall(VecSet(vec3_N, 0.));
2640     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2641     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2642     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2643     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2644     PetscCall(PetscFree(vals));
2645     PetscCall(VecDestroy(&vec3_N));
2646 
2647     /* there should not be any pressure dofs lying on the interface */
2648     PetscCall(PetscCalloc1(pcis->n, &count));
2649     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2650     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2651     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2652     PetscCall(ISGetIndices(zerodiag, &idxs));
2653     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2654     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2655     PetscCall(PetscFree(count));
2656   }
2657   PetscCall(ISDestroy(&dirIS));
2658 
2659   /* check PCBDDCBenignGetOrSetP0 */
2660   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2661   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2662   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2663   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2664   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2665   for (i = 0; i < pcbddc->benign_n; i++) {
2666     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2667     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2668   }
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2673 {
2674   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2675   Mat_IS   *matis     = (Mat_IS *)pc->pmat->data;
2676   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2677   PetscInt  nz, n, benign_n, bsp = 1;
2678   PetscInt *interior_dofs, n_interior_dofs, nneu;
2679   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2680 
2681   PetscFunctionBegin;
2682   if (reuse) goto project_b0;
2683   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2684   PetscCall(MatDestroy(&pcbddc->benign_B0));
2685   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2686   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2687   has_null_pressures = PETSC_TRUE;
2688   have_null          = PETSC_TRUE;
2689   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2690      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2691      Checks if all the pressure dofs in each subdomain have a zero diagonal
2692      If not, a change of basis on pressures is not needed
2693      since the local Schur complements are already SPD
2694   */
2695   if (pcbddc->n_ISForDofsLocal) {
2696     IS        iP = NULL;
2697     PetscInt  p, *pp;
2698     PetscBool flg;
2699 
2700     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2701     n = pcbddc->n_ISForDofsLocal;
2702     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2703     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2704     PetscOptionsEnd();
2705     if (!flg) {
2706       n     = 1;
2707       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2708     }
2709 
2710     bsp = 0;
2711     for (p = 0; p < n; p++) {
2712       PetscInt bs;
2713 
2714       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2715       PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2716       bsp += bs;
2717     }
2718     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2719     bsp = 0;
2720     for (p = 0; p < n; p++) {
2721       const PetscInt *idxs;
2722       PetscInt        b, bs, npl, *bidxs;
2723 
2724       PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2725       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2726       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2727       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2728       for (b = 0; b < bs; b++) {
2729         PetscInt i;
2730 
2731         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2732         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2733         bsp++;
2734       }
2735       PetscCall(PetscFree(bidxs));
2736       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2737     }
2738     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2739 
2740     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2741     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2742     if (iP) {
2743       IS newpressures;
2744 
2745       PetscCall(ISDifference(pressures, iP, &newpressures));
2746       PetscCall(ISDestroy(&pressures));
2747       pressures = newpressures;
2748     }
2749     PetscCall(ISSorted(pressures, &sorted));
2750     if (!sorted) PetscCall(ISSort(pressures));
2751     PetscCall(PetscFree(pp));
2752   }
2753 
2754   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2755   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2756   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2757   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2758   PetscCall(ISSorted(zerodiag, &sorted));
2759   if (!sorted) PetscCall(ISSort(zerodiag));
2760   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2761   zerodiag_save = zerodiag;
2762   PetscCall(ISGetLocalSize(zerodiag, &nz));
2763   if (!nz) {
2764     if (n) have_null = PETSC_FALSE;
2765     has_null_pressures = PETSC_FALSE;
2766     PetscCall(ISDestroy(&zerodiag));
2767   }
2768   recompute_zerodiag = PETSC_FALSE;
2769 
2770   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2771   zerodiag_subs   = NULL;
2772   benign_n        = 0;
2773   n_interior_dofs = 0;
2774   interior_dofs   = NULL;
2775   nneu            = 0;
2776   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2777   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2778   if (checkb) { /* need to compute interior nodes */
2779     PetscInt               n, i;
2780     PetscInt              *count;
2781     ISLocalToGlobalMapping mapping;
2782 
2783     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2784     PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2785     PetscCall(PetscMalloc1(n, &interior_dofs));
2786     for (i = 0; i < n; i++)
2787       if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2788     PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2789   }
2790   if (has_null_pressures) {
2791     IS             *subs;
2792     PetscInt        nsubs, i, j, nl;
2793     const PetscInt *idxs;
2794     PetscScalar    *array;
2795     Vec            *work;
2796 
2797     subs  = pcbddc->local_subs;
2798     nsubs = pcbddc->n_local_subs;
2799     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2800     if (checkb) {
2801       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2802       PetscCall(ISGetLocalSize(zerodiag, &nl));
2803       PetscCall(ISGetIndices(zerodiag, &idxs));
2804       /* work[0] = 1_p */
2805       PetscCall(VecSet(work[0], 0.));
2806       PetscCall(VecGetArray(work[0], &array));
2807       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2808       PetscCall(VecRestoreArray(work[0], &array));
2809       /* work[0] = 1_v */
2810       PetscCall(VecSet(work[1], 1.));
2811       PetscCall(VecGetArray(work[1], &array));
2812       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2813       PetscCall(VecRestoreArray(work[1], &array));
2814       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2815     }
2816 
2817     if (nsubs > 1 || bsp > 1) {
2818       IS      *is;
2819       PetscInt b, totb;
2820 
2821       totb  = bsp;
2822       is    = bsp > 1 ? bzerodiag : &zerodiag;
2823       nsubs = PetscMax(nsubs, 1);
2824       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2825       for (b = 0; b < totb; b++) {
2826         for (i = 0; i < nsubs; i++) {
2827           ISLocalToGlobalMapping l2g;
2828           IS                     t_zerodiag_subs;
2829           PetscInt               nl;
2830 
2831           if (subs) {
2832             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2833           } else {
2834             IS tis;
2835 
2836             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2837             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2838             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2839             PetscCall(ISDestroy(&tis));
2840           }
2841           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2842           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2843           if (nl) {
2844             PetscBool valid = PETSC_TRUE;
2845 
2846             if (checkb) {
2847               PetscCall(VecSet(matis->x, 0));
2848               PetscCall(ISGetLocalSize(subs[i], &nl));
2849               PetscCall(ISGetIndices(subs[i], &idxs));
2850               PetscCall(VecGetArray(matis->x, &array));
2851               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2852               PetscCall(VecRestoreArray(matis->x, &array));
2853               PetscCall(ISRestoreIndices(subs[i], &idxs));
2854               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2855               PetscCall(MatMult(matis->A, matis->x, matis->y));
2856               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2857               PetscCall(VecGetArray(matis->y, &array));
2858               for (j = 0; j < n_interior_dofs; j++) {
2859                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2860                   valid = PETSC_FALSE;
2861                   break;
2862                 }
2863               }
2864               PetscCall(VecRestoreArray(matis->y, &array));
2865             }
2866             if (valid && nneu) {
2867               const PetscInt *idxs;
2868               PetscInt        nzb;
2869 
2870               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2871               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2872               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2873               if (nzb) valid = PETSC_FALSE;
2874             }
2875             if (valid && pressures) {
2876               IS       t_pressure_subs, tmp;
2877               PetscInt i1, i2;
2878 
2879               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2880               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2881               PetscCall(ISGetLocalSize(tmp, &i1));
2882               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2883               if (i2 != i1) valid = PETSC_FALSE;
2884               PetscCall(ISDestroy(&t_pressure_subs));
2885               PetscCall(ISDestroy(&tmp));
2886             }
2887             if (valid) {
2888               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2889               benign_n++;
2890             } else recompute_zerodiag = PETSC_TRUE;
2891           }
2892           PetscCall(ISDestroy(&t_zerodiag_subs));
2893           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2894         }
2895       }
2896     } else { /* there's just one subdomain (or zero if they have not been detected */
2897       PetscBool valid = PETSC_TRUE;
2898 
2899       if (nneu) valid = PETSC_FALSE;
2900       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2901       if (valid && checkb) {
2902         PetscCall(MatMult(matis->A, work[0], matis->x));
2903         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2904         PetscCall(VecGetArray(matis->x, &array));
2905         for (j = 0; j < n_interior_dofs; j++) {
2906           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2907             valid = PETSC_FALSE;
2908             break;
2909           }
2910         }
2911         PetscCall(VecRestoreArray(matis->x, &array));
2912       }
2913       if (valid) {
2914         benign_n = 1;
2915         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2916         PetscCall(PetscObjectReference((PetscObject)zerodiag));
2917         zerodiag_subs[0] = zerodiag;
2918       }
2919     }
2920     if (checkb) PetscCall(VecDestroyVecs(2, &work));
2921   }
2922   PetscCall(PetscFree(interior_dofs));
2923 
2924   if (!benign_n) {
2925     PetscInt n;
2926 
2927     PetscCall(ISDestroy(&zerodiag));
2928     recompute_zerodiag = PETSC_FALSE;
2929     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2930     if (n) have_null = PETSC_FALSE;
2931   }
2932 
2933   /* final check for null pressures */
2934   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
2935 
2936   if (recompute_zerodiag) {
2937     PetscCall(ISDestroy(&zerodiag));
2938     if (benign_n == 1) {
2939       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2940       zerodiag = zerodiag_subs[0];
2941     } else {
2942       PetscInt i, nzn, *new_idxs;
2943 
2944       nzn = 0;
2945       for (i = 0; i < benign_n; i++) {
2946         PetscInt ns;
2947         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2948         nzn += ns;
2949       }
2950       PetscCall(PetscMalloc1(nzn, &new_idxs));
2951       nzn = 0;
2952       for (i = 0; i < benign_n; i++) {
2953         PetscInt ns, *idxs;
2954         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2955         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2956         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2957         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2958         nzn += ns;
2959       }
2960       PetscCall(PetscSortInt(nzn, new_idxs));
2961       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2962     }
2963     have_null = PETSC_FALSE;
2964   }
2965 
2966   /* determines if the coarse solver will be singular or not */
2967   PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2968 
2969   /* Prepare matrix to compute no-net-flux */
2970   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2971     Mat                    A, loc_divudotp;
2972     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2973     IS                     row, col, isused = NULL;
2974     PetscInt               M, N, n, st, n_isused;
2975 
2976     if (pressures) {
2977       isused = pressures;
2978     } else {
2979       isused = zerodiag_save;
2980     }
2981     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
2982     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2983     PetscCall(MatGetLocalSize(A, &n, NULL));
2984     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
2985     n_isused = 0;
2986     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
2987     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
2988     st = st - n_isused;
2989     if (n) {
2990       const PetscInt *gidxs;
2991 
2992       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
2993       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
2994       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2995       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2996       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
2997       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
2998     } else {
2999       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3000       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3001       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3002     }
3003     PetscCall(MatGetSize(pc->pmat, NULL, &N));
3004     PetscCall(ISGetSize(row, &M));
3005     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3006     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3007     PetscCall(ISDestroy(&row));
3008     PetscCall(ISDestroy(&col));
3009     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3010     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3011     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3012     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3013     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3014     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3015     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3016     PetscCall(MatDestroy(&loc_divudotp));
3017     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3018     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3019   }
3020   PetscCall(ISDestroy(&zerodiag_save));
3021   PetscCall(ISDestroy(&pressures));
3022   if (bzerodiag) {
3023     PetscInt i;
3024 
3025     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3026     PetscCall(PetscFree(bzerodiag));
3027   }
3028   pcbddc->benign_n             = benign_n;
3029   pcbddc->benign_zerodiag_subs = zerodiag_subs;
3030 
3031   /* determines if the problem has subdomains with 0 pressure block */
3032   have_null = (PetscBool)(!!pcbddc->benign_n);
3033   PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3034 
3035 project_b0:
3036   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3037   /* change of basis and p0 dofs */
3038   if (pcbddc->benign_n) {
3039     PetscInt i, s, *nnz;
3040 
3041     /* local change of basis for pressures */
3042     PetscCall(MatDestroy(&pcbddc->benign_change));
3043     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3044     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3045     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3046     PetscCall(PetscMalloc1(n, &nnz));
3047     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3048     for (i = 0; i < pcbddc->benign_n; i++) {
3049       const PetscInt *idxs;
3050       PetscInt        nzs, j;
3051 
3052       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3053       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3054       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3055       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
3056       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3057     }
3058     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3059     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3060     PetscCall(PetscFree(nnz));
3061     /* set identity by default */
3062     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3063     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3064     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3065     /* set change on pressures */
3066     for (s = 0; s < pcbddc->benign_n; s++) {
3067       PetscScalar    *array;
3068       const PetscInt *idxs;
3069       PetscInt        nzs;
3070 
3071       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3072       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3073       for (i = 0; i < nzs - 1; i++) {
3074         PetscScalar vals[2];
3075         PetscInt    cols[2];
3076 
3077         cols[0] = idxs[i];
3078         cols[1] = idxs[nzs - 1];
3079         vals[0] = 1.;
3080         vals[1] = 1.;
3081         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3082       }
3083       PetscCall(PetscMalloc1(nzs, &array));
3084       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3085       array[nzs - 1] = 1.;
3086       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3087       /* store local idxs for p0 */
3088       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3089       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3090       PetscCall(PetscFree(array));
3091     }
3092     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3093     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3094 
3095     /* project if needed */
3096     if (pcbddc->benign_change_explicit) {
3097       Mat M;
3098 
3099       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3100       PetscCall(MatDestroy(&pcbddc->local_mat));
3101       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3102       PetscCall(MatDestroy(&M));
3103     }
3104     /* store global idxs for p0 */
3105     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3106   }
3107   *zerodiaglocal = zerodiag;
3108   PetscFunctionReturn(PETSC_SUCCESS);
3109 }
3110 
3111 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3112 {
3113   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
3114   PetscScalar *array;
3115 
3116   PetscFunctionBegin;
3117   if (!pcbddc->benign_sf) {
3118     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3119     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3120   }
3121   if (get) {
3122     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3123     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3124     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3125     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3126   } else {
3127     PetscCall(VecGetArray(v, &array));
3128     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3129     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3130     PetscCall(VecRestoreArray(v, &array));
3131   }
3132   PetscFunctionReturn(PETSC_SUCCESS);
3133 }
3134 
3135 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3136 {
3137   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3138 
3139   PetscFunctionBegin;
3140   /* TODO: add error checking
3141     - avoid nested pop (or push) calls.
3142     - cannot push before pop.
3143     - cannot call this if pcbddc->local_mat is NULL
3144   */
3145   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3146   if (pop) {
3147     if (pcbddc->benign_change_explicit) {
3148       IS       is_p0;
3149       MatReuse reuse;
3150 
3151       /* extract B_0 */
3152       reuse = MAT_INITIAL_MATRIX;
3153       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3154       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3155       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3156       /* remove rows and cols from local problem */
3157       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3158       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3159       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3160       PetscCall(ISDestroy(&is_p0));
3161     } else {
3162       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
3163       PetscScalar *vals;
3164       PetscInt     i, n, *idxs_ins;
3165 
3166       PetscCall(VecGetLocalSize(matis->y, &n));
3167       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3168       if (!pcbddc->benign_B0) {
3169         PetscInt *nnz;
3170         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3171         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3172         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3173         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3174         for (i = 0; i < pcbddc->benign_n; i++) {
3175           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3176           nnz[i] = n - nnz[i];
3177         }
3178         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3179         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3180         PetscCall(PetscFree(nnz));
3181       }
3182 
3183       for (i = 0; i < pcbddc->benign_n; i++) {
3184         PetscScalar *array;
3185         PetscInt    *idxs, j, nz, cum;
3186 
3187         PetscCall(VecSet(matis->x, 0.));
3188         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3189         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3190         for (j = 0; j < nz; j++) vals[j] = 1.;
3191         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3192         PetscCall(VecAssemblyBegin(matis->x));
3193         PetscCall(VecAssemblyEnd(matis->x));
3194         PetscCall(VecSet(matis->y, 0.));
3195         PetscCall(MatMult(matis->A, matis->x, matis->y));
3196         PetscCall(VecGetArray(matis->y, &array));
3197         cum = 0;
3198         for (j = 0; j < n; j++) {
3199           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3200             vals[cum]     = array[j];
3201             idxs_ins[cum] = j;
3202             cum++;
3203           }
3204         }
3205         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3206         PetscCall(VecRestoreArray(matis->y, &array));
3207         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3208       }
3209       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3210       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3211       PetscCall(PetscFree2(idxs_ins, vals));
3212     }
3213   } else { /* push */
3214 
3215     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3216     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3217       PetscScalar *B0_vals;
3218       PetscInt    *B0_cols, B0_ncol;
3219 
3220       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3221       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3222       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3223       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3224       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3225     }
3226     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3227     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3228   }
3229   PetscFunctionReturn(PETSC_SUCCESS);
3230 }
3231 
3232 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3233 {
3234   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3235   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3236   PetscBLASInt    B_dummyint, B_neigs, B_ierr, B_lwork;
3237   PetscBLASInt   *B_iwork, *B_ifail;
3238   PetscScalar    *work, lwork;
3239   PetscScalar    *St, *S, *eigv;
3240   PetscScalar    *Sarray, *Starray;
3241   PetscReal      *eigs, thresh, lthresh, uthresh;
3242   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3243   PetscBool       allocated_S_St, upart;
3244 #if defined(PETSC_USE_COMPLEX)
3245   PetscReal *rwork;
3246 #endif
3247 
3248   PetscFunctionBegin;
3249   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3250   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3251   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3252   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3253              sub_schurs->is_posdef);
3254   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3255 
3256   if (pcbddc->dbg_flag) {
3257     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3258     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3259     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3260     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3261     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3262   }
3263 
3264   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3265 
3266   /* max size of subsets */
3267   mss = 0;
3268   for (i = 0; i < sub_schurs->n_subs; i++) {
3269     PetscInt subset_size;
3270 
3271     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3272     mss = PetscMax(mss, subset_size);
3273   }
3274 
3275   /* min/max and threshold */
3276   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3277   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3278   nmax           = PetscMax(nmin, nmax);
3279   allocated_S_St = PETSC_FALSE;
3280   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3281     allocated_S_St = PETSC_TRUE;
3282   }
3283 
3284   /* allocate lapack workspace */
3285   cum = cum2 = 0;
3286   maxneigs   = 0;
3287   for (i = 0; i < sub_schurs->n_subs; i++) {
3288     PetscInt n, subset_size;
3289 
3290     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3291     n = PetscMin(subset_size, nmax);
3292     cum += subset_size;
3293     cum2 += subset_size * n;
3294     maxneigs = PetscMax(maxneigs, n);
3295   }
3296   lwork = 0;
3297   if (mss) {
3298     PetscScalar  sdummy  = 0.;
3299     PetscBLASInt B_itype = 1;
3300     PetscBLASInt B_N = mss, idummy = 0;
3301     PetscReal    rdummy = 0., zero = 0.0;
3302     PetscReal    eps = 0.0; /* dlamch? */
3303 
3304     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3305     B_lwork = -1;
3306     /* some implementations may complain about NULL pointers, even if we are querying */
3307     S       = &sdummy;
3308     St      = &sdummy;
3309     eigs    = &rdummy;
3310     eigv    = &sdummy;
3311     B_iwork = &idummy;
3312     B_ifail = &idummy;
3313 #if defined(PETSC_USE_COMPLEX)
3314     rwork = &rdummy;
3315 #endif
3316     thresh = 1.0;
3317     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3318 #if defined(PETSC_USE_COMPLEX)
3319     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3320 #else
3321     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3322 #endif
3323     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr);
3324     PetscCall(PetscFPTrapPop());
3325   }
3326 
3327   nv = 0;
3328   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3329     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3330   }
3331   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3332   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3333   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3334 #if defined(PETSC_USE_COMPLEX)
3335   PetscCall(PetscMalloc1(7 * mss, &rwork));
3336 #endif
3337   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3338                          &pcbddc->adaptive_constraints_data));
3339   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3340 
3341   maxneigs = 0;
3342   cum = cumarray                           = 0;
3343   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3344   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3345   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3346     const PetscInt *idxs;
3347 
3348     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3349     for (cum = 0; cum < nv; cum++) {
3350       pcbddc->adaptive_constraints_n[cum]            = 1;
3351       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3352       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3353       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3354       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3355     }
3356     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3357   }
3358 
3359   if (mss) { /* multilevel */
3360     if (sub_schurs->gdsw) {
3361       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3362       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3363     } else {
3364       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3365       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3366     }
3367   }
3368 
3369   lthresh = pcbddc->adaptive_threshold[0];
3370   uthresh = pcbddc->adaptive_threshold[1];
3371   upart   = pcbddc->use_deluxe_scaling;
3372   for (i = 0; i < sub_schurs->n_subs; i++) {
3373     const PetscInt *idxs;
3374     PetscReal       upper, lower;
3375     PetscInt        j, subset_size, eigs_start = 0;
3376     PetscBLASInt    B_N;
3377     PetscBool       same_data = PETSC_FALSE;
3378     PetscBool       scal      = PETSC_FALSE;
3379 
3380     if (upart) {
3381       upper = PETSC_MAX_REAL;
3382       lower = uthresh;
3383     } else {
3384       if (sub_schurs->gdsw) {
3385         upper = uthresh;
3386         lower = PETSC_MIN_REAL;
3387       } else {
3388         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3389         upper = 1. / uthresh;
3390         lower = 0.;
3391       }
3392     }
3393     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3394     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3395     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3396     /* this is experimental: we assume the dofs have been properly grouped to have
3397        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3398     if (!sub_schurs->is_posdef) {
3399       Mat T;
3400 
3401       for (j = 0; j < subset_size; j++) {
3402         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3403           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3404           PetscCall(MatScale(T, -1.0));
3405           PetscCall(MatDestroy(&T));
3406           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3407           PetscCall(MatScale(T, -1.0));
3408           PetscCall(MatDestroy(&T));
3409           if (sub_schurs->change_primal_sub) {
3410             PetscInt        nz, k;
3411             const PetscInt *idxs;
3412 
3413             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3414             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3415             for (k = 0; k < nz; k++) {
3416               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3417               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3418             }
3419             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3420           }
3421           scal = PETSC_TRUE;
3422           break;
3423         }
3424       }
3425     }
3426 
3427     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3428       if (sub_schurs->is_symmetric) {
3429         PetscInt j, k;
3430         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3431           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3432           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3433         }
3434         for (j = 0; j < subset_size; j++) {
3435           for (k = j; k < subset_size; k++) {
3436             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3437             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3438           }
3439         }
3440       } else {
3441         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3442         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3443       }
3444     } else {
3445       S  = Sarray + cumarray;
3446       St = Starray + cumarray;
3447     }
3448     /* see if we can save some work */
3449     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3450 
3451     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3452       B_neigs = 0;
3453     } else {
3454       PetscBLASInt B_itype = 1;
3455       PetscBLASInt B_IL, B_IU;
3456       PetscReal    eps = -1.0; /* dlamch? */
3457       PetscInt     nmin_s;
3458       PetscBool    compute_range;
3459 
3460       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3461       B_neigs       = 0;
3462       compute_range = (PetscBool)!same_data;
3463       if (nmin >= subset_size) compute_range = PETSC_FALSE;
3464 
3465       if (pcbddc->dbg_flag) {
3466         PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3467 
3468         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3469         PetscCall(
3470           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3471       }
3472 
3473       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3474       if (compute_range) {
3475         /* ask for eigenvalues larger than thresh */
3476         if (sub_schurs->is_posdef) {
3477 #if defined(PETSC_USE_COMPLEX)
3478           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3479 #else
3480           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3481 #endif
3482           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3483         } else { /* no theory so far, but it works nicely */
3484           PetscInt  recipe = 0, recipe_m = 1;
3485           PetscReal bb[2];
3486 
3487           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3488           switch (recipe) {
3489           case 0:
3490             if (scal) {
3491               bb[0] = PETSC_MIN_REAL;
3492               bb[1] = lthresh;
3493             } else {
3494               bb[0] = uthresh;
3495               bb[1] = PETSC_MAX_REAL;
3496             }
3497 #if defined(PETSC_USE_COMPLEX)
3498             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3499 #else
3500             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3501 #endif
3502             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3503             break;
3504           case 1:
3505             bb[0] = PETSC_MIN_REAL;
3506             bb[1] = lthresh * lthresh;
3507 #if defined(PETSC_USE_COMPLEX)
3508             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3509 #else
3510             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3511 #endif
3512             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3513             if (!scal) {
3514               PetscBLASInt B_neigs2 = 0;
3515 
3516               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3517               bb[1] = PETSC_MAX_REAL;
3518               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3519               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3520 #if defined(PETSC_USE_COMPLEX)
3521               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3522 #else
3523               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3524 #endif
3525               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3526               B_neigs += B_neigs2;
3527             }
3528             break;
3529           case 2:
3530             if (scal) {
3531               bb[0] = PETSC_MIN_REAL;
3532               bb[1] = 0;
3533 #if defined(PETSC_USE_COMPLEX)
3534               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3535 #else
3536               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3537 #endif
3538               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3539             } else {
3540               PetscBLASInt B_neigs2 = 0;
3541               PetscBool    do_copy  = PETSC_FALSE;
3542 
3543               lthresh = PetscMax(lthresh, 0.0);
3544               if (lthresh > 0.0) {
3545                 bb[0] = PETSC_MIN_REAL;
3546                 bb[1] = lthresh * lthresh;
3547 
3548                 do_copy = PETSC_TRUE;
3549 #if defined(PETSC_USE_COMPLEX)
3550                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3551 #else
3552                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3553 #endif
3554                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3555               }
3556               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3557               bb[1] = PETSC_MAX_REAL;
3558               if (do_copy) {
3559                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3560                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3561               }
3562 #if defined(PETSC_USE_COMPLEX)
3563               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3564 #else
3565               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3566 #endif
3567               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3568               B_neigs += B_neigs2;
3569             }
3570             break;
3571           case 3:
3572             if (scal) {
3573               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3574             } else {
3575               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3576             }
3577             if (!scal) {
3578               bb[0] = uthresh;
3579               bb[1] = PETSC_MAX_REAL;
3580 #if defined(PETSC_USE_COMPLEX)
3581               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3582 #else
3583               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3584 #endif
3585               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3586             }
3587             if (recipe_m > 0 && B_N - B_neigs > 0) {
3588               PetscBLASInt B_neigs2 = 0;
3589 
3590               B_IL = 1;
3591               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3592               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3593               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3594 #if defined(PETSC_USE_COMPLEX)
3595               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3596 #else
3597               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3598 #endif
3599               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3600               B_neigs += B_neigs2;
3601             }
3602             break;
3603           case 4:
3604             bb[0] = PETSC_MIN_REAL;
3605             bb[1] = lthresh;
3606 #if defined(PETSC_USE_COMPLEX)
3607             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3608 #else
3609             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3610 #endif
3611             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3612             {
3613               PetscBLASInt B_neigs2 = 0;
3614 
3615               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3616               bb[1] = PETSC_MAX_REAL;
3617               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3618               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3619 #if defined(PETSC_USE_COMPLEX)
3620               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3621 #else
3622               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3623 #endif
3624               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3625               B_neigs += B_neigs2;
3626             }
3627             break;
3628           case 5: /* same as before: first compute all eigenvalues, then filter */
3629 #if defined(PETSC_USE_COMPLEX)
3630             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3631 #else
3632             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3633 #endif
3634             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3635             {
3636               PetscInt e, k, ne;
3637               for (e = 0, ne = 0; e < B_neigs; e++) {
3638                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3639                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3640                   eigs[ne] = eigs[e];
3641                   ne++;
3642                 }
3643               }
3644               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3645               B_neigs = ne;
3646             }
3647             break;
3648           default:
3649             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3650           }
3651         }
3652       } else if (!same_data) { /* this is just to see all the eigenvalues */
3653         B_IU = PetscMax(1, PetscMin(B_N, nmax));
3654         B_IL = 1;
3655 #if defined(PETSC_USE_COMPLEX)
3656         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3657 #else
3658         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3659 #endif
3660         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3661       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3662         PetscInt k;
3663         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3664         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3665         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3666         nmin = nmax;
3667         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3668         for (k = 0; k < nmax; k++) {
3669           eigs[k]                     = 1. / PETSC_SMALL;
3670           eigv[k * (subset_size + 1)] = 1.0;
3671         }
3672       }
3673       PetscCall(PetscFPTrapPop());
3674       if (B_ierr) {
3675         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3676         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3677         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3678       }
3679 
3680       if (B_neigs > nmax) {
3681         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3682         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3683         B_neigs = nmax;
3684       }
3685 
3686       nmin_s = PetscMin(nmin, B_N);
3687       if (B_neigs < nmin_s) {
3688         PetscBLASInt B_neigs2 = 0;
3689 
3690         if (upart) {
3691           if (scal) {
3692             B_IU = nmin_s;
3693             B_IL = B_neigs + 1;
3694           } else {
3695             B_IL = B_N - nmin_s + 1;
3696             B_IU = B_N - B_neigs;
3697           }
3698         } else {
3699           B_IL = B_neigs + 1;
3700           B_IU = nmin_s;
3701         }
3702         if (pcbddc->dbg_flag) {
3703           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3704         }
3705         if (sub_schurs->is_symmetric) {
3706           PetscInt j, k;
3707           for (j = 0; j < subset_size; j++) {
3708             for (k = j; k < subset_size; k++) {
3709               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3710               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3711             }
3712           }
3713         } else {
3714           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3715           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3716         }
3717         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3718 #if defined(PETSC_USE_COMPLEX)
3719         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3720 #else
3721         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3722 #endif
3723         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3724         PetscCall(PetscFPTrapPop());
3725         B_neigs += B_neigs2;
3726       }
3727       if (B_ierr) {
3728         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3729         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3730         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3731       }
3732       if (pcbddc->dbg_flag) {
3733         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3734         for (j = 0; j < B_neigs; j++) {
3735           if (!sub_schurs->gdsw) {
3736             if (eigs[j] == 0.0) {
3737               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3738             } else {
3739               if (upart) {
3740                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3741               } else {
3742                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1. / eigs[j + eigs_start])));
3743               }
3744             }
3745           } else {
3746             double pg = (double)eigs[j + eigs_start];
3747             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3748             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3749           }
3750         }
3751       }
3752     }
3753     /* change the basis back to the original one */
3754     if (sub_schurs->change) {
3755       Mat change, phi, phit;
3756 
3757       if (pcbddc->dbg_flag > 2) {
3758         PetscInt ii;
3759         for (ii = 0; ii < B_neigs; ii++) {
3760           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3761           for (j = 0; j < B_N; j++) {
3762 #if defined(PETSC_USE_COMPLEX)
3763             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3764             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3765             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3766 #else
3767             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3768 #endif
3769           }
3770         }
3771       }
3772       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3773       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3774       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi));
3775       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3776       PetscCall(MatDestroy(&phit));
3777       PetscCall(MatDestroy(&phi));
3778     }
3779     maxneigs                               = PetscMax(B_neigs, maxneigs);
3780     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3781     if (B_neigs) {
3782       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3783 
3784       if (pcbddc->dbg_flag > 1) {
3785         PetscInt ii;
3786         for (ii = 0; ii < B_neigs; ii++) {
3787           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3788           for (j = 0; j < B_N; j++) {
3789 #if defined(PETSC_USE_COMPLEX)
3790             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3791             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3792             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3793 #else
3794             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3795 #endif
3796           }
3797         }
3798       }
3799       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3800       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3801       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3802       cum++;
3803     }
3804     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3805     /* shift for next computation */
3806     cumarray += subset_size * subset_size;
3807   }
3808   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3809 
3810   if (mss) {
3811     if (sub_schurs->gdsw) {
3812       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3813       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3814     } else {
3815       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3816       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3817       /* destroy matrices (junk) */
3818       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3819       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3820     }
3821   }
3822   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3823   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3824 #if defined(PETSC_USE_COMPLEX)
3825   PetscCall(PetscFree(rwork));
3826 #endif
3827   if (pcbddc->dbg_flag) {
3828     PetscInt maxneigs_r;
3829     PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3830     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3831   }
3832   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3833   PetscFunctionReturn(PETSC_SUCCESS);
3834 }
3835 
3836 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3837 {
3838   Mat coarse_submat;
3839 
3840   PetscFunctionBegin;
3841   /* Setup local scatters R_to_B and (optionally) R_to_D */
3842   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3843   PetscCall(PCBDDCSetUpLocalScatters(pc));
3844 
3845   /* Setup local neumann solver ksp_R */
3846   /* PCBDDCSetUpLocalScatters should be called first! */
3847   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3848 
3849   /*
3850      Setup local correction and local part of coarse basis.
3851      Gives back the dense local part of the coarse matrix in column major ordering
3852   */
3853   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3854 
3855   /* Compute total number of coarse nodes and setup coarse solver */
3856   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3857   PetscCall(MatDestroy(&coarse_submat));
3858   PetscFunctionReturn(PETSC_SUCCESS);
3859 }
3860 
3861 PetscErrorCode PCBDDCResetCustomization(PC pc)
3862 {
3863   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3864 
3865   PetscFunctionBegin;
3866   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3867   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3868   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3869   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3870   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3871   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3872   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3873   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3874   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3875   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3876   PetscFunctionReturn(PETSC_SUCCESS);
3877 }
3878 
3879 PetscErrorCode PCBDDCResetTopography(PC pc)
3880 {
3881   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3882   PetscInt i;
3883 
3884   PetscFunctionBegin;
3885   PetscCall(MatDestroy(&pcbddc->nedcG));
3886   PetscCall(ISDestroy(&pcbddc->nedclocal));
3887   PetscCall(MatDestroy(&pcbddc->discretegradient));
3888   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3889   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3890   PetscCall(MatDestroy(&pcbddc->switch_static_change));
3891   PetscCall(VecDestroy(&pcbddc->work_change));
3892   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3893   PetscCall(MatDestroy(&pcbddc->divudotp));
3894   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3895   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3896   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3897   pcbddc->n_local_subs = 0;
3898   PetscCall(PetscFree(pcbddc->local_subs));
3899   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3900   pcbddc->graphanalyzed        = PETSC_FALSE;
3901   pcbddc->recompute_topography = PETSC_TRUE;
3902   pcbddc->corner_selected      = PETSC_FALSE;
3903   PetscFunctionReturn(PETSC_SUCCESS);
3904 }
3905 
3906 PetscErrorCode PCBDDCResetSolvers(PC pc)
3907 {
3908   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3909 
3910   PetscFunctionBegin;
3911   PetscCall(VecDestroy(&pcbddc->coarse_vec));
3912   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3913   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3914   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3915   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3916   PetscCall(VecDestroy(&pcbddc->vec1_P));
3917   PetscCall(VecDestroy(&pcbddc->vec1_C));
3918   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3919   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3920   PetscCall(VecDestroy(&pcbddc->vec1_R));
3921   PetscCall(VecDestroy(&pcbddc->vec2_R));
3922   PetscCall(ISDestroy(&pcbddc->is_R_local));
3923   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3924   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3925   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3926   PetscCall(KSPReset(pcbddc->ksp_D));
3927   PetscCall(KSPReset(pcbddc->ksp_R));
3928   PetscCall(KSPReset(pcbddc->coarse_ksp));
3929   PetscCall(MatDestroy(&pcbddc->local_mat));
3930   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3931   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3932   PetscCall(PetscFree(pcbddc->global_primal_indices));
3933   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3934   PetscCall(MatDestroy(&pcbddc->benign_change));
3935   PetscCall(VecDestroy(&pcbddc->benign_vec));
3936   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3937   PetscCall(MatDestroy(&pcbddc->benign_B0));
3938   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3939   if (pcbddc->benign_zerodiag_subs) {
3940     PetscInt i;
3941     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3942     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3943   }
3944   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3945   PetscFunctionReturn(PETSC_SUCCESS);
3946 }
3947 
3948 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3949 {
3950   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3951   PC_IS   *pcis   = (PC_IS *)pc->data;
3952   VecType  impVecType;
3953   PetscInt n_constraints, n_R, old_size;
3954 
3955   PetscFunctionBegin;
3956   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3957   n_R           = pcis->n - pcbddc->n_vertices;
3958   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3959   /* local work vectors (try to avoid unneeded work)*/
3960   /* R nodes */
3961   old_size = -1;
3962   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3963   if (n_R != old_size) {
3964     PetscCall(VecDestroy(&pcbddc->vec1_R));
3965     PetscCall(VecDestroy(&pcbddc->vec2_R));
3966     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3967     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3968     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3969     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3970   }
3971   /* local primal dofs */
3972   old_size = -1;
3973   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3974   if (pcbddc->local_primal_size != old_size) {
3975     PetscCall(VecDestroy(&pcbddc->vec1_P));
3976     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3977     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3978     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
3979   }
3980   /* local explicit constraints */
3981   old_size = -1;
3982   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
3983   if (n_constraints && n_constraints != old_size) {
3984     PetscCall(VecDestroy(&pcbddc->vec1_C));
3985     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
3986     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
3987     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
3988   }
3989   PetscFunctionReturn(PETSC_SUCCESS);
3990 }
3991 
3992 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
3993 {
3994   PetscBool          flg;
3995   const PetscScalar *a;
3996 
3997   PetscFunctionBegin;
3998   PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
3999   if (flg) {
4000     PetscCall(MatDenseGetArrayRead(S, &a));
4001     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4002     PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4003     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4004     PetscCall(MatDenseRestoreArrayRead(S, &a));
4005   } else {
4006     const PetscInt *ii, *jj;
4007     PetscInt        n;
4008     PetscInt        buf[8192], *bufc = NULL;
4009     PetscBool       freeb = PETSC_FALSE;
4010     Mat             Sm    = S;
4011 
4012     PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4013     if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4014     else PetscCall(PetscObjectReference((PetscObject)S));
4015     PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4016     PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4017     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4018     if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4019       bufc = buf;
4020     } else {
4021       PetscCall(PetscMalloc1(nc, &bufc));
4022       freeb = PETSC_TRUE;
4023     }
4024 
4025     for (PetscInt i = 0; i < n; i++) {
4026       const PetscInt nci = ii[i + 1] - ii[i];
4027 
4028       for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4029       PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4030     }
4031     PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4032     PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4033     PetscCall(MatDestroy(&Sm));
4034     if (freeb) PetscCall(PetscFree(bufc));
4035   }
4036   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4037   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4038   PetscFunctionReturn(PETSC_SUCCESS);
4039 }
4040 
4041 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4042 {
4043   Mat_SeqAIJ        *aij;
4044   PetscInt          *ii, *jj;
4045   PetscScalar       *aa;
4046   PetscInt           nnz = 0, m, nc;
4047   const PetscScalar *a;
4048   const PetscScalar  zero = 0.0;
4049 
4050   PetscFunctionBegin;
4051   PetscCall(MatGetLocalSize(D, &m, &nc));
4052   PetscCall(MatDenseGetArrayRead(D, &a));
4053   PetscCall(PetscMalloc1(m + 1, &ii));
4054   PetscCall(PetscMalloc1(m * nc, &jj));
4055   PetscCall(PetscMalloc1(m * nc, &aa));
4056   ii[0] = 0;
4057   for (PetscInt k = 0; k < m; k++) {
4058     for (PetscInt s = 0; s < nc; s++) {
4059       const PetscInt    c = s + k * nc;
4060       const PetscScalar v = a[k + s * m];
4061 
4062       if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4063       jj[nnz] = j[c];
4064       aa[nnz] = a[k + s * m];
4065       nnz++;
4066     }
4067     ii[k + 1] = nnz;
4068   }
4069 
4070   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4071   PetscCall(MatDenseRestoreArrayRead(D, &a));
4072 
4073   aij          = (Mat_SeqAIJ *)(*mat)->data;
4074   aij->free_a  = PETSC_TRUE;
4075   aij->free_ij = PETSC_TRUE;
4076   PetscFunctionReturn(PETSC_SUCCESS);
4077 }
4078 
4079 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
4080 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4081 {
4082   PetscInt        n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4083   const PetscBool allowzeropivot    = PETSC_FALSE;
4084   PetscBool       zeropivotdetected = PETSC_FALSE;
4085   const PetscReal shift             = 0.0;
4086   PetscInt        ipvt[5], *ii, *jj, *indi, *indj;
4087   PetscScalar     work[25], *v_work = NULL, *aa, *diag;
4088   PetscLogDouble  flops = 0.0;
4089 
4090   PetscFunctionBegin;
4091   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4092   for (PetscInt i = 0; i < nblocks; i++) {
4093     ncnt += bsizes[i];
4094     ncnt2 += PetscSqr(bsizes[i]);
4095   }
4096   PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4097   for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4098   if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4099 
4100   PetscCall(PetscMalloc1(n + 1, &ii));
4101   PetscCall(PetscMalloc1(ncnt2, &jj));
4102   PetscCall(PetscCalloc1(ncnt2, &aa));
4103 
4104   ncnt  = 0;
4105   ii[0] = 0;
4106   indi  = ii;
4107   indj  = jj;
4108   diag  = aa;
4109   for (PetscInt i = 0; i < nblocks; i++) {
4110     const PetscInt bs = bsizes[i];
4111 
4112     for (PetscInt k = 0; k < bs; k++) {
4113       indi[k + 1] = indi[k] + bs;
4114       for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4115     }
4116     PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4117     switch (bs) {
4118     case 1:
4119       *diag = 1.0 / (*diag);
4120       break;
4121     case 2:
4122       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4123       break;
4124     case 3:
4125       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4126       break;
4127     case 4:
4128       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4129       break;
4130     case 5:
4131       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4132       break;
4133     case 6:
4134       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4135       break;
4136     case 7:
4137       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4138       break;
4139     default:
4140       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4141     }
4142     ncnt += bs;
4143     flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4144     diag += bs * bs;
4145     indj += bs * bs;
4146     indi += bs;
4147   }
4148   PetscCall(PetscLogFlops(flops));
4149   PetscCall(PetscFree2(v_work, v_pivots));
4150   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4151   {
4152     Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4153     aij->free_a     = PETSC_TRUE;
4154     aij->free_ij    = PETSC_TRUE;
4155   }
4156   PetscFunctionReturn(PETSC_SUCCESS);
4157 }
4158 
4159 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B)
4160 {
4161   const PetscScalar *rarr;
4162   PetscScalar       *larr;
4163   PetscSF            vsf;
4164   PetscInt           n, rld, lld;
4165 
4166   PetscFunctionBegin;
4167   PetscCall(MatGetSize(A, NULL, &n));
4168   PetscCall(MatDenseGetLDA(A, &rld));
4169   PetscCall(MatDenseGetLDA(B, &lld));
4170   PetscCall(MatDenseGetArrayRead(A, &rarr));
4171   PetscCall(MatDenseGetArrayWrite(B, &larr));
4172   PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf));
4173   PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4174   PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4175   PetscCall(MatDenseRestoreArrayRead(A, &rarr));
4176   PetscCall(MatDenseRestoreArrayWrite(B, &larr));
4177   PetscCall(PetscSFDestroy(&vsf));
4178   PetscFunctionReturn(PETSC_SUCCESS);
4179 }
4180 
4181 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4182 {
4183   PC_IS          *pcis       = (PC_IS *)pc->data;
4184   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
4185   PCBDDCGraph     graph      = pcbddc->mat_graph;
4186   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4187   /* submatrices of local problem */
4188   Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4189   /* submatrices of local coarse problem */
4190   Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4191   /* working matrices */
4192   Mat C_CR;
4193 
4194   /* additional working stuff */
4195   PC              pc_R;
4196   IS              is_R, is_V, is_C;
4197   const PetscInt *idx_V, *idx_C;
4198   Mat             F, Brhs = NULL;
4199   Vec             dummy_vec;
4200   PetscBool       isLU, isCHOL, need_benign_correction, sparserhs;
4201   PetscInt       *idx_V_B;
4202   PetscInt        lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4203   PetscInt        n_eff_vertices, n_eff_constraints;
4204   PetscInt        i, n_R, n_D, n_B;
4205   PetscScalar     one = 1.0, m_one = -1.0;
4206 
4207   /* Multi-element support */
4208   PetscBool multi_element = graph->multi_element;
4209   PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4210   PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4211   IS        is_C_perm = NULL;
4212   PetscInt  n_C_bss = 0, *C_bss = NULL;
4213   Mat       coarse_phi_multi;
4214 
4215   PetscFunctionBegin;
4216   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4217   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4218 
4219   /* Set Non-overlapping dimensions */
4220   n_vertices    = pcbddc->n_vertices;
4221   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4222   n_B           = pcis->n_B;
4223   n_D           = pcis->n - n_B;
4224   n_R           = pcis->n - n_vertices;
4225 
4226   /* vertices in boundary numbering */
4227   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4228   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4229   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4230 
4231   /* these two cases still need to be optimized */
4232   if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4233 
4234   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
4235   if (multi_element) {
4236     PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4237 
4238     PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4239     PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4240     PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4241     PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4242     PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4243 
4244     /* group vertices and constraints by subdomain id */
4245     const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4246     const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4247     PetscInt       *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4248     PetscInt        n_el = PetscMax(graph->n_local_subs, 1);
4249 
4250     PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4251     PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4252     PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4253     for (PetscInt i = 0; i < n_vertices; i++) {
4254       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4255 
4256       V_to_eff_V[i] = count_eff[s];
4257       count_eff[s] += 1;
4258     }
4259     for (PetscInt i = 0; i < n_constraints; i++) {
4260       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4261 
4262       C_to_eff_C[i] = count_eff[s];
4263       count_eff[s] += 1;
4264     }
4265 
4266     /* preallocation */
4267     PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4268     for (PetscInt i = 0; i < n_vertices; i++) {
4269       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4270 
4271       nnz[i] = count_eff[s] + count_eff[s + 1];
4272     }
4273     for (PetscInt i = 0; i < n_constraints; i++) {
4274       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4275 
4276       nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4277     }
4278     PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4279     PetscCall(PetscFree(nnz));
4280 
4281     n_eff_vertices    = 0;
4282     n_eff_constraints = 0;
4283     for (PetscInt i = 0; i < n_el; i++) {
4284       n_eff_vertices       = PetscMax(n_eff_vertices, count_eff[2 * i]);
4285       n_eff_constraints    = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4286       count_eff[2 * i]     = 0;
4287       count_eff[2 * i + 1] = 0;
4288     }
4289 
4290     const PetscInt *idx;
4291     PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4292 
4293     for (PetscInt i = 0; i < n_vertices; i++) {
4294       const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4295       const PetscInt s = 2 * e;
4296 
4297       V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4298       count_eff[s] += 1;
4299     }
4300     for (PetscInt i = 0; i < n_constraints; i++) {
4301       const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4302       const PetscInt s = 2 * e + 1;
4303 
4304       C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4305       count_eff[s] += 1;
4306     }
4307 
4308     PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4309     PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4310     PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4311     PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4312     for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4313     for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4314     for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4315     for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4316 
4317     PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4318     for (PetscInt i = 0; i < n_R; i++) {
4319       const PetscInt e = graph->nodes[idx[i]].local_sub;
4320       const PetscInt s = 2 * e;
4321       PetscInt       j;
4322 
4323       for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4324       for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4325     }
4326     PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4327     PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4328     for (PetscInt i = 0; i < n_B; i++) {
4329       const PetscInt e = graph->nodes[idx[i]].local_sub;
4330       const PetscInt s = 2 * e;
4331       PetscInt       j;
4332 
4333       for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4334       for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4335     }
4336     PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4337 
4338     /* permutation and blocksizes for block invert of S_CC */
4339     PetscInt *idxp;
4340 
4341     PetscCall(PetscMalloc1(n_constraints, &idxp));
4342     PetscCall(PetscMalloc1(n_el, &C_bss));
4343     n_C_bss = 0;
4344     for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4345       const PetscInt nc = count_eff[2 * e + 1];
4346 
4347       if (nc) C_bss[n_C_bss++] = nc;
4348       for (PetscInt c = 0; c < nc; c++) { idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; }
4349       cnt += nc;
4350     }
4351 
4352     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4353 
4354     PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4355     PetscCall(PetscFree(count_eff));
4356   } else {
4357     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4358     n_eff_constraints = n_constraints;
4359     n_eff_vertices    = n_vertices;
4360   }
4361 
4362   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4363   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4364   PetscCall(PCSetUp(pc_R));
4365   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4366   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4367   lda_rhs                = n_R;
4368   need_benign_correction = PETSC_FALSE;
4369   if (isLU || isCHOL) {
4370     PetscCall(PCFactorGetMatrix(pc_R, &F));
4371   } else if (sub_schurs && sub_schurs->reuse_solver) {
4372     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4373     MatFactorType      type;
4374 
4375     F = reuse_solver->F;
4376     PetscCall(MatGetFactorType(F, &type));
4377     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4378     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4379     PetscCall(MatGetSize(F, &lda_rhs, NULL));
4380     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4381   } else F = NULL;
4382 
4383   /* determine if we can use a sparse right-hand side */
4384   sparserhs = PETSC_FALSE;
4385   if (F && !multi_element) {
4386     MatSolverType solver;
4387 
4388     PetscCall(MatFactorGetSolverType(F, &solver));
4389     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4390   }
4391 
4392   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4393   dummy_vec = NULL;
4394   if (need_benign_correction && lda_rhs != n_R && F) {
4395     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4396     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4397     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4398   }
4399 
4400   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4401   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4402 
4403   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4404   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4405   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4406   PetscCall(ISGetIndices(is_V, &idx_V));
4407   PetscCall(ISGetIndices(is_C, &idx_C));
4408 
4409   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4410   if (n_constraints) {
4411     Mat C_B;
4412 
4413     /* Extract constraints on R nodes: C_{CR}  */
4414     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4415     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4416 
4417     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4418     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4419     if (!sparserhs) {
4420       PetscScalar *marr;
4421 
4422       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4423       PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4424       for (i = 0; i < n_constraints; i++) {
4425         const PetscScalar *row_cmat_values;
4426         const PetscInt    *row_cmat_indices;
4427         PetscInt           size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4428 
4429         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4430         for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4431         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4432       }
4433       PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4434     } else {
4435       Mat tC_CR;
4436 
4437       PetscCall(MatScale(C_CR, -1.0));
4438       if (lda_rhs != n_R) {
4439         PetscScalar *aa;
4440         PetscInt     r, *ii, *jj;
4441         PetscBool    done;
4442 
4443         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4444         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4445         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4446         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4447         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4448         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4449       } else {
4450         PetscCall(PetscObjectReference((PetscObject)C_CR));
4451         tC_CR = C_CR;
4452       }
4453       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4454       PetscCall(MatDestroy(&tC_CR));
4455     }
4456     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4457     if (F) {
4458       if (need_benign_correction) {
4459         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4460 
4461         /* rhs is already zero on interior dofs, no need to change the rhs */
4462         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4463       }
4464       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4465       if (need_benign_correction) {
4466         PetscScalar       *marr;
4467         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4468 
4469         /* XXX multi_element? */
4470         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4471         if (lda_rhs != n_R) {
4472           for (i = 0; i < n_eff_constraints; i++) {
4473             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4474             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4475             PetscCall(VecResetArray(dummy_vec));
4476           }
4477         } else {
4478           for (i = 0; i < n_eff_constraints; i++) {
4479             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4480             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4481             PetscCall(VecResetArray(pcbddc->vec1_R));
4482           }
4483         }
4484         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4485       }
4486     } else {
4487       const PetscScalar *barr;
4488       PetscScalar       *marr;
4489 
4490       PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4491       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4492       for (i = 0; i < n_eff_constraints; i++) {
4493         PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4494         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4495         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4496         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4497         PetscCall(VecResetArray(pcbddc->vec1_R));
4498         PetscCall(VecResetArray(pcbddc->vec2_R));
4499       }
4500       PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4501       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4502     }
4503     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4504     PetscCall(MatDestroy(&Brhs));
4505     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4506     if (!pcbddc->switch_static) {
4507       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4508       for (i = 0; i < n_eff_constraints; i++) {
4509         Vec r, b;
4510         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
4511         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
4512         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4513         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4514         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
4515         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
4516       }
4517       if (multi_element) {
4518         Mat T;
4519 
4520         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4521         PetscCall(MatDestroy(&local_auxmat2_R));
4522         local_auxmat2_R = T;
4523         PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4524         PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4525         pcbddc->local_auxmat2 = T;
4526       }
4527       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_CC));
4528     } else {
4529       if (multi_element) {
4530         Mat T;
4531 
4532         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4533         PetscCall(MatDestroy(&local_auxmat2_R));
4534         local_auxmat2_R = T;
4535       }
4536       if (lda_rhs != n_R) {
4537         PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4538       } else {
4539         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4540         pcbddc->local_auxmat2 = local_auxmat2_R;
4541       }
4542       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_CC));
4543     }
4544     PetscCall(MatScale(S_CC, m_one));
4545     if (multi_element) {
4546       Mat T, T2;
4547       IS  isp, ispi;
4548 
4549       isp = is_C_perm;
4550 
4551       PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4552       PetscCall(MatPermute(S_CC, isp, isp, &T));
4553       PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4554       PetscCall(MatDestroy(&T));
4555       PetscCall(MatDestroy(&S_CC));
4556       PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4557       PetscCall(MatDestroy(&T2));
4558       PetscCall(ISDestroy(&ispi));
4559     } else {
4560       if (isCHOL) {
4561         PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4562       } else {
4563         PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4564       }
4565       PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4566     }
4567     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4568     PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1));
4569     PetscCall(MatDestroy(&C_B));
4570     PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4571   }
4572 
4573   /* Get submatrices from subdomain matrix */
4574   if (n_vertices) {
4575 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4576     PetscBool oldpin;
4577 #endif
4578     IS is_aux;
4579 
4580     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4581       IS tis;
4582 
4583       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4584       PetscCall(ISSort(tis));
4585       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4586       PetscCall(ISDestroy(&tis));
4587     } else {
4588       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4589     }
4590 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4591     oldpin = pcbddc->local_mat->boundtocpu;
4592 #endif
4593     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4594     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4595     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4596     /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4597     PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4598     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4599 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4600     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4601 #endif
4602     PetscCall(ISDestroy(&is_aux));
4603   }
4604   PetscCall(ISDestroy(&is_C_perm));
4605   PetscCall(PetscFree(C_bss));
4606 
4607   p0_lidx_I = NULL;
4608   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4609     const PetscInt *idxs;
4610 
4611     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4612     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4613     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4614     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4615   }
4616 
4617   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4618 
4619   /* Matrices of coarse basis functions (local) */
4620   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4621   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4622   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4623   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4624   if (!multi_element) {
4625     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4626     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4627     coarse_phi_multi = NULL;
4628   } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4629     IS is_rows[2] = {pcbddc->is_R_local, NULL};
4630     IS is_cols[2] = {is_V, is_C};
4631 
4632     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4633     PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4634     PetscCall(ISDestroy(&is_rows[1]));
4635   }
4636 
4637   /* vertices */
4638   if (n_vertices) {
4639     PetscBool restoreavr = PETSC_FALSE;
4640     Mat       A_RRmA_RV  = NULL;
4641 
4642     PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4643     PetscCall(MatDestroy(&A_VV));
4644 
4645     if (n_R) {
4646       Mat A_RV_bcorr = NULL, S_VV;
4647 
4648       PetscCall(MatScale(A_RV, m_one));
4649       if (need_benign_correction) {
4650         ISLocalToGlobalMapping RtoN;
4651         IS                     is_p0;
4652         PetscInt              *idxs_p0, n;
4653 
4654         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4655         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4656         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4657         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4658         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4659         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4660         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4661         PetscCall(ISDestroy(&is_p0));
4662       }
4663 
4664       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4665       if (!sparserhs || need_benign_correction) {
4666         if (lda_rhs == n_R && !multi_element) {
4667           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4668         } else {
4669           Mat             T;
4670           PetscScalar    *av, *array;
4671           const PetscInt *xadj, *adjncy;
4672           PetscInt        n;
4673           PetscBool       flg_row;
4674 
4675           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4676           PetscCall(MatDenseGetArrayWrite(T, &array));
4677           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4678           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4679           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4680           for (i = 0; i < n; i++) {
4681             PetscInt j;
4682             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4683           }
4684           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4685           PetscCall(MatDenseRestoreArrayWrite(T, &array));
4686           PetscCall(MatDestroy(&A_RV));
4687           A_RV = T;
4688         }
4689         if (need_benign_correction) {
4690           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4691           PetscScalar       *marr;
4692 
4693           /* XXX multi_element */
4694           PetscCall(MatDenseGetArray(A_RV, &marr));
4695           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4696 
4697                  | 0 0  0 | (V)
4698              L = | 0 0 -1 | (P-p0)
4699                  | 0 0 -1 | (p0)
4700 
4701           */
4702           for (i = 0; i < reuse_solver->benign_n; i++) {
4703             const PetscScalar *vals;
4704             const PetscInt    *idxs, *idxs_zero;
4705             PetscInt           n, j, nz;
4706 
4707             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4708             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4709             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4710             for (j = 0; j < n; j++) {
4711               PetscScalar val = vals[j];
4712               PetscInt    k, col = idxs[j];
4713               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4714             }
4715             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4716             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4717           }
4718           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4719         }
4720         PetscCall(PetscObjectReference((PetscObject)A_RV));
4721         Brhs = A_RV;
4722       } else {
4723         Mat tA_RVT, A_RVT;
4724 
4725         if (!pcbddc->symmetric_primal) {
4726           /* A_RV already scaled by -1 */
4727           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4728         } else {
4729           restoreavr = PETSC_TRUE;
4730           PetscCall(MatScale(A_VR, -1.0));
4731           PetscCall(PetscObjectReference((PetscObject)A_VR));
4732           A_RVT = A_VR;
4733         }
4734         if (lda_rhs != n_R) {
4735           PetscScalar *aa;
4736           PetscInt     r, *ii, *jj;
4737           PetscBool    done;
4738 
4739           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4740           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4741           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4742           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4743           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4744           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4745         } else {
4746           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4747           tA_RVT = A_RVT;
4748         }
4749         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4750         PetscCall(MatDestroy(&tA_RVT));
4751         PetscCall(MatDestroy(&A_RVT));
4752       }
4753       if (F) {
4754         /* need to correct the rhs */
4755         if (need_benign_correction) {
4756           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4757           PetscScalar       *marr;
4758 
4759           PetscCall(MatDenseGetArray(Brhs, &marr));
4760           if (lda_rhs != n_R) {
4761             for (i = 0; i < n_eff_vertices; i++) {
4762               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4763               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4764               PetscCall(VecResetArray(dummy_vec));
4765             }
4766           } else {
4767             for (i = 0; i < n_eff_vertices; i++) {
4768               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4769               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4770               PetscCall(VecResetArray(pcbddc->vec1_R));
4771             }
4772           }
4773           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4774         }
4775         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4776         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4777         /* need to correct the solution */
4778         if (need_benign_correction) {
4779           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4780           PetscScalar       *marr;
4781 
4782           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4783           if (lda_rhs != n_R) {
4784             for (i = 0; i < n_eff_vertices; i++) {
4785               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4786               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4787               PetscCall(VecResetArray(dummy_vec));
4788             }
4789           } else {
4790             for (i = 0; i < n_eff_vertices; i++) {
4791               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4792               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4793               PetscCall(VecResetArray(pcbddc->vec1_R));
4794             }
4795           }
4796           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4797         }
4798       } else {
4799         const PetscScalar *barr;
4800         PetscScalar       *marr;
4801 
4802         PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4803         PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4804         for (i = 0; i < n_eff_vertices; i++) {
4805           PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4806           PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4807           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4808           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4809           PetscCall(VecResetArray(pcbddc->vec1_R));
4810           PetscCall(VecResetArray(pcbddc->vec2_R));
4811         }
4812         PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4813         PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4814       }
4815       PetscCall(MatDestroy(&A_RV));
4816       PetscCall(MatDestroy(&Brhs));
4817       /* S_VV and S_CV */
4818       if (n_constraints) {
4819         Mat B;
4820 
4821         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4822         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B));
4823 
4824         /* S_CV = pcbddc->local_auxmat1 * B */
4825         if (multi_element) {
4826           Mat T;
4827 
4828           PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4829           PetscCall(MatDestroy(&B));
4830           B = T;
4831         }
4832         PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4833         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4834         PetscCall(MatProductSetFromOptions(S_CV));
4835         PetscCall(MatProductSymbolic(S_CV));
4836         PetscCall(MatProductNumeric(S_CV));
4837         PetscCall(MatProductClear(S_CV));
4838         PetscCall(MatDestroy(&B));
4839 
4840         /* B = local_auxmat2_R * S_CV */
4841         PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4842         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4843         PetscCall(MatProductSetFromOptions(B));
4844         PetscCall(MatProductSymbolic(B));
4845         PetscCall(MatProductNumeric(B));
4846 
4847         PetscCall(MatScale(S_CV, m_one));
4848         PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4849 
4850         if (multi_element) {
4851           Mat T;
4852 
4853           PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4854           PetscCall(MatDestroy(&A_RRmA_RV));
4855           A_RRmA_RV = T;
4856         }
4857         PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4858         PetscCall(MatDestroy(&B));
4859       } else if (multi_element) {
4860         Mat T;
4861 
4862         PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4863         PetscCall(MatDestroy(&A_RRmA_RV));
4864         A_RRmA_RV = T;
4865       }
4866 
4867       if (lda_rhs != n_R) {
4868         Mat T;
4869 
4870         PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4871         PetscCall(MatDestroy(&A_RRmA_RV));
4872         A_RRmA_RV = T;
4873       }
4874 
4875       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4876       if (need_benign_correction) { /* XXX SPARSE */
4877         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4878         PetscScalar       *sums;
4879         const PetscScalar *marr;
4880 
4881         PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
4882         PetscCall(PetscMalloc1(n_vertices, &sums));
4883         for (i = 0; i < reuse_solver->benign_n; i++) {
4884           const PetscScalar *vals;
4885           const PetscInt    *idxs, *idxs_zero;
4886           PetscInt           n, j, nz;
4887 
4888           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4889           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4890           for (j = 0; j < n_vertices; j++) {
4891             sums[j] = 0.;
4892             for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
4893           }
4894           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4895           for (j = 0; j < n; j++) {
4896             PetscScalar val = vals[j];
4897             for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
4898           }
4899           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4900           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4901         }
4902         PetscCall(PetscFree(sums));
4903         PetscCall(MatDestroy(&A_RV_bcorr));
4904         PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
4905       }
4906 
4907       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VV));
4908       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4909       PetscCall(MatDestroy(&S_VV));
4910     }
4911 
4912     /* coarse basis functions */
4913     if (coarse_phi_multi) {
4914       Mat Vid;
4915 
4916       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
4917       PetscCall(MatShift_Basic(Vid, 1.0));
4918       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
4919       PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
4920       PetscCall(MatDestroy(&Vid));
4921     } else {
4922       if (A_RRmA_RV) {
4923         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B));
4924         if (pcbddc->switch_static || pcbddc->dbg_flag) {
4925           PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D));
4926           if (pcbddc->benign_n) {
4927             for (i = 0; i < n_vertices; i++) { PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); }
4928           }
4929         }
4930       }
4931       for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
4932       PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4933       PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4934     }
4935     PetscCall(MatDestroy(&A_RRmA_RV));
4936   }
4937   PetscCall(MatDestroy(&A_RV));
4938   PetscCall(VecDestroy(&dummy_vec));
4939 
4940   if (n_constraints) {
4941     Mat B, B2;
4942 
4943     PetscCall(MatScale(S_CC, m_one));
4944     PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
4945     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4946     PetscCall(MatProductSetFromOptions(B));
4947     PetscCall(MatProductSymbolic(B));
4948     PetscCall(MatProductNumeric(B));
4949 
4950     if (n_vertices) {
4951       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4952         PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
4953       } else {
4954         if (lda_rhs != n_R) {
4955           Mat tB;
4956 
4957           PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
4958           PetscCall(MatDestroy(&B));
4959           B = tB;
4960         }
4961         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VC));
4962       }
4963       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
4964     }
4965 
4966     /* coarse basis functions */
4967     if (coarse_phi_multi) {
4968       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
4969     } else {
4970       PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4971       PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2));
4972       PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
4973       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4974         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4975         PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2));
4976         if (pcbddc->benign_n) {
4977           for (i = 0; i < n_constraints; i++) { PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); }
4978         }
4979         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
4980       }
4981     }
4982     PetscCall(MatDestroy(&B));
4983   }
4984 
4985   /* assemble sparse coarse basis functions */
4986   if (coarse_phi_multi) {
4987     Mat T;
4988 
4989     PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
4990     PetscCall(MatDestroy(&coarse_phi_multi));
4991     PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
4992     if (pcbddc->switch_static || pcbddc->dbg_flag) { PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); }
4993     PetscCall(MatDestroy(&T));
4994   }
4995   PetscCall(MatDestroy(&local_auxmat2_R));
4996   PetscCall(PetscFree(p0_lidx_I));
4997 
4998   /* coarse matrix entries relative to B_0 */
4999   if (pcbddc->benign_n) {
5000     Mat                B0_B, B0_BPHI;
5001     IS                 is_dummy;
5002     const PetscScalar *data;
5003     PetscInt           j;
5004 
5005     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5006     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5007     PetscCall(ISDestroy(&is_dummy));
5008     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5009     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5010     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5011     for (j = 0; j < pcbddc->benign_n; j++) {
5012       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5013       for (i = 0; i < pcbddc->local_primal_size; i++) {
5014         PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5015         PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5016       }
5017     }
5018     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5019     PetscCall(MatDestroy(&B0_B));
5020     PetscCall(MatDestroy(&B0_BPHI));
5021   }
5022 
5023   /* compute other basis functions for non-symmetric problems */
5024   if (!pcbddc->symmetric_primal) {
5025     Mat          B_V = NULL, B_C = NULL;
5026     PetscScalar *marray, *work;
5027 
5028     /* TODO multi_element MatDenseScatter */
5029     if (n_constraints) {
5030       Mat S_CCT, C_CRT;
5031 
5032       PetscCall(MatScale(S_CC, m_one));
5033       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5034       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5035       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C));
5036       PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5037       PetscCall(MatDestroy(&S_CCT));
5038       if (n_vertices) {
5039         Mat S_VCT;
5040 
5041         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5042         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V));
5043         PetscCall(MatDestroy(&S_VCT));
5044         PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5045       }
5046       PetscCall(MatDestroy(&C_CRT));
5047     } else {
5048       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5049     }
5050     if (n_vertices && n_R) {
5051       PetscScalar    *av, *marray;
5052       const PetscInt *xadj, *adjncy;
5053       PetscInt        n;
5054       PetscBool       flg_row;
5055 
5056       /* B_V = B_V - A_VR^T */
5057       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5058       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5059       PetscCall(MatSeqAIJGetArray(A_VR, &av));
5060       PetscCall(MatDenseGetArray(B_V, &marray));
5061       for (i = 0; i < n; i++) {
5062         PetscInt j;
5063         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5064       }
5065       PetscCall(MatDenseRestoreArray(B_V, &marray));
5066       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5067       PetscCall(MatDestroy(&A_VR));
5068     }
5069 
5070     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5071     PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5072     if (n_vertices) {
5073       PetscCall(MatDenseGetArray(B_V, &marray));
5074       for (i = 0; i < n_vertices; i++) {
5075         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5076         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5077         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5078         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5079         PetscCall(VecResetArray(pcbddc->vec1_R));
5080         PetscCall(VecResetArray(pcbddc->vec2_R));
5081       }
5082       PetscCall(MatDenseRestoreArray(B_V, &marray));
5083     }
5084     if (B_C) {
5085       PetscCall(MatDenseGetArray(B_C, &marray));
5086       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5087         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5088         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5089         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5090         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5091         PetscCall(VecResetArray(pcbddc->vec1_R));
5092         PetscCall(VecResetArray(pcbddc->vec2_R));
5093       }
5094       PetscCall(MatDenseRestoreArray(B_C, &marray));
5095     }
5096     /* coarse basis functions */
5097     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5098     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5099     for (i = 0; i < pcbddc->local_primal_size; i++) {
5100       Vec v;
5101 
5102       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5103       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5104       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5105       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5106       if (i < n_vertices) {
5107         PetscScalar one = 1.0;
5108         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5109         PetscCall(VecAssemblyBegin(v));
5110         PetscCall(VecAssemblyEnd(v));
5111       }
5112       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5113 
5114       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5115         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5116         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5117         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5118         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5119       }
5120       PetscCall(VecResetArray(pcbddc->vec1_R));
5121     }
5122     PetscCall(MatDestroy(&B_V));
5123     PetscCall(MatDestroy(&B_C));
5124     PetscCall(PetscFree(work));
5125   } else {
5126     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5127     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5128     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5129     pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5130   }
5131   PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5132   PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5133 
5134   /* free memory */
5135   PetscCall(PetscFree(V_to_eff_V));
5136   PetscCall(PetscFree(C_to_eff_C));
5137   PetscCall(PetscFree(R_eff_V_J));
5138   PetscCall(PetscFree(R_eff_C_J));
5139   PetscCall(PetscFree(B_eff_V_J));
5140   PetscCall(PetscFree(B_eff_C_J));
5141   PetscCall(ISDestroy(&is_R));
5142   PetscCall(ISRestoreIndices(is_V, &idx_V));
5143   PetscCall(ISRestoreIndices(is_C, &idx_C));
5144   PetscCall(ISDestroy(&is_V));
5145   PetscCall(ISDestroy(&is_C));
5146   PetscCall(PetscFree(idx_V_B));
5147   PetscCall(MatDestroy(&S_CV));
5148   PetscCall(MatDestroy(&S_VC));
5149   PetscCall(MatDestroy(&S_CC));
5150   if (n_vertices) PetscCall(MatDestroy(&A_VR));
5151   if (n_constraints) PetscCall(MatDestroy(&C_CR));
5152   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5153 
5154   /* Checking coarse_sub_mat and coarse basis functions */
5155   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5156   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5157   if (pcbddc->dbg_flag) {
5158     Mat       AUXMAT, TM1, TM2, TM3, TM4;
5159     Mat       coarse_phi_D, coarse_phi_B;
5160     Mat       coarse_psi_D, coarse_psi_B;
5161     Mat       A_II, A_BB, A_IB, A_BI;
5162     Mat       C_B, CPHI;
5163     IS        is_dummy;
5164     Vec       mones;
5165     MatType   checkmattype = MATSEQAIJ;
5166     PetscReal real_value;
5167 
5168     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5169       Mat A;
5170       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5171       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5172       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5173       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5174       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5175       PetscCall(MatDestroy(&A));
5176     } else {
5177       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5178       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5179       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5180       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5181     }
5182     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5183     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5184     if (!pcbddc->symmetric_primal) {
5185       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5186       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5187     }
5188     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5189     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5190     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5191     if (!pcbddc->symmetric_primal) {
5192       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5193       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5194       PetscCall(MatDestroy(&AUXMAT));
5195       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5196       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5197       PetscCall(MatDestroy(&AUXMAT));
5198       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5199       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5200       PetscCall(MatDestroy(&AUXMAT));
5201       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5202       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5203       PetscCall(MatDestroy(&AUXMAT));
5204     } else {
5205       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5206       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5207       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5208       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5209       PetscCall(MatDestroy(&AUXMAT));
5210       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5211       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5212       PetscCall(MatDestroy(&AUXMAT));
5213     }
5214     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5215     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5216     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5217     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5218     if (pcbddc->benign_n) {
5219       Mat                B0_B, B0_BPHI;
5220       const PetscScalar *data2;
5221       PetscScalar       *data;
5222       PetscInt           j;
5223 
5224       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5225       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5226       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5227       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5228       PetscCall(MatDenseGetArray(TM1, &data));
5229       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5230       for (j = 0; j < pcbddc->benign_n; j++) {
5231         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5232         for (i = 0; i < pcbddc->local_primal_size; i++) {
5233           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5234           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5235         }
5236       }
5237       PetscCall(MatDenseRestoreArray(TM1, &data));
5238       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5239       PetscCall(MatDestroy(&B0_B));
5240       PetscCall(ISDestroy(&is_dummy));
5241       PetscCall(MatDestroy(&B0_BPHI));
5242     }
5243     PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5244     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5245     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5246     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5247 
5248     /* check constraints */
5249     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5250     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5251     if (!pcbddc->benign_n) { /* TODO: add benign case */
5252       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5253     } else {
5254       PetscScalar *data;
5255       Mat          tmat;
5256       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5257       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5258       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5259       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5260       PetscCall(MatDestroy(&tmat));
5261     }
5262     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5263     PetscCall(VecSet(mones, -1.0));
5264     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5265     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5266     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5267     if (!pcbddc->symmetric_primal) {
5268       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5269       PetscCall(VecSet(mones, -1.0));
5270       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5271       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5272       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5273     }
5274     PetscCall(MatDestroy(&C_B));
5275     PetscCall(MatDestroy(&CPHI));
5276     PetscCall(ISDestroy(&is_dummy));
5277     PetscCall(VecDestroy(&mones));
5278     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5279     PetscCall(MatDestroy(&A_II));
5280     PetscCall(MatDestroy(&A_BB));
5281     PetscCall(MatDestroy(&A_IB));
5282     PetscCall(MatDestroy(&A_BI));
5283     PetscCall(MatDestroy(&TM1));
5284     PetscCall(MatDestroy(&TM2));
5285     PetscCall(MatDestroy(&TM3));
5286     PetscCall(MatDestroy(&TM4));
5287     PetscCall(MatDestroy(&coarse_phi_D));
5288     PetscCall(MatDestroy(&coarse_phi_B));
5289     if (!pcbddc->symmetric_primal) {
5290       PetscCall(MatDestroy(&coarse_psi_D));
5291       PetscCall(MatDestroy(&coarse_psi_B));
5292     }
5293   }
5294 
5295 #if 0
5296   {
5297     PetscViewer viewer;
5298     char filename[256];
5299 
5300     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5301     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5302     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5303     PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5304     PetscCall(MatView(*coarse_submat,viewer));
5305     if (pcbddc->coarse_phi_B) {
5306       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5307       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5308     }
5309     if (pcbddc->coarse_phi_D) {
5310       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5311       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5312     }
5313     if (pcbddc->coarse_psi_B) {
5314       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5315       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5316     }
5317     if (pcbddc->coarse_psi_D) {
5318       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5319       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5320     }
5321     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5322     PetscCall(MatView(pcbddc->local_mat,viewer));
5323     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5324     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5325     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5326     PetscCall(ISView(pcis->is_I_local,viewer));
5327     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5328     PetscCall(ISView(pcis->is_B_local,viewer));
5329     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5330     PetscCall(ISView(pcbddc->is_R_local,viewer));
5331     PetscCall(PetscOptionsRestoreViewer(&viewer));
5332   }
5333 #endif
5334 
5335   /* device support */
5336   {
5337     PetscBool iscuda, iship, iskokkos;
5338     MatType   mtype = NULL;
5339 
5340     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5341     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5342     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5343     if (iskokkos) {
5344       if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5345       else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5346     }
5347     if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5348     else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5349     else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5350     if (mtype) {
5351       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5352       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5353       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5354       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5355       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5356       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5357     }
5358   }
5359   PetscFunctionReturn(PETSC_SUCCESS);
5360 }
5361 
5362 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5363 {
5364   Mat      *work_mat;
5365   IS        isrow_s, iscol_s;
5366   PetscBool rsorted, csorted;
5367   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5368 
5369   PetscFunctionBegin;
5370   PetscCall(ISSorted(isrow, &rsorted));
5371   PetscCall(ISSorted(iscol, &csorted));
5372   PetscCall(ISGetLocalSize(isrow, &rsize));
5373   PetscCall(ISGetLocalSize(iscol, &csize));
5374 
5375   if (!rsorted) {
5376     const PetscInt *idxs;
5377     PetscInt       *idxs_sorted, i;
5378 
5379     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5380     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5381     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5382     PetscCall(ISGetIndices(isrow, &idxs));
5383     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5384     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5385     PetscCall(ISRestoreIndices(isrow, &idxs));
5386     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5387   } else {
5388     PetscCall(PetscObjectReference((PetscObject)isrow));
5389     isrow_s = isrow;
5390   }
5391 
5392   if (!csorted) {
5393     if (isrow == iscol) {
5394       PetscCall(PetscObjectReference((PetscObject)isrow_s));
5395       iscol_s = isrow_s;
5396     } else {
5397       const PetscInt *idxs;
5398       PetscInt       *idxs_sorted, i;
5399 
5400       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5401       PetscCall(PetscMalloc1(csize, &idxs_sorted));
5402       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5403       PetscCall(ISGetIndices(iscol, &idxs));
5404       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5405       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5406       PetscCall(ISRestoreIndices(iscol, &idxs));
5407       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5408     }
5409   } else {
5410     PetscCall(PetscObjectReference((PetscObject)iscol));
5411     iscol_s = iscol;
5412   }
5413 
5414   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5415 
5416   if (!rsorted || !csorted) {
5417     Mat new_mat;
5418     IS  is_perm_r, is_perm_c;
5419 
5420     if (!rsorted) {
5421       PetscInt *idxs_r, i;
5422       PetscCall(PetscMalloc1(rsize, &idxs_r));
5423       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5424       PetscCall(PetscFree(idxs_perm_r));
5425       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5426     } else {
5427       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5428     }
5429     PetscCall(ISSetPermutation(is_perm_r));
5430 
5431     if (!csorted) {
5432       if (isrow_s == iscol_s) {
5433         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5434         is_perm_c = is_perm_r;
5435       } else {
5436         PetscInt *idxs_c, i;
5437         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5438         PetscCall(PetscMalloc1(csize, &idxs_c));
5439         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5440         PetscCall(PetscFree(idxs_perm_c));
5441         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5442       }
5443     } else {
5444       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5445     }
5446     PetscCall(ISSetPermutation(is_perm_c));
5447 
5448     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5449     PetscCall(MatDestroy(&work_mat[0]));
5450     work_mat[0] = new_mat;
5451     PetscCall(ISDestroy(&is_perm_r));
5452     PetscCall(ISDestroy(&is_perm_c));
5453   }
5454 
5455   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5456   *B = work_mat[0];
5457   PetscCall(MatDestroyMatrices(1, &work_mat));
5458   PetscCall(ISDestroy(&isrow_s));
5459   PetscCall(ISDestroy(&iscol_s));
5460   PetscFunctionReturn(PETSC_SUCCESS);
5461 }
5462 
5463 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5464 {
5465   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
5466   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
5467   Mat       new_mat, lA;
5468   IS        is_local, is_global;
5469   PetscInt  local_size;
5470   PetscBool isseqaij, issym, isset;
5471 
5472   PetscFunctionBegin;
5473   PetscCall(MatDestroy(&pcbddc->local_mat));
5474   PetscCall(MatGetSize(matis->A, &local_size, NULL));
5475   if (pcbddc->mat_graph->multi_element) {
5476     Mat     *mats, *bdiags;
5477     IS      *gsubs;
5478     PetscInt nsubs = pcbddc->n_local_subs;
5479 
5480     PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5481     PetscCall(PetscMalloc1(nsubs, &gsubs));
5482     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5483     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5484     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5485     PetscCall(PetscFree(gsubs));
5486 
5487     for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5488     PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5489     PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5490     PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5491     PetscCall(PetscFree(mats));
5492   } else {
5493     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5494     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5495     PetscCall(ISDestroy(&is_local));
5496     PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5497     PetscCall(ISDestroy(&is_global));
5498   }
5499   if (pcbddc->dbg_flag) {
5500     Vec       x, x_change;
5501     PetscReal error;
5502 
5503     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5504     PetscCall(VecSetRandom(x, NULL));
5505     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5506     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5507     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5508     PetscCall(MatMult(new_mat, matis->x, matis->y));
5509     if (!pcbddc->change_interior) {
5510       const PetscScalar *x, *y, *v;
5511       PetscReal          lerror = 0.;
5512       PetscInt           i;
5513 
5514       PetscCall(VecGetArrayRead(matis->x, &x));
5515       PetscCall(VecGetArrayRead(matis->y, &y));
5516       PetscCall(VecGetArrayRead(matis->counter, &v));
5517       for (i = 0; i < local_size; i++)
5518         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5519       PetscCall(VecRestoreArrayRead(matis->x, &x));
5520       PetscCall(VecRestoreArrayRead(matis->y, &y));
5521       PetscCall(VecRestoreArrayRead(matis->counter, &v));
5522       PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5523       if (error > PETSC_SMALL) {
5524         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5525           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5526         } else {
5527           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5528         }
5529       }
5530     }
5531     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5532     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5533     PetscCall(VecAXPY(x, -1.0, x_change));
5534     PetscCall(VecNorm(x, NORM_INFINITY, &error));
5535     if (error > PETSC_SMALL) {
5536       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5537         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5538       } else {
5539         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5540       }
5541     }
5542     PetscCall(VecDestroy(&x));
5543     PetscCall(VecDestroy(&x_change));
5544   }
5545 
5546   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5547   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5548 
5549   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5550   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5551   if (isseqaij) {
5552     PetscCall(MatDestroy(&pcbddc->local_mat));
5553     PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5554     if (lA) {
5555       Mat work;
5556       PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5557       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5558       PetscCall(MatDestroy(&work));
5559     }
5560   } else {
5561     Mat work_mat;
5562 
5563     PetscCall(MatDestroy(&pcbddc->local_mat));
5564     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5565     PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5566     PetscCall(MatDestroy(&work_mat));
5567     if (lA) {
5568       Mat work;
5569       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5570       PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5571       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5572       PetscCall(MatDestroy(&work));
5573     }
5574   }
5575   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5576   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5577   PetscCall(MatDestroy(&new_mat));
5578   PetscFunctionReturn(PETSC_SUCCESS);
5579 }
5580 
5581 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5582 {
5583   PC_IS          *pcis        = (PC_IS *)pc->data;
5584   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
5585   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
5586   PetscInt       *idx_R_local = NULL;
5587   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
5588   PetscInt        vbs, bs;
5589   PetscBT         bitmask = NULL;
5590 
5591   PetscFunctionBegin;
5592   /*
5593     No need to setup local scatters if
5594       - primal space is unchanged
5595         AND
5596       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5597         AND
5598       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5599   */
5600   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5601   /* destroy old objects */
5602   PetscCall(ISDestroy(&pcbddc->is_R_local));
5603   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5604   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5605   /* Set Non-overlapping dimensions */
5606   n_B        = pcis->n_B;
5607   n_D        = pcis->n - n_B;
5608   n_vertices = pcbddc->n_vertices;
5609 
5610   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5611 
5612   /* create auxiliary bitmask and allocate workspace */
5613   if (!sub_schurs || !sub_schurs->reuse_solver) {
5614     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5615     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5616     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5617 
5618     for (i = 0, n_R = 0; i < pcis->n; i++) {
5619       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5620     }
5621   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5622     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5623 
5624     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5625     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5626   }
5627 
5628   /* Block code */
5629   vbs = 1;
5630   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5631   if (bs > 1 && !(n_vertices % bs)) {
5632     PetscBool is_blocked = PETSC_TRUE;
5633     PetscInt *vary;
5634     if (!sub_schurs || !sub_schurs->reuse_solver) {
5635       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5636       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5637       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5638       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5639       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5640       for (i = 0; i < pcis->n / bs; i++) {
5641         if (vary[i] != 0 && vary[i] != bs) {
5642           is_blocked = PETSC_FALSE;
5643           break;
5644         }
5645       }
5646       PetscCall(PetscFree(vary));
5647     } else {
5648       /* Verify directly the R set */
5649       for (i = 0; i < n_R / bs; i++) {
5650         PetscInt j, node = idx_R_local[bs * i];
5651         for (j = 1; j < bs; j++) {
5652           if (node != idx_R_local[bs * i + j] - j) {
5653             is_blocked = PETSC_FALSE;
5654             break;
5655           }
5656         }
5657       }
5658     }
5659     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5660       vbs = bs;
5661       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5662     }
5663   }
5664   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5665   if (sub_schurs && sub_schurs->reuse_solver) {
5666     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5667 
5668     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5669     PetscCall(ISDestroy(&reuse_solver->is_R));
5670     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5671     reuse_solver->is_R = pcbddc->is_R_local;
5672   } else {
5673     PetscCall(PetscFree(idx_R_local));
5674   }
5675 
5676   /* print some info if requested */
5677   if (pcbddc->dbg_flag) {
5678     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5679     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5680     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5681     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5682     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5683     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5684                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5685     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5686   }
5687 
5688   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5689   if (!sub_schurs || !sub_schurs->reuse_solver) {
5690     IS        is_aux1, is_aux2;
5691     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5692 
5693     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5694     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5695     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5696     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5697     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5698     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5699     for (i = 0, j = 0; i < n_R; i++) {
5700       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5701     }
5702     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5703     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5704     for (i = 0, j = 0; i < n_B; i++) {
5705       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5706     }
5707     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5708     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5709     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5710     PetscCall(ISDestroy(&is_aux1));
5711     PetscCall(ISDestroy(&is_aux2));
5712 
5713     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5714       PetscCall(PetscMalloc1(n_D, &aux_array1));
5715       for (i = 0, j = 0; i < n_R; i++) {
5716         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5717       }
5718       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5719       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5720       PetscCall(ISDestroy(&is_aux1));
5721     }
5722     PetscCall(PetscBTDestroy(&bitmask));
5723     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5724   } else {
5725     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5726     IS                 tis;
5727     PetscInt           schur_size;
5728 
5729     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5730     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5731     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5732     PetscCall(ISDestroy(&tis));
5733     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5734       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5735       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5736       PetscCall(ISDestroy(&tis));
5737     }
5738   }
5739   PetscFunctionReturn(PETSC_SUCCESS);
5740 }
5741 
5742 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5743 {
5744   MatNullSpace   NullSpace;
5745   Mat            dmat;
5746   const Vec     *nullvecs;
5747   Vec            v, v2, *nullvecs2;
5748   VecScatter     sct = NULL;
5749   PetscContainer c;
5750   PetscScalar   *ddata;
5751   PetscInt       k, nnsp_size, bsiz, bsiz2, n, N, bs;
5752   PetscBool      nnsp_has_cnst;
5753 
5754   PetscFunctionBegin;
5755   if (!is && !B) { /* MATIS */
5756     Mat_IS *matis = (Mat_IS *)A->data;
5757 
5758     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5759     sct = matis->cctx;
5760     PetscCall(PetscObjectReference((PetscObject)sct));
5761   } else {
5762     PetscCall(MatGetNullSpace(B, &NullSpace));
5763     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5764     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5765   }
5766   PetscCall(MatGetNullSpace(A, &NullSpace));
5767   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5768   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5769 
5770   PetscCall(MatCreateVecs(A, &v, NULL));
5771   PetscCall(MatCreateVecs(B, &v2, NULL));
5772   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5773   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs));
5774   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5775   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5776   PetscCall(VecGetBlockSize(v2, &bs));
5777   PetscCall(VecGetSize(v2, &N));
5778   PetscCall(VecGetLocalSize(v2, &n));
5779   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5780   for (k = 0; k < nnsp_size; k++) {
5781     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5782     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5783     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5784   }
5785   if (nnsp_has_cnst) {
5786     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5787     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5788   }
5789   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5790   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5791 
5792   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5793   PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c));
5794   PetscCall(PetscContainerSetPointer(c, ddata));
5795   PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault));
5796   PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c));
5797   PetscCall(PetscContainerDestroy(&c));
5798   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5799   PetscCall(MatDestroy(&dmat));
5800 
5801   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5802   PetscCall(PetscFree(nullvecs2));
5803   PetscCall(MatSetNearNullSpace(B, NullSpace));
5804   PetscCall(MatNullSpaceDestroy(&NullSpace));
5805   PetscCall(VecDestroy(&v));
5806   PetscCall(VecDestroy(&v2));
5807   PetscCall(VecScatterDestroy(&sct));
5808   PetscFunctionReturn(PETSC_SUCCESS);
5809 }
5810 
5811 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5812 {
5813   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5814   PC_IS       *pcis   = (PC_IS *)pc->data;
5815   PC           pc_temp;
5816   Mat          A_RR;
5817   MatNullSpace nnsp;
5818   MatReuse     reuse;
5819   PetscScalar  m_one = -1.0;
5820   PetscReal    value;
5821   PetscInt     n_D, n_R;
5822   PetscBool    issbaij, opts, isset, issym;
5823   void (*f)(void) = NULL;
5824   char   dir_prefix[256], neu_prefix[256], str_level[16];
5825   size_t len;
5826 
5827   PetscFunctionBegin;
5828   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5829   /* approximate solver, propagate NearNullSpace if needed */
5830   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5831     MatNullSpace gnnsp1, gnnsp2;
5832     PetscBool    lhas, ghas;
5833 
5834     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5835     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5836     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5837     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5838     PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5839     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5840   }
5841 
5842   /* compute prefixes */
5843   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5844   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5845   if (!pcbddc->current_level) {
5846     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5847     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5848     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5849     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5850   } else {
5851     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level));
5852     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5853     len -= 15;                                /* remove "pc_bddc_coarse_" */
5854     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5855     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5856     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5857     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5858     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5859     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5860     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5861     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5862     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5863   }
5864 
5865   /* DIRICHLET PROBLEM */
5866   if (dirichlet) {
5867     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5868     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5869       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5870       if (pcbddc->dbg_flag) {
5871         Mat A_IIn;
5872 
5873         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5874         PetscCall(MatDestroy(&pcis->A_II));
5875         pcis->A_II = A_IIn;
5876       }
5877     }
5878     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5879     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
5880 
5881     /* Matrix for Dirichlet problem is pcis->A_II */
5882     n_D  = pcis->n - pcis->n_B;
5883     opts = PETSC_FALSE;
5884     if (!pcbddc->ksp_D) { /* create object if not yet build */
5885       opts = PETSC_TRUE;
5886       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5887       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
5888       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5889       /* default */
5890       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5891       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5892       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5893       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5894       if (issbaij) {
5895         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5896       } else {
5897         PetscCall(PCSetType(pc_temp, PCLU));
5898       }
5899       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5900     }
5901     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5902     PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
5903     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5904     /* Allow user's customization */
5905     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5906     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5907     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5908       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5909     }
5910     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5911     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5912     PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5913     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5914       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5915       const PetscInt *idxs;
5916       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
5917 
5918       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5919       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5920       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5921       for (i = 0; i < nl; i++) {
5922         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5923       }
5924       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5925       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5926       PetscCall(PetscFree(scoords));
5927     }
5928     if (sub_schurs && sub_schurs->reuse_solver) {
5929       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5930 
5931       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5932     }
5933 
5934     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5935     if (!n_D) {
5936       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5937       PetscCall(PCSetType(pc_temp, PCNONE));
5938     }
5939     PetscCall(KSPSetUp(pcbddc->ksp_D));
5940     /* set ksp_D into pcis data */
5941     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5942     PetscCall(KSPDestroy(&pcis->ksp_D));
5943     pcis->ksp_D = pcbddc->ksp_D;
5944   }
5945 
5946   /* NEUMANN PROBLEM */
5947   A_RR = NULL;
5948   if (neumann) {
5949     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5950     PetscInt        ibs, mbs;
5951     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
5952     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
5953 
5954     reuse_neumann_solver = PETSC_FALSE;
5955     if (sub_schurs && sub_schurs->reuse_solver) {
5956       IS iP;
5957 
5958       reuse_neumann_solver = PETSC_TRUE;
5959       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
5960       if (iP) reuse_neumann_solver = PETSC_FALSE;
5961     }
5962     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5963     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
5964     if (pcbddc->ksp_R) { /* already created ksp */
5965       PetscInt nn_R;
5966       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
5967       PetscCall(PetscObjectReference((PetscObject)A_RR));
5968       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
5969       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5970         PetscCall(KSPReset(pcbddc->ksp_R));
5971         PetscCall(MatDestroy(&A_RR));
5972         reuse = MAT_INITIAL_MATRIX;
5973       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5974         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5975           PetscCall(MatDestroy(&A_RR));
5976           reuse = MAT_INITIAL_MATRIX;
5977         } else { /* safe to reuse the matrix */
5978           reuse = MAT_REUSE_MATRIX;
5979         }
5980       }
5981       /* last check */
5982       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5983         PetscCall(MatDestroy(&A_RR));
5984         reuse = MAT_INITIAL_MATRIX;
5985       }
5986     } else { /* first time, so we need to create the matrix */
5987       reuse = MAT_INITIAL_MATRIX;
5988     }
5989     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5990        TODO: Get Rid of these conversions */
5991     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
5992     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
5993     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
5994     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5995       if (matis->A == pcbddc->local_mat) {
5996         PetscCall(MatDestroy(&pcbddc->local_mat));
5997         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5998       } else {
5999         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6000       }
6001     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
6002       if (matis->A == pcbddc->local_mat) {
6003         PetscCall(MatDestroy(&pcbddc->local_mat));
6004         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6005       } else {
6006         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6007       }
6008     }
6009     /* extract A_RR */
6010     if (reuse_neumann_solver) {
6011       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6012 
6013       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6014         PetscCall(MatDestroy(&A_RR));
6015         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6016           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6017         } else {
6018           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6019         }
6020       } else {
6021         PetscCall(MatDestroy(&A_RR));
6022         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6023         PetscCall(PetscObjectReference((PetscObject)A_RR));
6024       }
6025     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6026       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6027     }
6028     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6029     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6030     opts = PETSC_FALSE;
6031     if (!pcbddc->ksp_R) { /* create object if not present */
6032       opts = PETSC_TRUE;
6033       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6034       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6035       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6036       /* default */
6037       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6038       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6039       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6040       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6041       if (issbaij) {
6042         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6043       } else {
6044         PetscCall(PCSetType(pc_temp, PCLU));
6045       }
6046       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6047     }
6048     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6049     PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6050     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6051     if (opts) { /* Allow user's customization once */
6052       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6053     }
6054     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6055     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6056       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6057     }
6058     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6059     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6060     PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6061     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6062       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6063       const PetscInt *idxs;
6064       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
6065 
6066       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6067       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6068       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6069       for (i = 0; i < nl; i++) {
6070         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6071       }
6072       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6073       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6074       PetscCall(PetscFree(scoords));
6075     }
6076 
6077     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6078     if (!n_R) {
6079       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6080       PetscCall(PCSetType(pc_temp, PCNONE));
6081     }
6082     /* Reuse solver if it is present */
6083     if (reuse_neumann_solver) {
6084       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6085 
6086       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6087     }
6088     PetscCall(KSPSetUp(pcbddc->ksp_R));
6089   }
6090 
6091   if (pcbddc->dbg_flag) {
6092     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6093     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6094     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6095   }
6096   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6097 
6098   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6099   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6100   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6101   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6102   /* check Dirichlet and Neumann solvers */
6103   if (pcbddc->dbg_flag) {
6104     if (dirichlet) { /* Dirichlet */
6105       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6106       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6107       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6108       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6109       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6110       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6111       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6112       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6113     }
6114     if (neumann) { /* Neumann */
6115       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6116       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6117       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6118       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6119       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6120       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6121       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6122       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6123     }
6124   }
6125   /* free Neumann problem's matrix */
6126   PetscCall(MatDestroy(&A_RR));
6127   PetscFunctionReturn(PETSC_SUCCESS);
6128 }
6129 
6130 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6131 {
6132   PC_BDDC        *pcbddc       = (PC_BDDC *)pc->data;
6133   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
6134   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6135 
6136   PetscFunctionBegin;
6137   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6138   if (!pcbddc->switch_static) {
6139     if (applytranspose && pcbddc->local_auxmat1) {
6140       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6141       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6142     }
6143     if (!reuse_solver) {
6144       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6145       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6146     } else {
6147       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6148 
6149       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6150       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6151     }
6152   } else {
6153     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6154     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6155     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6156     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6157     if (applytranspose && pcbddc->local_auxmat1) {
6158       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6159       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6160       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6161       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6162     }
6163   }
6164   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6165   if (!reuse_solver || pcbddc->switch_static) {
6166     if (applytranspose) {
6167       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6168     } else {
6169       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6170     }
6171     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6172   } else {
6173     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6174 
6175     if (applytranspose) {
6176       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6177     } else {
6178       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6179     }
6180   }
6181   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6182   PetscCall(VecSet(inout_B, 0.));
6183   if (!pcbddc->switch_static) {
6184     if (!reuse_solver) {
6185       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6186       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6187     } else {
6188       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6189 
6190       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6191       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6192     }
6193     if (!applytranspose && pcbddc->local_auxmat1) {
6194       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6195       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6196     }
6197   } else {
6198     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6199     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6200     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6201     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6202     if (!applytranspose && pcbddc->local_auxmat1) {
6203       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6204       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6205     }
6206     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6207     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6208     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6209     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6210   }
6211   PetscFunctionReturn(PETSC_SUCCESS);
6212 }
6213 
6214 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
6215 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6216 {
6217   PC_BDDC          *pcbddc = (PC_BDDC *)pc->data;
6218   PC_IS            *pcis   = (PC_IS *)pc->data;
6219   const PetscScalar zero   = 0.0;
6220 
6221   PetscFunctionBegin;
6222   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6223   if (!pcbddc->benign_apply_coarse_only) {
6224     if (applytranspose) {
6225       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6226       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6227     } else {
6228       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6229       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6230     }
6231   } else {
6232     PetscCall(VecSet(pcbddc->vec1_P, zero));
6233   }
6234 
6235   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6236   if (pcbddc->benign_n) {
6237     PetscScalar *array;
6238     PetscInt     j;
6239 
6240     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6241     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6242     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6243   }
6244 
6245   /* start communications from local primal nodes to rhs of coarse solver */
6246   PetscCall(VecSet(pcbddc->coarse_vec, zero));
6247   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6248   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6249 
6250   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6251   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6252   if (pcbddc->coarse_ksp) {
6253     Mat          coarse_mat;
6254     Vec          rhs, sol;
6255     MatNullSpace nullsp;
6256     PetscBool    isbddc = PETSC_FALSE;
6257 
6258     if (pcbddc->benign_have_null) {
6259       PC coarse_pc;
6260 
6261       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6262       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6263       /* we need to propagate to coarser levels the need for a possible benign correction */
6264       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6265         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)coarse_pc->data;
6266         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
6267         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6268       }
6269     }
6270     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6271     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6272     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6273     if (applytranspose) {
6274       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6275       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6276       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6277       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6278       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6279     } else {
6280       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6281       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6282         PC coarse_pc;
6283 
6284         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6285         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6286         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6287         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6288         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6289       } else {
6290         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6291         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6292         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6293       }
6294     }
6295     /* we don't need the benign correction at coarser levels anymore */
6296     if (pcbddc->benign_have_null && isbddc) {
6297       PC       coarse_pc;
6298       PC_BDDC *coarsepcbddc;
6299 
6300       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6301       coarsepcbddc                           = (PC_BDDC *)coarse_pc->data;
6302       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
6303       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6304     }
6305   }
6306   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6307 
6308   /* Local solution on R nodes */
6309   if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6310   /* communications from coarse sol to local primal nodes */
6311   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6312   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6313 
6314   /* Sum contributions from the two levels */
6315   if (!pcbddc->benign_apply_coarse_only) {
6316     if (applytranspose) {
6317       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6318       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6319     } else {
6320       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6321       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6322     }
6323     /* store p0 */
6324     if (pcbddc->benign_n) {
6325       PetscScalar *array;
6326       PetscInt     j;
6327 
6328       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6329       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6330       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6331     }
6332   } else { /* expand the coarse solution */
6333     if (applytranspose) {
6334       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6335     } else {
6336       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6337     }
6338   }
6339   PetscFunctionReturn(PETSC_SUCCESS);
6340 }
6341 
6342 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6343 {
6344   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6345   Vec                from, to;
6346   const PetscScalar *array;
6347 
6348   PetscFunctionBegin;
6349   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6350     from = pcbddc->coarse_vec;
6351     to   = pcbddc->vec1_P;
6352     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6353       Vec tvec;
6354 
6355       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6356       PetscCall(VecResetArray(tvec));
6357       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6358       PetscCall(VecGetArrayRead(tvec, &array));
6359       PetscCall(VecPlaceArray(from, array));
6360       PetscCall(VecRestoreArrayRead(tvec, &array));
6361     }
6362   } else { /* from local to global -> put data in coarse right-hand side */
6363     from = pcbddc->vec1_P;
6364     to   = pcbddc->coarse_vec;
6365   }
6366   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6367   PetscFunctionReturn(PETSC_SUCCESS);
6368 }
6369 
6370 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6371 {
6372   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6373   Vec                from, to;
6374   const PetscScalar *array;
6375 
6376   PetscFunctionBegin;
6377   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6378     from = pcbddc->coarse_vec;
6379     to   = pcbddc->vec1_P;
6380   } else { /* from local to global -> put data in coarse right-hand side */
6381     from = pcbddc->vec1_P;
6382     to   = pcbddc->coarse_vec;
6383   }
6384   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6385   if (smode == SCATTER_FORWARD) {
6386     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6387       Vec tvec;
6388 
6389       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6390       PetscCall(VecGetArrayRead(to, &array));
6391       PetscCall(VecPlaceArray(tvec, array));
6392       PetscCall(VecRestoreArrayRead(to, &array));
6393     }
6394   } else {
6395     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6396       PetscCall(VecResetArray(from));
6397     }
6398   }
6399   PetscFunctionReturn(PETSC_SUCCESS);
6400 }
6401 
6402 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6403 {
6404   PC_IS   *pcis   = (PC_IS *)pc->data;
6405   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6406   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
6407   /* one and zero */
6408   PetscScalar one = 1.0, zero = 0.0;
6409   /* space to store constraints and their local indices */
6410   PetscScalar *constraints_data;
6411   PetscInt    *constraints_idxs, *constraints_idxs_B;
6412   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
6413   PetscInt    *constraints_n;
6414   /* iterators */
6415   PetscInt i, j, k, total_counts, total_counts_cc, cum;
6416   /* BLAS integers */
6417   PetscBLASInt lwork, lierr;
6418   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6419   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6420   /* reuse */
6421   PetscInt  olocal_primal_size, olocal_primal_size_cc;
6422   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6423   /* change of basis */
6424   PetscBool qr_needed;
6425   PetscBT   change_basis, qr_needed_idx;
6426   /* auxiliary stuff */
6427   PetscInt *nnz, *is_indices;
6428   PetscInt  ncc;
6429   /* some quantities */
6430   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
6431   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6432   PetscReal tol; /* tolerance for retaining eigenmodes */
6433 
6434   PetscFunctionBegin;
6435   tol = PetscSqrtReal(PETSC_SMALL);
6436   /* Destroy Mat objects computed previously */
6437   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6438   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6439   PetscCall(MatDestroy(&pcbddc->switch_static_change));
6440   /* save info on constraints from previous setup (if any) */
6441   olocal_primal_size    = pcbddc->local_primal_size;
6442   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6443   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6444   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6445   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6446   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6447   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6448 
6449   if (!pcbddc->adaptive_selection) {
6450     IS           ISForVertices, *ISForFaces, *ISForEdges;
6451     MatNullSpace nearnullsp;
6452     const Vec   *nearnullvecs;
6453     Vec         *localnearnullsp;
6454     PetscScalar *array;
6455     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6456     PetscBool    nnsp_has_cnst;
6457     /* LAPACK working arrays for SVD or POD */
6458     PetscBool    skip_lapack, boolforchange;
6459     PetscScalar *work;
6460     PetscReal   *singular_vals;
6461 #if defined(PETSC_USE_COMPLEX)
6462     PetscReal *rwork;
6463 #endif
6464     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6465     PetscBLASInt dummy_int    = 1;
6466     PetscScalar  dummy_scalar = 1.;
6467     PetscBool    use_pod      = PETSC_FALSE;
6468 
6469     /* MKL SVD with same input gives different results on different processes! */
6470 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6471     use_pod = PETSC_TRUE;
6472 #endif
6473     /* Get index sets for faces, edges and vertices from graph */
6474     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6475     o_nf       = n_ISForFaces;
6476     o_ne       = n_ISForEdges;
6477     n_vertices = 0;
6478     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6479     /* print some info */
6480     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6481       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6482       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6483       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6484       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6485       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6486       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6487       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6488       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6489       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6490     }
6491 
6492     if (!pcbddc->use_vertices) n_vertices = 0;
6493     if (!pcbddc->use_edges) n_ISForEdges = 0;
6494     if (!pcbddc->use_faces) n_ISForFaces = 0;
6495 
6496     /* check if near null space is attached to global mat */
6497     if (pcbddc->use_nnsp) {
6498       PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6499     } else nearnullsp = NULL;
6500 
6501     if (nearnullsp) {
6502       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6503       /* remove any stored info */
6504       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6505       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6506       /* store information for BDDC solver reuse */
6507       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6508       pcbddc->onearnullspace = nearnullsp;
6509       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6510       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6511     } else { /* if near null space is not provided BDDC uses constants by default */
6512       nnsp_size     = 0;
6513       nnsp_has_cnst = PETSC_TRUE;
6514     }
6515     /* get max number of constraints on a single cc */
6516     max_constraints = nnsp_size;
6517     if (nnsp_has_cnst) max_constraints++;
6518 
6519     /*
6520          Evaluate maximum storage size needed by the procedure
6521          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6522          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6523          There can be multiple constraints per connected component
6524                                                                                                                                                            */
6525     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6526     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6527 
6528     total_counts = n_ISForFaces + n_ISForEdges;
6529     total_counts *= max_constraints;
6530     total_counts += n_vertices;
6531     PetscCall(PetscBTCreate(total_counts, &change_basis));
6532 
6533     total_counts           = 0;
6534     max_size_of_constraint = 0;
6535     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6536       IS used_is;
6537       if (i < n_ISForEdges) {
6538         used_is = ISForEdges[i];
6539       } else {
6540         used_is = ISForFaces[i - n_ISForEdges];
6541       }
6542       PetscCall(ISGetSize(used_is, &j));
6543       total_counts += j;
6544       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6545     }
6546     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6547 
6548     /* get local part of global near null space vectors */
6549     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6550     for (k = 0; k < nnsp_size; k++) {
6551       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6552       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6553       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6554     }
6555 
6556     /* whether or not to skip lapack calls */
6557     skip_lapack = PETSC_TRUE;
6558     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6559 
6560     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6561     if (!skip_lapack) {
6562       PetscScalar temp_work;
6563 
6564       if (use_pod) {
6565         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6566         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6567         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6568         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6569 #if defined(PETSC_USE_COMPLEX)
6570         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6571 #endif
6572         /* now we evaluate the optimal workspace using query with lwork=-1 */
6573         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6574         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6575         lwork = -1;
6576         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6577 #if !defined(PETSC_USE_COMPLEX)
6578         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6579 #else
6580         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6581 #endif
6582         PetscCall(PetscFPTrapPop());
6583         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr);
6584       } else {
6585 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6586         /* SVD */
6587         PetscInt max_n, min_n;
6588         max_n = max_size_of_constraint;
6589         min_n = max_constraints;
6590         if (max_size_of_constraint < max_constraints) {
6591           min_n = max_size_of_constraint;
6592           max_n = max_constraints;
6593         }
6594         PetscCall(PetscMalloc1(min_n, &singular_vals));
6595   #if defined(PETSC_USE_COMPLEX)
6596         PetscCall(PetscMalloc1(5 * min_n, &rwork));
6597   #endif
6598         /* now we evaluate the optimal workspace using query with lwork=-1 */
6599         lwork = -1;
6600         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6601         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6602         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6603         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6604   #if !defined(PETSC_USE_COMPLEX)
6605         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6606   #else
6607         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6608   #endif
6609         PetscCall(PetscFPTrapPop());
6610         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr);
6611 #else
6612         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6613 #endif /* on missing GESVD */
6614       }
6615       /* Allocate optimal workspace */
6616       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6617       PetscCall(PetscMalloc1(lwork, &work));
6618     }
6619     /* Now we can loop on constraining sets */
6620     total_counts            = 0;
6621     constraints_idxs_ptr[0] = 0;
6622     constraints_data_ptr[0] = 0;
6623     /* vertices */
6624     if (n_vertices) {
6625       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6626       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6627       for (i = 0; i < n_vertices; i++) {
6628         constraints_n[total_counts]            = 1;
6629         constraints_data[total_counts]         = 1.0;
6630         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6631         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6632         total_counts++;
6633       }
6634       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6635     }
6636 
6637     /* edges and faces */
6638     total_counts_cc = total_counts;
6639     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6640       IS        used_is;
6641       PetscBool idxs_copied = PETSC_FALSE;
6642 
6643       if (ncc < n_ISForEdges) {
6644         used_is       = ISForEdges[ncc];
6645         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6646       } else {
6647         used_is       = ISForFaces[ncc - n_ISForEdges];
6648         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6649       }
6650       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6651 
6652       PetscCall(ISGetSize(used_is, &size_of_constraint));
6653       if (!size_of_constraint) continue;
6654       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6655       if (nnsp_has_cnst) {
6656         PetscScalar quad_value;
6657 
6658         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6659         idxs_copied = PETSC_TRUE;
6660 
6661         if (!pcbddc->use_nnsp_true) {
6662           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6663         } else {
6664           quad_value = 1.0;
6665         }
6666         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6667         temp_constraints++;
6668         total_counts++;
6669       }
6670       for (k = 0; k < nnsp_size; k++) {
6671         PetscReal    real_value;
6672         PetscScalar *ptr_to_data;
6673 
6674         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6675         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6676         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6677         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6678         /* check if array is null on the connected component */
6679         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6680         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6681         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6682           temp_constraints++;
6683           total_counts++;
6684           if (!idxs_copied) {
6685             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6686             idxs_copied = PETSC_TRUE;
6687           }
6688         }
6689       }
6690       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6691       valid_constraints = temp_constraints;
6692       if (!pcbddc->use_nnsp_true && temp_constraints) {
6693         if (temp_constraints == 1) { /* just normalize the constraint */
6694           PetscScalar norm, *ptr_to_data;
6695 
6696           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6697           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6698           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6699           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6700           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6701         } else { /* perform SVD */
6702           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6703 
6704           if (use_pod) {
6705             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6706                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6707                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6708                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6709                   from that computed using LAPACKgesvd
6710                -> This is due to a different computation of eigenvectors in LAPACKheev
6711                -> The quality of the POD-computed basis will be the same */
6712             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6713             /* Store upper triangular part of correlation matrix */
6714             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6715             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6716             for (j = 0; j < temp_constraints; j++) {
6717               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6718             }
6719             /* compute eigenvalues and eigenvectors of correlation matrix */
6720             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6721             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6722 #if !defined(PETSC_USE_COMPLEX)
6723             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6724 #else
6725             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6726 #endif
6727             PetscCall(PetscFPTrapPop());
6728             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr);
6729             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6730             j = 0;
6731             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6732             total_counts      = total_counts - j;
6733             valid_constraints = temp_constraints - j;
6734             /* scale and copy POD basis into used quadrature memory */
6735             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6736             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6737             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6738             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6739             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6740             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6741             if (j < temp_constraints) {
6742               PetscInt ii;
6743               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6744               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6745               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6746               PetscCall(PetscFPTrapPop());
6747               for (k = 0; k < temp_constraints - j; k++) {
6748                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6749               }
6750             }
6751           } else {
6752 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6753             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6754             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6755             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6756             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6757   #if !defined(PETSC_USE_COMPLEX)
6758             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6759   #else
6760             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6761   #endif
6762             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
6763             PetscCall(PetscFPTrapPop());
6764             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6765             k = temp_constraints;
6766             if (k > size_of_constraint) k = size_of_constraint;
6767             j = 0;
6768             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6769             valid_constraints = k - j;
6770             total_counts      = total_counts - temp_constraints + valid_constraints;
6771 #else
6772             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6773 #endif /* on missing GESVD */
6774           }
6775         }
6776       }
6777       /* update pointers information */
6778       if (valid_constraints) {
6779         constraints_n[total_counts_cc]            = valid_constraints;
6780         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6781         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6782         /* set change_of_basis flag */
6783         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6784         total_counts_cc++;
6785       }
6786     }
6787     /* free workspace */
6788     if (!skip_lapack) {
6789       PetscCall(PetscFree(work));
6790 #if defined(PETSC_USE_COMPLEX)
6791       PetscCall(PetscFree(rwork));
6792 #endif
6793       PetscCall(PetscFree(singular_vals));
6794       PetscCall(PetscFree(correlation_mat));
6795       PetscCall(PetscFree(temp_basis));
6796     }
6797     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6798     PetscCall(PetscFree(localnearnullsp));
6799     /* free index sets of faces, edges and vertices */
6800     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6801   } else {
6802     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6803 
6804     total_counts = 0;
6805     n_vertices   = 0;
6806     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6807     max_constraints = 0;
6808     total_counts_cc = 0;
6809     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6810       total_counts += pcbddc->adaptive_constraints_n[i];
6811       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6812       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6813     }
6814     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6815     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6816     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6817     constraints_data     = pcbddc->adaptive_constraints_data;
6818     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6819     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6820     total_counts_cc = 0;
6821     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6822       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6823     }
6824 
6825     max_size_of_constraint = 0;
6826     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6827     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6828     /* Change of basis */
6829     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6830     if (pcbddc->use_change_of_basis) {
6831       for (i = 0; i < sub_schurs->n_subs; i++) {
6832         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6833       }
6834     }
6835   }
6836   pcbddc->local_primal_size = total_counts;
6837   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6838 
6839   /* map constraints_idxs in boundary numbering */
6840   if (pcbddc->use_change_of_basis) {
6841     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6842     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6843   }
6844 
6845   /* Create constraint matrix */
6846   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6847   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6848   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6849 
6850   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6851   /* determine if a QR strategy is needed for change of basis */
6852   qr_needed = pcbddc->use_qr_single;
6853   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6854   total_primal_vertices        = 0;
6855   pcbddc->local_primal_size_cc = 0;
6856   for (i = 0; i < total_counts_cc; i++) {
6857     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6858     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6859       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6860       pcbddc->local_primal_size_cc += 1;
6861     } else if (PetscBTLookup(change_basis, i)) {
6862       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6863       pcbddc->local_primal_size_cc += constraints_n[i];
6864       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6865         PetscCall(PetscBTSet(qr_needed_idx, i));
6866         qr_needed = PETSC_TRUE;
6867       }
6868     } else {
6869       pcbddc->local_primal_size_cc += 1;
6870     }
6871   }
6872   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6873   pcbddc->n_vertices = total_primal_vertices;
6874   /* permute indices in order to have a sorted set of vertices */
6875   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6876   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6877   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6878   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6879 
6880   /* nonzero structure of constraint matrix */
6881   /* and get reference dof for local constraints */
6882   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6883   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6884 
6885   j            = total_primal_vertices;
6886   total_counts = total_primal_vertices;
6887   cum          = total_primal_vertices;
6888   for (i = n_vertices; i < total_counts_cc; i++) {
6889     if (!PetscBTLookup(change_basis, i)) {
6890       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6891       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6892       cum++;
6893       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6894       for (k = 0; k < constraints_n[i]; k++) {
6895         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6896         nnz[j + k]                                        = size_of_constraint;
6897       }
6898       j += constraints_n[i];
6899     }
6900   }
6901   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6902   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6903   PetscCall(PetscFree(nnz));
6904 
6905   /* set values in constraint matrix */
6906   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6907   total_counts = total_primal_vertices;
6908   for (i = n_vertices; i < total_counts_cc; i++) {
6909     if (!PetscBTLookup(change_basis, i)) {
6910       PetscInt *cols;
6911 
6912       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6913       cols               = constraints_idxs + constraints_idxs_ptr[i];
6914       for (k = 0; k < constraints_n[i]; k++) {
6915         PetscInt     row = total_counts + k;
6916         PetscScalar *vals;
6917 
6918         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6919         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6920       }
6921       total_counts += constraints_n[i];
6922     }
6923   }
6924   /* assembling */
6925   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6926   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6927   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
6928 
6929   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6930   if (pcbddc->use_change_of_basis) {
6931     /* dual and primal dofs on a single cc */
6932     PetscInt dual_dofs, primal_dofs;
6933     /* working stuff for GEQRF */
6934     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6935     PetscBLASInt lqr_work;
6936     /* working stuff for UNGQR */
6937     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6938     PetscBLASInt lgqr_work;
6939     /* working stuff for TRTRS */
6940     PetscScalar *trs_rhs = NULL;
6941     PetscBLASInt Blas_NRHS;
6942     /* pointers for values insertion into change of basis matrix */
6943     PetscInt    *start_rows, *start_cols;
6944     PetscScalar *start_vals;
6945     /* working stuff for values insertion */
6946     PetscBT   is_primal;
6947     PetscInt *aux_primal_numbering_B;
6948     /* matrix sizes */
6949     PetscInt global_size, local_size;
6950     /* temporary change of basis */
6951     Mat localChangeOfBasisMatrix;
6952     /* extra space for debugging */
6953     PetscScalar *dbg_work = NULL;
6954 
6955     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
6956     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
6957     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
6958     /* nonzeros for local mat */
6959     PetscCall(PetscMalloc1(pcis->n, &nnz));
6960     if (!pcbddc->benign_change || pcbddc->fake_change) {
6961       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6962     } else {
6963       const PetscInt *ii;
6964       PetscInt        n;
6965       PetscBool       flg_row;
6966       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6967       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6968       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6969     }
6970     for (i = n_vertices; i < total_counts_cc; i++) {
6971       if (PetscBTLookup(change_basis, i)) {
6972         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6973         if (PetscBTLookup(qr_needed_idx, i)) {
6974           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6975         } else {
6976           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6977           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6978         }
6979       }
6980     }
6981     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
6982     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6983     PetscCall(PetscFree(nnz));
6984     /* Set interior change in the matrix */
6985     if (!pcbddc->benign_change || pcbddc->fake_change) {
6986       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
6987     } else {
6988       const PetscInt *ii, *jj;
6989       PetscScalar    *aa;
6990       PetscInt        n;
6991       PetscBool       flg_row;
6992       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6993       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
6994       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
6995       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
6996       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6997     }
6998 
6999     if (pcbddc->dbg_flag) {
7000       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
7001       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
7002     }
7003 
7004     /* Now we loop on the constraints which need a change of basis */
7005     /*
7006        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7007        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7008 
7009        Basic blocks of change of basis matrix T computed:
7010 
7011           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7012 
7013             | 1        0   ...        0         s_1/S |
7014             | 0        1   ...        0         s_2/S |
7015             |              ...                        |
7016             | 0        ...            1     s_{n-1}/S |
7017             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
7018 
7019             with S = \sum_{i=1}^n s_i^2
7020             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7021                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7022 
7023           - QR decomposition of constraints otherwise
7024     */
7025     if (qr_needed && max_size_of_constraint) {
7026       /* space to store Q */
7027       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7028       /* array to store scaling factors for reflectors */
7029       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7030       /* first we issue queries for optimal work */
7031       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7032       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7033       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7034       lqr_work = -1;
7035       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7036       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr);
7037       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7038       PetscCall(PetscMalloc1(lqr_work, &qr_work));
7039       lgqr_work = -1;
7040       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7041       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7042       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7043       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7044       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7045       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7046       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr);
7047       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7048       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7049       /* array to store rhs and solution of triangular solver */
7050       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7051       /* allocating workspace for check */
7052       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7053     }
7054     /* array to store whether a node is primal or not */
7055     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7056     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7057     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7058     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7059     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7060     PetscCall(PetscFree(aux_primal_numbering_B));
7061 
7062     /* loop on constraints and see whether or not they need a change of basis and compute it */
7063     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7064       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7065       if (PetscBTLookup(change_basis, total_counts)) {
7066         /* get constraint info */
7067         primal_dofs = constraints_n[total_counts];
7068         dual_dofs   = size_of_constraint - primal_dofs;
7069 
7070         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7071 
7072         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7073 
7074           /* copy quadrature constraints for change of basis check */
7075           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7076           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7077           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7078 
7079           /* compute QR decomposition of constraints */
7080           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7081           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7082           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7083           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7084           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7085           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr);
7086           PetscCall(PetscFPTrapPop());
7087 
7088           /* explicitly compute R^-T */
7089           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7090           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7091           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7092           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7093           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7094           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7095           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7096           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7097           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr);
7098           PetscCall(PetscFPTrapPop());
7099 
7100           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7101           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7102           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7103           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7104           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7105           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7106           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7107           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr);
7108           PetscCall(PetscFPTrapPop());
7109 
7110           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7111              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7112              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7113           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7114           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7115           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7116           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7117           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7118           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7119           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7120           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7121           PetscCall(PetscFPTrapPop());
7122           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7123 
7124           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7125           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7126           /* insert cols for primal dofs */
7127           for (j = 0; j < primal_dofs; j++) {
7128             start_vals = &qr_basis[j * size_of_constraint];
7129             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7130             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7131           }
7132           /* insert cols for dual dofs */
7133           for (j = 0, k = 0; j < dual_dofs; k++) {
7134             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7135               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7136               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7137               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7138               j++;
7139             }
7140           }
7141 
7142           /* check change of basis */
7143           if (pcbddc->dbg_flag) {
7144             PetscInt  ii, jj;
7145             PetscBool valid_qr = PETSC_TRUE;
7146             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7147             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7148             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7149             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7150             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7151             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7152             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7153             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7154             PetscCall(PetscFPTrapPop());
7155             for (jj = 0; jj < size_of_constraint; jj++) {
7156               for (ii = 0; ii < primal_dofs; ii++) {
7157                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7158                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7159               }
7160             }
7161             if (!valid_qr) {
7162               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7163               for (jj = 0; jj < size_of_constraint; jj++) {
7164                 for (ii = 0; ii < primal_dofs; ii++) {
7165                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7166                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7167                   }
7168                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7169                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7170                   }
7171                 }
7172               }
7173             } else {
7174               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7175             }
7176           }
7177         } else { /* simple transformation block */
7178           PetscInt    row, col;
7179           PetscScalar val, norm;
7180 
7181           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7182           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7183           for (j = 0; j < size_of_constraint; j++) {
7184             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7185             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7186             if (!PetscBTLookup(is_primal, row_B)) {
7187               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7188               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7189               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7190             } else {
7191               for (k = 0; k < size_of_constraint; k++) {
7192                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7193                 if (row != col) {
7194                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7195                 } else {
7196                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7197                 }
7198                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7199               }
7200             }
7201           }
7202           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7203         }
7204       } else {
7205         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7206       }
7207     }
7208 
7209     /* free workspace */
7210     if (qr_needed) {
7211       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7212       PetscCall(PetscFree(trs_rhs));
7213       PetscCall(PetscFree(qr_tau));
7214       PetscCall(PetscFree(qr_work));
7215       PetscCall(PetscFree(gqr_work));
7216       PetscCall(PetscFree(qr_basis));
7217     }
7218     PetscCall(PetscBTDestroy(&is_primal));
7219     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7220     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7221 
7222     /* assembling of global change of variable */
7223     if (!pcbddc->fake_change) {
7224       Mat      tmat;
7225       PetscInt bs;
7226 
7227       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7228       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7229       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7230       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7231       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7232       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7233       PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix));
7234       PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ));
7235       PetscCall(MatGetBlockSize(pc->pmat, &bs));
7236       PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs));
7237       PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size));
7238       PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE));
7239       PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7240       PetscCall(MatDestroy(&tmat));
7241       PetscCall(VecSet(pcis->vec1_global, 0.0));
7242       PetscCall(VecSet(pcis->vec1_N, 1.0));
7243       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7244       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7245       PetscCall(VecReciprocal(pcis->vec1_global));
7246       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7247 
7248       /* check */
7249       if (pcbddc->dbg_flag) {
7250         PetscReal error;
7251         Vec       x, x_change;
7252 
7253         PetscCall(VecDuplicate(pcis->vec1_global, &x));
7254         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7255         PetscCall(VecSetRandom(x, NULL));
7256         PetscCall(VecCopy(x, pcis->vec1_global));
7257         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7258         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7259         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7260         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7261         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7262         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7263         PetscCall(VecAXPY(x, -1.0, x_change));
7264         PetscCall(VecNorm(x, NORM_INFINITY, &error));
7265         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7266         PetscCall(VecDestroy(&x));
7267         PetscCall(VecDestroy(&x_change));
7268       }
7269       /* adapt sub_schurs computed (if any) */
7270       if (pcbddc->use_deluxe_scaling) {
7271         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7272 
7273         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7274         if (sub_schurs && sub_schurs->S_Ej_all) {
7275           Mat S_new, tmat;
7276           IS  is_all_N, is_V_Sall = NULL;
7277 
7278           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7279           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7280           if (pcbddc->deluxe_zerorows) {
7281             ISLocalToGlobalMapping NtoSall;
7282             IS                     is_V;
7283             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7284             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7285             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7286             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7287             PetscCall(ISDestroy(&is_V));
7288           }
7289           PetscCall(ISDestroy(&is_all_N));
7290           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7291           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7292           PetscCall(PetscObjectReference((PetscObject)S_new));
7293           if (pcbddc->deluxe_zerorows) {
7294             const PetscScalar *array;
7295             const PetscInt    *idxs_V, *idxs_all;
7296             PetscInt           i, n_V;
7297 
7298             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7299             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7300             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7301             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7302             PetscCall(VecGetArrayRead(pcis->D, &array));
7303             for (i = 0; i < n_V; i++) {
7304               PetscScalar val;
7305               PetscInt    idx;
7306 
7307               idx = idxs_V[i];
7308               val = array[idxs_all[idxs_V[i]]];
7309               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7310             }
7311             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7312             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7313             PetscCall(VecRestoreArrayRead(pcis->D, &array));
7314             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7315             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7316           }
7317           sub_schurs->S_Ej_all = S_new;
7318           PetscCall(MatDestroy(&S_new));
7319           if (sub_schurs->sum_S_Ej_all) {
7320             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7321             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7322             PetscCall(PetscObjectReference((PetscObject)S_new));
7323             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7324             sub_schurs->sum_S_Ej_all = S_new;
7325             PetscCall(MatDestroy(&S_new));
7326           }
7327           PetscCall(ISDestroy(&is_V_Sall));
7328           PetscCall(MatDestroy(&tmat));
7329         }
7330         /* destroy any change of basis context in sub_schurs */
7331         if (sub_schurs && sub_schurs->change) {
7332           PetscInt i;
7333 
7334           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7335           PetscCall(PetscFree(sub_schurs->change));
7336         }
7337       }
7338       if (pcbddc->switch_static) { /* need to save the local change */
7339         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7340       } else {
7341         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7342       }
7343       /* determine if any process has changed the pressures locally */
7344       pcbddc->change_interior = pcbddc->benign_have_null;
7345     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7346       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7347       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7348       pcbddc->use_qr_single    = qr_needed;
7349     }
7350   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7351     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7352       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7353       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7354     } else {
7355       Mat benign_global = NULL;
7356       if (pcbddc->benign_have_null) {
7357         Mat M;
7358 
7359         pcbddc->change_interior = PETSC_TRUE;
7360         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7361         PetscCall(VecReciprocal(pcis->vec1_N));
7362         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7363         if (pcbddc->benign_change) {
7364           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7365           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7366         } else {
7367           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7368           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7369         }
7370         PetscCall(MatISSetLocalMat(benign_global, M));
7371         PetscCall(MatDestroy(&M));
7372         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7373         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7374       }
7375       if (pcbddc->user_ChangeOfBasisMatrix) {
7376         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix));
7377         PetscCall(MatDestroy(&benign_global));
7378       } else if (pcbddc->benign_have_null) {
7379         pcbddc->ChangeOfBasisMatrix = benign_global;
7380       }
7381     }
7382     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7383       IS              is_global;
7384       const PetscInt *gidxs;
7385 
7386       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7387       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7388       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7389       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7390       PetscCall(ISDestroy(&is_global));
7391     }
7392   }
7393   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7394 
7395   if (!pcbddc->fake_change) {
7396     /* add pressure dofs to set of primal nodes for numbering purposes */
7397     for (i = 0; i < pcbddc->benign_n; i++) {
7398       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
7399       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7400       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
7401       pcbddc->local_primal_size_cc++;
7402       pcbddc->local_primal_size++;
7403     }
7404 
7405     /* check if a new primal space has been introduced (also take into account benign trick) */
7406     pcbddc->new_primal_space_local = PETSC_TRUE;
7407     if (olocal_primal_size == pcbddc->local_primal_size) {
7408       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7409       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7410       if (!pcbddc->new_primal_space_local) {
7411         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7412         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7413       }
7414     }
7415     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7416     PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7417   }
7418   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7419 
7420   /* flush dbg viewer */
7421   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7422 
7423   /* free workspace */
7424   PetscCall(PetscBTDestroy(&qr_needed_idx));
7425   PetscCall(PetscBTDestroy(&change_basis));
7426   if (!pcbddc->adaptive_selection) {
7427     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7428     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7429   } else {
7430     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7431     PetscCall(PetscFree(constraints_n));
7432     PetscCall(PetscFree(constraints_idxs_B));
7433   }
7434   PetscFunctionReturn(PETSC_SUCCESS);
7435 }
7436 
7437 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7438 {
7439   ISLocalToGlobalMapping map;
7440   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7441   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
7442   PetscInt               i, N;
7443   PetscBool              rcsr = PETSC_FALSE;
7444 
7445   PetscFunctionBegin;
7446   if (pcbddc->recompute_topography) {
7447     pcbddc->graphanalyzed = PETSC_FALSE;
7448     /* Reset previously computed graph */
7449     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7450     /* Init local Graph struct */
7451     PetscCall(MatGetSize(pc->pmat, &N, NULL));
7452     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7453     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7454 
7455     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7456     /* Check validity of the csr graph passed in by the user */
7457     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7458                pcbddc->mat_graph->nvtxs);
7459 
7460     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7461     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7462       PetscInt *xadj, *adjncy;
7463       PetscInt  nvtxs;
7464       PetscBool flg_row;
7465       Mat       A;
7466 
7467       PetscCall(PetscObjectReference((PetscObject)matis->A));
7468       A = matis->A;
7469       for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7470         Mat AtA;
7471 
7472         PetscCall(MatProductCreate(A, A, NULL, &AtA));
7473         PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7474         PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7475         PetscCall(MatProductSetFromOptions(AtA));
7476         PetscCall(MatProductSymbolic(AtA));
7477         PetscCall(MatProductClear(AtA));
7478         /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7479         AtA->assembled = PETSC_TRUE;
7480         PetscCall(MatDestroy(&A));
7481         A = AtA;
7482       }
7483       PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7484       if (flg_row) {
7485         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7486         pcbddc->computed_rowadj = PETSC_TRUE;
7487         PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7488         rcsr = PETSC_TRUE;
7489       }
7490       PetscCall(MatDestroy(&A));
7491     }
7492     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7493 
7494     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7495       PetscReal   *lcoords;
7496       PetscInt     n;
7497       MPI_Datatype dimrealtype;
7498 
7499       /* TODO: support for blocked */
7500       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7501       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7502       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7503       PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype));
7504       PetscCallMPI(MPI_Type_commit(&dimrealtype));
7505       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7506       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7507       PetscCallMPI(MPI_Type_free(&dimrealtype));
7508       PetscCall(PetscFree(pcbddc->mat_graph->coords));
7509 
7510       pcbddc->mat_graph->coords = lcoords;
7511       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7512       pcbddc->mat_graph->cnloc  = n;
7513     }
7514     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7515                pcbddc->mat_graph->nvtxs);
7516     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7517 
7518     /* attach info on disconnected subdomains if present */
7519     if (pcbddc->n_local_subs) {
7520       PetscInt *local_subs, n, totn;
7521 
7522       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7523       PetscCall(PetscMalloc1(n, &local_subs));
7524       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7525       for (i = 0; i < pcbddc->n_local_subs; i++) {
7526         const PetscInt *idxs;
7527         PetscInt        nl, j;
7528 
7529         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7530         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7531         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7532         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7533       }
7534       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7535       pcbddc->mat_graph->n_local_subs = totn + 1;
7536       pcbddc->mat_graph->local_subs   = local_subs;
7537     }
7538 
7539     /* Setup of Graph */
7540     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7541   }
7542 
7543   if (!pcbddc->graphanalyzed) {
7544     /* Graph's connected components analysis */
7545     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7546     pcbddc->graphanalyzed   = PETSC_TRUE;
7547     pcbddc->corner_selected = pcbddc->corner_selection;
7548   }
7549   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7550   PetscFunctionReturn(PETSC_SUCCESS);
7551 }
7552 
7553 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7554 {
7555   PetscInt     i, j, n;
7556   PetscScalar *alphas;
7557   PetscReal    norm, *onorms;
7558 
7559   PetscFunctionBegin;
7560   n = *nio;
7561   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7562   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7563   PetscCall(VecNormalize(vecs[0], &norm));
7564   if (norm < PETSC_SMALL) {
7565     onorms[0] = 0.0;
7566     PetscCall(VecSet(vecs[0], 0.0));
7567   } else {
7568     onorms[0] = norm;
7569   }
7570 
7571   for (i = 1; i < n; i++) {
7572     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7573     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7574     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7575     PetscCall(VecNormalize(vecs[i], &norm));
7576     if (norm < PETSC_SMALL) {
7577       onorms[i] = 0.0;
7578       PetscCall(VecSet(vecs[i], 0.0));
7579     } else {
7580       onorms[i] = norm;
7581     }
7582   }
7583   /* push nonzero vectors at the beginning */
7584   for (i = 0; i < n; i++) {
7585     if (onorms[i] == 0.0) {
7586       for (j = i + 1; j < n; j++) {
7587         if (onorms[j] != 0.0) {
7588           PetscCall(VecCopy(vecs[j], vecs[i]));
7589           onorms[j] = 0.0;
7590         }
7591       }
7592     }
7593   }
7594   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7595   PetscCall(PetscFree2(alphas, onorms));
7596   PetscFunctionReturn(PETSC_SUCCESS);
7597 }
7598 
7599 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7600 {
7601   ISLocalToGlobalMapping mapping;
7602   Mat                    A;
7603   PetscInt               n_neighs, *neighs, *n_shared, **shared;
7604   PetscMPIInt            size, rank, color;
7605   PetscInt              *xadj, *adjncy;
7606   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7607   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
7608   PetscInt               void_procs, *procs_candidates = NULL;
7609   PetscInt               xadj_count, *count;
7610   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
7611   PetscSubcomm           psubcomm;
7612   MPI_Comm               subcomm;
7613 
7614   PetscFunctionBegin;
7615   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7616   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7617   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7618   PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7619   PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7620   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7621 
7622   if (have_void) *have_void = PETSC_FALSE;
7623   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7624   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7625   PetscCall(MatISGetLocalMat(mat, &A));
7626   PetscCall(MatGetLocalSize(A, &n, NULL));
7627   im_active = !!n;
7628   PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7629   void_procs = size - active_procs;
7630   /* get ranks of non-active processes in mat communicator */
7631   if (void_procs) {
7632     PetscInt ncand;
7633 
7634     if (have_void) *have_void = PETSC_TRUE;
7635     PetscCall(PetscMalloc1(size, &procs_candidates));
7636     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7637     for (i = 0, ncand = 0; i < size; i++) {
7638       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7639     }
7640     /* force n_subdomains to be not greater that the number of non-active processes */
7641     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7642   }
7643 
7644   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7645      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7646   PetscCall(MatGetSize(mat, &N, NULL));
7647   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7648     PetscInt issize, isidx, dest;
7649     if (*n_subdomains == 1) dest = 0;
7650     else dest = rank;
7651     if (im_active) {
7652       issize = 1;
7653       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7654         isidx = procs_candidates[dest];
7655       } else {
7656         isidx = dest;
7657       }
7658     } else {
7659       issize = 0;
7660       isidx  = -1;
7661     }
7662     if (*n_subdomains != 1) *n_subdomains = active_procs;
7663     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7664     PetscCall(PetscFree(procs_candidates));
7665     PetscFunctionReturn(PETSC_SUCCESS);
7666   }
7667   PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7668   PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7669   threshold = PetscMax(threshold, 2);
7670 
7671   /* Get info on mapping */
7672   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7673   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7674 
7675   /* build local CSR graph of subdomains' connectivity */
7676   PetscCall(PetscMalloc1(2, &xadj));
7677   xadj[0] = 0;
7678   xadj[1] = PetscMax(n_neighs - 1, 0);
7679   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7680   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7681   PetscCall(PetscCalloc1(n, &count));
7682   for (i = 1; i < n_neighs; i++)
7683     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7684 
7685   xadj_count = 0;
7686   for (i = 1; i < n_neighs; i++) {
7687     for (j = 0; j < n_shared[i]; j++) {
7688       if (count[shared[i][j]] < threshold) {
7689         adjncy[xadj_count]     = neighs[i];
7690         adjncy_wgt[xadj_count] = n_shared[i];
7691         xadj_count++;
7692         break;
7693       }
7694     }
7695   }
7696   xadj[1] = xadj_count;
7697   PetscCall(PetscFree(count));
7698   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7699   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7700 
7701   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7702 
7703   /* Restrict work on active processes only */
7704   PetscCall(PetscMPIIntCast(im_active, &color));
7705   if (void_procs) {
7706     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7707     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7708     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7709     subcomm = PetscSubcommChild(psubcomm);
7710   } else {
7711     psubcomm = NULL;
7712     subcomm  = PetscObjectComm((PetscObject)mat);
7713   }
7714 
7715   v_wgt = NULL;
7716   if (!color) {
7717     PetscCall(PetscFree(xadj));
7718     PetscCall(PetscFree(adjncy));
7719     PetscCall(PetscFree(adjncy_wgt));
7720   } else {
7721     Mat             subdomain_adj;
7722     IS              new_ranks, new_ranks_contig;
7723     MatPartitioning partitioner;
7724     PetscInt        rstart = 0, rend = 0;
7725     PetscInt       *is_indices, *oldranks;
7726     PetscMPIInt     size;
7727     PetscBool       aggregate;
7728 
7729     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7730     if (void_procs) {
7731       PetscInt prank = rank;
7732       PetscCall(PetscMalloc1(size, &oldranks));
7733       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7734       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7735       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7736     } else {
7737       oldranks = NULL;
7738     }
7739     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7740     if (aggregate) { /* TODO: all this part could be made more efficient */
7741       PetscInt     lrows, row, ncols, *cols;
7742       PetscMPIInt  nrank;
7743       PetscScalar *vals;
7744 
7745       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7746       lrows = 0;
7747       if (nrank < redprocs) {
7748         lrows = size / redprocs;
7749         if (nrank < size % redprocs) lrows++;
7750       }
7751       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7752       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7753       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7754       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7755       row   = nrank;
7756       ncols = xadj[1] - xadj[0];
7757       cols  = adjncy;
7758       PetscCall(PetscMalloc1(ncols, &vals));
7759       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7760       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7761       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7762       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7763       PetscCall(PetscFree(xadj));
7764       PetscCall(PetscFree(adjncy));
7765       PetscCall(PetscFree(adjncy_wgt));
7766       PetscCall(PetscFree(vals));
7767       if (use_vwgt) {
7768         Vec                v;
7769         const PetscScalar *array;
7770         PetscInt           nl;
7771 
7772         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7773         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7774         PetscCall(VecAssemblyBegin(v));
7775         PetscCall(VecAssemblyEnd(v));
7776         PetscCall(VecGetLocalSize(v, &nl));
7777         PetscCall(VecGetArrayRead(v, &array));
7778         PetscCall(PetscMalloc1(nl, &v_wgt));
7779         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7780         PetscCall(VecRestoreArrayRead(v, &array));
7781         PetscCall(VecDestroy(&v));
7782       }
7783     } else {
7784       PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7785       if (use_vwgt) {
7786         PetscCall(PetscMalloc1(1, &v_wgt));
7787         v_wgt[0] = n;
7788       }
7789     }
7790     /* PetscCall(MatView(subdomain_adj,0)); */
7791 
7792     /* Partition */
7793     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7794 #if defined(PETSC_HAVE_PTSCOTCH)
7795     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7796 #elif defined(PETSC_HAVE_PARMETIS)
7797     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7798 #else
7799     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7800 #endif
7801     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7802     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7803     *n_subdomains = PetscMin((PetscInt)size, *n_subdomains);
7804     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7805     PetscCall(MatPartitioningSetFromOptions(partitioner));
7806     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7807     /* PetscCall(MatPartitioningView(partitioner,0)); */
7808 
7809     /* renumber new_ranks to avoid "holes" in new set of processors */
7810     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7811     PetscCall(ISDestroy(&new_ranks));
7812     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7813     if (!aggregate) {
7814       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7815         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7816         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7817       } else if (oldranks) {
7818         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7819       } else {
7820         ranks_send_to_idx[0] = is_indices[0];
7821       }
7822     } else {
7823       PetscInt     idx = 0;
7824       PetscMPIInt  tag;
7825       MPI_Request *reqs;
7826 
7827       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7828       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7829       for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7830       PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7831       PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE));
7832       PetscCall(PetscFree(reqs));
7833       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7834         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7835         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7836       } else if (oldranks) {
7837         ranks_send_to_idx[0] = oldranks[idx];
7838       } else {
7839         ranks_send_to_idx[0] = idx;
7840       }
7841     }
7842     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7843     /* clean up */
7844     PetscCall(PetscFree(oldranks));
7845     PetscCall(ISDestroy(&new_ranks_contig));
7846     PetscCall(MatDestroy(&subdomain_adj));
7847     PetscCall(MatPartitioningDestroy(&partitioner));
7848   }
7849   PetscCall(PetscSubcommDestroy(&psubcomm));
7850   PetscCall(PetscFree(procs_candidates));
7851 
7852   /* assemble parallel IS for sends */
7853   i = 1;
7854   if (!color) i = 0;
7855   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7856   PetscFunctionReturn(PETSC_SUCCESS);
7857 }
7858 
7859 typedef enum {
7860   MATDENSE_PRIVATE = 0,
7861   MATAIJ_PRIVATE,
7862   MATBAIJ_PRIVATE,
7863   MATSBAIJ_PRIVATE
7864 } MatTypePrivate;
7865 
7866 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7867 {
7868   Mat                    local_mat;
7869   IS                     is_sends_internal;
7870   PetscInt               rows, cols, new_local_rows;
7871   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7872   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7873   ISLocalToGlobalMapping l2gmap;
7874   PetscInt              *l2gmap_indices;
7875   const PetscInt        *is_indices;
7876   MatType                new_local_type;
7877   /* buffers */
7878   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7879   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7880   PetscInt          *recv_buffer_idxs_local;
7881   PetscScalar       *ptr_vals, *recv_buffer_vals;
7882   const PetscScalar *send_buffer_vals;
7883   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7884   /* MPI */
7885   MPI_Comm     comm, comm_n;
7886   PetscSubcomm subcomm;
7887   PetscMPIInt  n_sends, n_recvs, size;
7888   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7889   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7890   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7891   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7892   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7893 
7894   PetscFunctionBegin;
7895   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7896   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7897   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7898   PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
7899   PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
7900   PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
7901   PetscValidLogicalCollectiveBool(mat, reuse, 6);
7902   PetscValidLogicalCollectiveInt(mat, nis, 8);
7903   PetscValidLogicalCollectiveInt(mat, nvecs, 10);
7904   if (nvecs) {
7905     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7906     PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
7907   }
7908   /* further checks */
7909   PetscCall(MatISGetLocalMat(mat, &local_mat));
7910   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7911   /* XXX hack for multi_element */
7912   if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat));
7913   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7914   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7915 
7916   PetscCall(MatGetSize(local_mat, &rows, &cols));
7917   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7918   if (reuse && *mat_n) {
7919     PetscInt mrows, mcols, mnrows, mncols;
7920     PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
7921     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7922     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7923     PetscCall(MatGetSize(mat, &mrows, &mcols));
7924     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7925     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7926     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7927   }
7928   PetscCall(MatGetBlockSize(local_mat, &bs));
7929   PetscValidLogicalCollectiveInt(mat, bs, 1);
7930 
7931   /* prepare IS for sending if not provided */
7932   if (!is_sends) {
7933     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7934     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7935   } else {
7936     PetscCall(PetscObjectReference((PetscObject)is_sends));
7937     is_sends_internal = is_sends;
7938   }
7939 
7940   /* get comm */
7941   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
7942 
7943   /* compute number of sends */
7944   PetscCall(ISGetLocalSize(is_sends_internal, &i));
7945   PetscCall(PetscMPIIntCast(i, &n_sends));
7946 
7947   /* compute number of receives */
7948   PetscCallMPI(MPI_Comm_size(comm, &size));
7949   PetscCall(PetscMalloc1(size, &iflags));
7950   PetscCall(PetscArrayzero(iflags, size));
7951   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
7952   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7953   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
7954   PetscCall(PetscFree(iflags));
7955 
7956   /* restrict comm if requested */
7957   subcomm     = NULL;
7958   destroy_mat = PETSC_FALSE;
7959   if (restrict_comm) {
7960     PetscMPIInt color, subcommsize;
7961 
7962     color = 0;
7963     if (restrict_full) {
7964       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7965     } else {
7966       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7967     }
7968     PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
7969     subcommsize = size - subcommsize;
7970     /* check if reuse has been requested */
7971     if (reuse) {
7972       if (*mat_n) {
7973         PetscMPIInt subcommsize2;
7974         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
7975         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
7976         comm_n = PetscObjectComm((PetscObject)*mat_n);
7977       } else {
7978         comm_n = PETSC_COMM_SELF;
7979       }
7980     } else { /* MAT_INITIAL_MATRIX */
7981       PetscMPIInt rank;
7982 
7983       PetscCallMPI(MPI_Comm_rank(comm, &rank));
7984       PetscCall(PetscSubcommCreate(comm, &subcomm));
7985       PetscCall(PetscSubcommSetNumber(subcomm, 2));
7986       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
7987       comm_n = PetscSubcommChild(subcomm);
7988     }
7989     /* flag to destroy *mat_n if not significative */
7990     if (color) destroy_mat = PETSC_TRUE;
7991   } else {
7992     comm_n = comm;
7993   }
7994 
7995   /* prepare send/receive buffers */
7996   PetscCall(PetscMalloc1(size, &ilengths_idxs));
7997   PetscCall(PetscArrayzero(ilengths_idxs, size));
7998   PetscCall(PetscMalloc1(size, &ilengths_vals));
7999   PetscCall(PetscArrayzero(ilengths_vals, size));
8000   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8001 
8002   /* Get data from local matrices */
8003   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8004   /* TODO: See below some guidelines on how to prepare the local buffers */
8005   /*
8006        send_buffer_vals should contain the raw values of the local matrix
8007        send_buffer_idxs should contain:
8008        - MatType_PRIVATE type
8009        - PetscInt        size_of_l2gmap
8010        - PetscInt        global_row_indices[size_of_l2gmap]
8011        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8012     */
8013   {
8014     ISLocalToGlobalMapping mapping;
8015 
8016     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8017     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8018     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8019     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8020     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8021     send_buffer_idxs[1] = i;
8022     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8023     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8024     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8025     PetscCall(PetscMPIIntCast(i, &len));
8026     for (i = 0; i < n_sends; i++) {
8027       ilengths_vals[is_indices[i]] = len * len;
8028       ilengths_idxs[is_indices[i]] = len + 2;
8029     }
8030   }
8031   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8032   /* additional is (if any) */
8033   if (nis) {
8034     PetscMPIInt psum;
8035     PetscInt    j;
8036     for (j = 0, psum = 0; j < nis; j++) {
8037       PetscInt plen;
8038       PetscCall(ISGetLocalSize(isarray[j], &plen));
8039       PetscCall(PetscMPIIntCast(plen, &len));
8040       psum += len + 1; /* indices + length */
8041     }
8042     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8043     for (j = 0, psum = 0; j < nis; j++) {
8044       PetscInt        plen;
8045       const PetscInt *is_array_idxs;
8046       PetscCall(ISGetLocalSize(isarray[j], &plen));
8047       send_buffer_idxs_is[psum] = plen;
8048       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8049       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8050       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8051       psum += plen + 1; /* indices + length */
8052     }
8053     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8054     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8055   }
8056   PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8057 
8058   buf_size_idxs    = 0;
8059   buf_size_vals    = 0;
8060   buf_size_idxs_is = 0;
8061   buf_size_vecs    = 0;
8062   for (i = 0; i < n_recvs; i++) {
8063     buf_size_idxs += (PetscInt)olengths_idxs[i];
8064     buf_size_vals += (PetscInt)olengths_vals[i];
8065     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
8066     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
8067   }
8068   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8069   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8070   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8071   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8072 
8073   /* get new tags for clean communications */
8074   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8075   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8076   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8077   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8078 
8079   /* allocate for requests */
8080   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8081   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8082   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8083   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8084   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8085   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8086   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8087   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8088 
8089   /* communications */
8090   ptr_idxs    = recv_buffer_idxs;
8091   ptr_vals    = recv_buffer_vals;
8092   ptr_idxs_is = recv_buffer_idxs_is;
8093   ptr_vecs    = recv_buffer_vecs;
8094   for (i = 0; i < n_recvs; i++) {
8095     source_dest = onodes[i];
8096     PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i]));
8097     PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i]));
8098     ptr_idxs += olengths_idxs[i];
8099     ptr_vals += olengths_vals[i];
8100     if (nis) {
8101       source_dest = onodes_is[i];
8102       PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i]));
8103       ptr_idxs_is += olengths_idxs_is[i];
8104     }
8105     if (nvecs) {
8106       source_dest = onodes[i];
8107       PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i]));
8108       ptr_vecs += olengths_idxs[i] - 2;
8109     }
8110   }
8111   for (i = 0; i < n_sends; i++) {
8112     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8113     PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8114     PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8115     if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8116     if (nvecs) {
8117       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8118       PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8119     }
8120   }
8121   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8122   PetscCall(ISDestroy(&is_sends_internal));
8123 
8124   /* assemble new l2g map */
8125   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8126   ptr_idxs       = recv_buffer_idxs;
8127   new_local_rows = 0;
8128   for (i = 0; i < n_recvs; i++) {
8129     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8130     ptr_idxs += olengths_idxs[i];
8131   }
8132   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8133   ptr_idxs       = recv_buffer_idxs;
8134   new_local_rows = 0;
8135   for (i = 0; i < n_recvs; i++) {
8136     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8137     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8138     ptr_idxs += olengths_idxs[i];
8139   }
8140   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8141   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8142   PetscCall(PetscFree(l2gmap_indices));
8143 
8144   /* infer new local matrix type from received local matrices type */
8145   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8146   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8147   if (n_recvs) {
8148     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8149     ptr_idxs                              = recv_buffer_idxs;
8150     for (i = 0; i < n_recvs; i++) {
8151       if ((PetscInt)new_local_type_private != *ptr_idxs) {
8152         new_local_type_private = MATAIJ_PRIVATE;
8153         break;
8154       }
8155       ptr_idxs += olengths_idxs[i];
8156     }
8157     switch (new_local_type_private) {
8158     case MATDENSE_PRIVATE:
8159       new_local_type = MATSEQAIJ;
8160       bs             = 1;
8161       break;
8162     case MATAIJ_PRIVATE:
8163       new_local_type = MATSEQAIJ;
8164       bs             = 1;
8165       break;
8166     case MATBAIJ_PRIVATE:
8167       new_local_type = MATSEQBAIJ;
8168       break;
8169     case MATSBAIJ_PRIVATE:
8170       new_local_type = MATSEQSBAIJ;
8171       break;
8172     default:
8173       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8174     }
8175   } else { /* by default, new_local_type is seqaij */
8176     new_local_type = MATSEQAIJ;
8177     bs             = 1;
8178   }
8179 
8180   /* create MATIS object if needed */
8181   if (!reuse) {
8182     PetscCall(MatGetSize(mat, &rows, &cols));
8183     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8184   } else {
8185     /* it also destroys the local matrices */
8186     if (*mat_n) {
8187       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8188     } else { /* this is a fake object */
8189       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8190     }
8191   }
8192   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8193   PetscCall(MatSetType(local_mat, new_local_type));
8194 
8195   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8196 
8197   /* Global to local map of received indices */
8198   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8199   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8200   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8201 
8202   /* restore attributes -> type of incoming data and its size */
8203   buf_size_idxs = 0;
8204   for (i = 0; i < n_recvs; i++) {
8205     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
8206     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8207     buf_size_idxs += (PetscInt)olengths_idxs[i];
8208   }
8209   PetscCall(PetscFree(recv_buffer_idxs));
8210 
8211   /* set preallocation */
8212   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8213   if (!newisdense) {
8214     PetscInt *new_local_nnz = NULL;
8215 
8216     ptr_idxs = recv_buffer_idxs_local;
8217     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8218     for (i = 0; i < n_recvs; i++) {
8219       PetscInt j;
8220       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8221         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8222       } else {
8223         /* TODO */
8224       }
8225       ptr_idxs += olengths_idxs[i];
8226     }
8227     if (new_local_nnz) {
8228       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8229       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8230       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8231       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8232       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8233       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8234     } else {
8235       PetscCall(MatSetUp(local_mat));
8236     }
8237     PetscCall(PetscFree(new_local_nnz));
8238   } else {
8239     PetscCall(MatSetUp(local_mat));
8240   }
8241 
8242   /* set values */
8243   ptr_vals = recv_buffer_vals;
8244   ptr_idxs = recv_buffer_idxs_local;
8245   for (i = 0; i < n_recvs; i++) {
8246     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8247       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8248       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8249       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8250       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8251       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8252     } else {
8253       /* TODO */
8254     }
8255     ptr_idxs += olengths_idxs[i];
8256     ptr_vals += olengths_vals[i];
8257   }
8258   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8259   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8260   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8261   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8262   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8263   PetscCall(PetscFree(recv_buffer_vals));
8264 
8265 #if 0
8266   if (!restrict_comm) { /* check */
8267     Vec       lvec,rvec;
8268     PetscReal infty_error;
8269 
8270     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8271     PetscCall(VecSetRandom(rvec,NULL));
8272     PetscCall(MatMult(mat,rvec,lvec));
8273     PetscCall(VecScale(lvec,-1.0));
8274     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8275     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8276     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8277     PetscCall(VecDestroy(&rvec));
8278     PetscCall(VecDestroy(&lvec));
8279   }
8280 #endif
8281 
8282   /* assemble new additional is (if any) */
8283   if (nis) {
8284     PetscInt **temp_idxs, *count_is, j, psum;
8285 
8286     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8287     PetscCall(PetscCalloc1(nis, &count_is));
8288     ptr_idxs = recv_buffer_idxs_is;
8289     psum     = 0;
8290     for (i = 0; i < n_recvs; i++) {
8291       for (j = 0; j < nis; j++) {
8292         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8293         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
8294         psum += plen;
8295         ptr_idxs += plen + 1; /* shift pointer to received data */
8296       }
8297     }
8298     PetscCall(PetscMalloc1(nis, &temp_idxs));
8299     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8300     for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8301     PetscCall(PetscArrayzero(count_is, nis));
8302     ptr_idxs = recv_buffer_idxs_is;
8303     for (i = 0; i < n_recvs; i++) {
8304       for (j = 0; j < nis; j++) {
8305         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8306         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8307         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
8308         ptr_idxs += plen + 1; /* shift pointer to received data */
8309       }
8310     }
8311     for (i = 0; i < nis; i++) {
8312       PetscCall(ISDestroy(&isarray[i]));
8313       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8314       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8315     }
8316     PetscCall(PetscFree(count_is));
8317     PetscCall(PetscFree(temp_idxs[0]));
8318     PetscCall(PetscFree(temp_idxs));
8319   }
8320   /* free workspace */
8321   PetscCall(PetscFree(recv_buffer_idxs_is));
8322   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8323   PetscCall(PetscFree(send_buffer_idxs));
8324   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8325   if (isdense) {
8326     PetscCall(MatISGetLocalMat(mat, &local_mat));
8327     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8328     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8329   } else {
8330     /* PetscCall(PetscFree(send_buffer_vals)); */
8331   }
8332   if (nis) {
8333     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8334     PetscCall(PetscFree(send_buffer_idxs_is));
8335   }
8336 
8337   if (nvecs) {
8338     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8339     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8340     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8341     PetscCall(VecDestroy(&nnsp_vec[0]));
8342     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8343     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8344     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8345     /* set values */
8346     ptr_vals = recv_buffer_vecs;
8347     ptr_idxs = recv_buffer_idxs_local;
8348     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8349     for (i = 0; i < n_recvs; i++) {
8350       PetscInt j;
8351       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8352       ptr_idxs += olengths_idxs[i];
8353       ptr_vals += olengths_idxs[i] - 2;
8354     }
8355     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8356     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8357     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8358   }
8359 
8360   PetscCall(PetscFree(recv_buffer_vecs));
8361   PetscCall(PetscFree(recv_buffer_idxs_local));
8362   PetscCall(PetscFree(recv_req_idxs));
8363   PetscCall(PetscFree(recv_req_vals));
8364   PetscCall(PetscFree(recv_req_vecs));
8365   PetscCall(PetscFree(recv_req_idxs_is));
8366   PetscCall(PetscFree(send_req_idxs));
8367   PetscCall(PetscFree(send_req_vals));
8368   PetscCall(PetscFree(send_req_vecs));
8369   PetscCall(PetscFree(send_req_idxs_is));
8370   PetscCall(PetscFree(ilengths_vals));
8371   PetscCall(PetscFree(ilengths_idxs));
8372   PetscCall(PetscFree(olengths_vals));
8373   PetscCall(PetscFree(olengths_idxs));
8374   PetscCall(PetscFree(onodes));
8375   if (nis) {
8376     PetscCall(PetscFree(ilengths_idxs_is));
8377     PetscCall(PetscFree(olengths_idxs_is));
8378     PetscCall(PetscFree(onodes_is));
8379   }
8380   PetscCall(PetscSubcommDestroy(&subcomm));
8381   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8382     PetscCall(MatDestroy(mat_n));
8383     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8384     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8385       PetscCall(VecDestroy(&nnsp_vec[0]));
8386     }
8387     *mat_n = NULL;
8388   }
8389   PetscFunctionReturn(PETSC_SUCCESS);
8390 }
8391 
8392 /* temporary hack into ksp private data structure */
8393 #include <petsc/private/kspimpl.h>
8394 
8395 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8396 {
8397   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
8398   PC_IS                 *pcis   = (PC_IS *)pc->data;
8399   PCBDDCGraph            graph  = pcbddc->mat_graph;
8400   Mat                    coarse_mat, coarse_mat_is;
8401   Mat                    coarsedivudotp = NULL;
8402   Mat                    coarseG, t_coarse_mat_is;
8403   MatNullSpace           CoarseNullSpace = NULL;
8404   ISLocalToGlobalMapping coarse_islg;
8405   IS                     coarse_is, *isarray, corners;
8406   PetscInt               i, im_active = -1, active_procs = -1;
8407   PetscInt               nis, nisdofs, nisneu, nisvert;
8408   PetscInt               coarse_eqs_per_proc, coarsening_ratio;
8409   PC                     pc_temp;
8410   PCType                 coarse_pc_type;
8411   KSPType                coarse_ksp_type;
8412   PetscBool              multilevel_requested, multilevel_allowed;
8413   PetscBool              coarse_reuse, multi_element = graph->multi_element;
8414   PetscInt               ncoarse, nedcfield;
8415   PetscBool              compute_vecs = PETSC_FALSE;
8416   PetscScalar           *array;
8417   MatReuse               coarse_mat_reuse;
8418   PetscBool              restr, full_restr, have_void;
8419   PetscMPIInt            size;
8420 
8421   PetscFunctionBegin;
8422   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8423   /* Assign global numbering to coarse dofs */
8424   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8425     PetscInt ocoarse_size;
8426     compute_vecs = PETSC_TRUE;
8427 
8428     pcbddc->new_primal_space = PETSC_TRUE;
8429     ocoarse_size             = pcbddc->coarse_size;
8430     PetscCall(PetscFree(pcbddc->global_primal_indices));
8431     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8432     /* see if we can avoid some work */
8433     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8434       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8435       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8436         PetscCall(KSPReset(pcbddc->coarse_ksp));
8437         coarse_reuse = PETSC_FALSE;
8438       } else { /* we can safely reuse already computed coarse matrix */
8439         coarse_reuse = PETSC_TRUE;
8440       }
8441     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8442       coarse_reuse = PETSC_FALSE;
8443     }
8444     /* reset any subassembling information */
8445     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8446   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8447     coarse_reuse = PETSC_TRUE;
8448   }
8449   if (coarse_reuse && pcbddc->coarse_ksp) {
8450     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8451     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8452     coarse_mat_reuse = MAT_REUSE_MATRIX;
8453   } else {
8454     coarse_mat       = NULL;
8455     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8456   }
8457 
8458   /* creates temporary l2gmap and IS for coarse indexes */
8459   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8460   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8461 
8462   /* creates temporary MATIS object for coarse matrix */
8463   PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8464   PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8465   PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8466   PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE));
8467   PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8468   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8469   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8470   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8471   PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8472 
8473   /* count "active" (i.e. with positive local size) and "void" processes */
8474   im_active = !!pcis->n;
8475   PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8476 
8477   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8478   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8479   /* full_restr : just use the receivers from the subassembling pattern */
8480   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8481   coarse_mat_is        = NULL;
8482   multilevel_allowed   = PETSC_FALSE;
8483   multilevel_requested = PETSC_FALSE;
8484   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8485   if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8486   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8487   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8488   coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8489   if (multilevel_requested) {
8490     ncoarse    = active_procs / coarsening_ratio;
8491     restr      = PETSC_FALSE;
8492     full_restr = PETSC_FALSE;
8493   } else {
8494     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8495     restr      = PETSC_TRUE;
8496     full_restr = PETSC_TRUE;
8497   }
8498   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8499   ncoarse = PetscMax(1, ncoarse);
8500   if (!pcbddc->coarse_subassembling) {
8501     if (coarsening_ratio > 1) {
8502       if (multilevel_requested) {
8503         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8504       } else {
8505         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8506       }
8507     } else {
8508       PetscMPIInt rank;
8509 
8510       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8511       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8512       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8513     }
8514   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8515     PetscInt psum;
8516     if (pcbddc->coarse_ksp) psum = 1;
8517     else psum = 0;
8518     PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8519     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8520   }
8521   /* determine if we can go multilevel */
8522   if (multilevel_requested) {
8523     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8524     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
8525   }
8526   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8527 
8528   /* dump subassembling pattern */
8529   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8530   /* compute dofs splitting and neumann boundaries for coarse dofs */
8531   nedcfield = -1;
8532   corners   = NULL;
8533   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8534     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
8535     const PetscInt        *idxs;
8536     ISLocalToGlobalMapping tmap;
8537 
8538     /* create map between primal indices (in local representative ordering) and local primal numbering */
8539     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8540     /* allocate space for temporary storage */
8541     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8542     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8543     /* allocate for IS array */
8544     nisdofs = pcbddc->n_ISForDofsLocal;
8545     if (pcbddc->nedclocal) {
8546       if (pcbddc->nedfield > -1) {
8547         nedcfield = pcbddc->nedfield;
8548       } else {
8549         nedcfield = 0;
8550         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8551         nisdofs = 1;
8552       }
8553     }
8554     nisneu  = !!pcbddc->NeumannBoundariesLocal;
8555     nisvert = 0; /* nisvert is not used */
8556     nis     = nisdofs + nisneu + nisvert;
8557     PetscCall(PetscMalloc1(nis, &isarray));
8558     /* dofs splitting */
8559     for (i = 0; i < nisdofs; i++) {
8560       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8561       if (nedcfield != i) {
8562         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8563         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8564         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8565         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8566       } else {
8567         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8568         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8569         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8570         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8571         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8572       }
8573       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8574       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8575       /* PetscCall(ISView(isarray[i],0)); */
8576     }
8577     /* neumann boundaries */
8578     if (pcbddc->NeumannBoundariesLocal) {
8579       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8580       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8581       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8582       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8583       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8584       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8585       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8586       /* PetscCall(ISView(isarray[nisdofs],0)); */
8587     }
8588     /* coordinates */
8589     if (pcbddc->corner_selected) {
8590       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8591       PetscCall(ISGetLocalSize(corners, &tsize));
8592       PetscCall(ISGetIndices(corners, &idxs));
8593       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8594       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8595       PetscCall(ISRestoreIndices(corners, &idxs));
8596       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8597       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8598       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8599     }
8600     PetscCall(PetscFree(tidxs));
8601     PetscCall(PetscFree(tidxs2));
8602     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8603   } else {
8604     nis     = 0;
8605     nisdofs = 0;
8606     nisneu  = 0;
8607     nisvert = 0;
8608     isarray = NULL;
8609   }
8610   /* destroy no longer needed map */
8611   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8612 
8613   /* subassemble */
8614   if (multilevel_allowed) {
8615     Vec       vp[1];
8616     PetscInt  nvecs = 0;
8617     PetscBool reuse;
8618 
8619     vp[0] = NULL;
8620     /* XXX HDIV also */
8621     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8622       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8623       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8624       PetscCall(VecSetType(vp[0], VECSTANDARD));
8625       nvecs = 1;
8626 
8627       if (pcbddc->divudotp) {
8628         Mat      B, loc_divudotp;
8629         Vec      v, p;
8630         IS       dummy;
8631         PetscInt np;
8632 
8633         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8634         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8635         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8636         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8637         PetscCall(MatCreateVecs(B, &v, &p));
8638         PetscCall(VecSet(p, 1.));
8639         PetscCall(MatMultTranspose(B, p, v));
8640         PetscCall(VecDestroy(&p));
8641         PetscCall(MatDestroy(&B));
8642         PetscCall(VecGetArray(vp[0], &array));
8643         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8644         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8645         PetscCall(VecResetArray(pcbddc->vec1_P));
8646         PetscCall(VecRestoreArray(vp[0], &array));
8647         PetscCall(ISDestroy(&dummy));
8648         PetscCall(VecDestroy(&v));
8649       }
8650     }
8651     if (coarse_mat) reuse = PETSC_TRUE;
8652     else reuse = PETSC_FALSE;
8653     if (multi_element) {
8654       /* XXX divudotp */
8655       PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE));
8656       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8657       coarse_mat_is = t_coarse_mat_is;
8658     } else {
8659       PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8660       if (reuse) {
8661         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8662       } else {
8663         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8664       }
8665       if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8666         PetscScalar       *arraym;
8667         const PetscScalar *arrayv;
8668         PetscInt           nl;
8669         PetscCall(VecGetLocalSize(vp[0], &nl));
8670         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8671         PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8672         PetscCall(VecGetArrayRead(vp[0], &arrayv));
8673         PetscCall(PetscArraycpy(arraym, arrayv, nl));
8674         PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8675         PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8676         PetscCall(VecDestroy(&vp[0]));
8677       } else {
8678         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8679       }
8680     }
8681   } else {
8682     if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8683     else {
8684       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8685       coarse_mat_is = t_coarse_mat_is;
8686     }
8687   }
8688   if (coarse_mat_is || coarse_mat) {
8689     if (!multilevel_allowed) {
8690       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8691     } else {
8692       /* if this matrix is present, it means we are not reusing the coarse matrix */
8693       if (coarse_mat_is) {
8694         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8695         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8696         coarse_mat = coarse_mat_is;
8697       }
8698     }
8699   }
8700   PetscCall(MatDestroy(&t_coarse_mat_is));
8701   PetscCall(MatDestroy(&coarse_mat_is));
8702 
8703   /* create local to global scatters for coarse problem */
8704   if (compute_vecs) {
8705     PetscInt lrows;
8706     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8707     if (coarse_mat) {
8708       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8709     } else {
8710       lrows = 0;
8711     }
8712     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8713     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8714     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8715     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8716     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8717   }
8718   PetscCall(ISDestroy(&coarse_is));
8719 
8720   /* set defaults for coarse KSP and PC */
8721   if (multilevel_allowed) {
8722     coarse_ksp_type = KSPRICHARDSON;
8723     coarse_pc_type  = PCBDDC;
8724   } else {
8725     coarse_ksp_type = KSPPREONLY;
8726     coarse_pc_type  = PCREDUNDANT;
8727   }
8728 
8729   /* print some info if requested */
8730   if (pcbddc->dbg_flag) {
8731     if (!multilevel_allowed) {
8732       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8733       if (multilevel_requested) {
8734         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8735       } else if (pcbddc->max_levels) {
8736         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8737       }
8738       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8739     }
8740   }
8741 
8742   /* communicate coarse discrete gradient */
8743   coarseG = NULL;
8744   if (pcbddc->nedcG && multilevel_allowed) {
8745     MPI_Comm ccomm;
8746     if (coarse_mat) {
8747       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8748     } else {
8749       ccomm = MPI_COMM_NULL;
8750     }
8751     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8752   }
8753 
8754   /* create the coarse KSP object only once with defaults */
8755   if (coarse_mat) {
8756     PetscBool   isredundant, isbddc, force, valid;
8757     PetscViewer dbg_viewer = NULL;
8758     PetscBool   isset, issym, isher, isspd;
8759 
8760     if (pcbddc->dbg_flag) {
8761       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8762       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8763     }
8764     if (!pcbddc->coarse_ksp) {
8765       char   prefix[256], str_level[16];
8766       size_t len;
8767 
8768       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8769       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8770       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8771       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8772       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1));
8773       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8774       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8775       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8776       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8777       /* TODO is this logic correct? should check for coarse_mat type */
8778       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8779       /* prefix */
8780       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8781       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8782       if (!pcbddc->current_level) {
8783         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8784         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8785       } else {
8786         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8787         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8788         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8789         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8790         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8791         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)pcbddc->current_level));
8792         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8793       }
8794       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8795       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8796       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8797       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8798       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8799       /* allow user customization */
8800       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8801       /* get some info after set from options */
8802       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8803       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8804       force = PETSC_FALSE;
8805       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8806       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8807       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8808       if (multilevel_allowed && !force && !valid) {
8809         isbddc = PETSC_TRUE;
8810         PetscCall(PCSetType(pc_temp, PCBDDC));
8811         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8812         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8813         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8814         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8815           PetscObjectOptionsBegin((PetscObject)pc_temp);
8816           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8817           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8818           PetscOptionsEnd();
8819           pc_temp->setfromoptionscalled++;
8820         }
8821       }
8822     }
8823     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8824     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8825     if (nisdofs) {
8826       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8827       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8828     }
8829     if (nisneu) {
8830       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8831       PetscCall(ISDestroy(&isarray[nisdofs]));
8832     }
8833     if (nisvert) {
8834       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8835       PetscCall(ISDestroy(&isarray[nis - 1]));
8836     }
8837     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8838 
8839     /* get some info after set from options */
8840     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8841 
8842     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8843     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8844     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8845     force = PETSC_FALSE;
8846     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8847     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8848     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8849     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8850     if (isredundant) {
8851       KSP inner_ksp;
8852       PC  inner_pc;
8853 
8854       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8855       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8856     }
8857 
8858     /* parameters which miss an API */
8859     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8860     if (isbddc) {
8861       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8862 
8863       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8864       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8865       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8866       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8867       if (pcbddc_coarse->benign_saddle_point) {
8868         Mat                    coarsedivudotp_is;
8869         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8870         IS                     row, col;
8871         const PetscInt        *gidxs;
8872         PetscInt               n, st, M, N;
8873 
8874         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8875         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8876         st = st - n;
8877         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8878         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8879         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8880         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8881         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8882         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8883         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8884         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8885         PetscCall(ISGetSize(row, &M));
8886         PetscCall(MatGetSize(coarse_mat, &N, NULL));
8887         PetscCall(ISDestroy(&row));
8888         PetscCall(ISDestroy(&col));
8889         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8890         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8891         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8892         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8893         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8894         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8895         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8896         PetscCall(MatDestroy(&coarsedivudotp));
8897         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8898         PetscCall(MatDestroy(&coarsedivudotp_is));
8899         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8900         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8901       }
8902     }
8903 
8904     /* propagate symmetry info of coarse matrix */
8905     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8906     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8907     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8908     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8909     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8910     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8911     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
8912 
8913     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8914     /* set operators */
8915     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8916     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8917     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8918     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8919   }
8920   PetscCall(MatDestroy(&coarseG));
8921   PetscCall(PetscFree(isarray));
8922 #if 0
8923   {
8924     PetscViewer viewer;
8925     char filename[256];
8926     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
8927     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8928     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8929     PetscCall(MatView(coarse_mat,viewer));
8930     PetscCall(PetscViewerPopFormat(viewer));
8931     PetscCall(PetscViewerDestroy(&viewer));
8932   }
8933 #endif
8934 
8935   if (corners) {
8936     Vec             gv;
8937     IS              is;
8938     const PetscInt *idxs;
8939     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8940     PetscScalar    *coords;
8941 
8942     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8943     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8944     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8945     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8946     PetscCall(VecSetBlockSize(gv, cdim));
8947     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8948     PetscCall(VecSetType(gv, VECSTANDARD));
8949     PetscCall(VecSetFromOptions(gv));
8950     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8951 
8952     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8953     PetscCall(ISGetLocalSize(is, &n));
8954     PetscCall(ISGetIndices(is, &idxs));
8955     PetscCall(PetscMalloc1(n * cdim, &coords));
8956     for (i = 0; i < n; i++) {
8957       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8958     }
8959     PetscCall(ISRestoreIndices(is, &idxs));
8960     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8961 
8962     PetscCall(ISGetLocalSize(corners, &n));
8963     PetscCall(ISGetIndices(corners, &idxs));
8964     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
8965     PetscCall(ISRestoreIndices(corners, &idxs));
8966     PetscCall(PetscFree(coords));
8967     PetscCall(VecAssemblyBegin(gv));
8968     PetscCall(VecAssemblyEnd(gv));
8969     PetscCall(VecGetArray(gv, &coords));
8970     if (pcbddc->coarse_ksp) {
8971       PC        coarse_pc;
8972       PetscBool isbddc;
8973 
8974       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
8975       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
8976       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8977         PetscReal *realcoords;
8978 
8979         PetscCall(VecGetLocalSize(gv, &n));
8980 #if defined(PETSC_USE_COMPLEX)
8981         PetscCall(PetscMalloc1(n, &realcoords));
8982         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8983 #else
8984         realcoords = coords;
8985 #endif
8986         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
8987 #if defined(PETSC_USE_COMPLEX)
8988         PetscCall(PetscFree(realcoords));
8989 #endif
8990       }
8991     }
8992     PetscCall(VecRestoreArray(gv, &coords));
8993     PetscCall(VecDestroy(&gv));
8994   }
8995   PetscCall(ISDestroy(&corners));
8996 
8997   if (pcbddc->coarse_ksp) {
8998     Vec crhs, csol;
8999 
9000     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9001     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9002     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9003     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9004   }
9005   PetscCall(MatDestroy(&coarsedivudotp));
9006 
9007   /* compute null space for coarse solver if the benign trick has been requested */
9008   if (pcbddc->benign_null) {
9009     PetscCall(VecSet(pcbddc->vec1_P, 0.));
9010     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9011     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9012     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9013     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9014     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9015     if (coarse_mat) {
9016       Vec          nullv;
9017       PetscScalar *array, *array2;
9018       PetscInt     nl;
9019 
9020       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9021       PetscCall(VecGetLocalSize(nullv, &nl));
9022       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9023       PetscCall(VecGetArray(nullv, &array2));
9024       PetscCall(PetscArraycpy(array2, array, nl));
9025       PetscCall(VecRestoreArray(nullv, &array2));
9026       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9027       PetscCall(VecNormalize(nullv, NULL));
9028       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9029       PetscCall(VecDestroy(&nullv));
9030     }
9031   }
9032   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9033 
9034   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9035   if (pcbddc->coarse_ksp) {
9036     PetscBool ispreonly;
9037 
9038     if (CoarseNullSpace) {
9039       PetscBool isnull;
9040 
9041       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9042       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9043       /* TODO: add local nullspaces (if any) */
9044     }
9045     /* setup coarse ksp */
9046     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9047     /* Check coarse problem if in debug mode or if solving with an iterative method */
9048     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9049     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9050       KSP         check_ksp;
9051       KSPType     check_ksp_type;
9052       PC          check_pc;
9053       Vec         check_vec, coarse_vec;
9054       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9055       PetscInt    its;
9056       PetscBool   compute_eigs;
9057       PetscReal  *eigs_r, *eigs_c;
9058       PetscInt    neigs;
9059       const char *prefix;
9060 
9061       /* Create ksp object suitable for estimation of extreme eigenvalues */
9062       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9063       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9064       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9065       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9066       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9067       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size));
9068       /* prevent from setup unneeded object */
9069       PetscCall(KSPGetPC(check_ksp, &check_pc));
9070       PetscCall(PCSetType(check_pc, PCNONE));
9071       if (ispreonly) {
9072         check_ksp_type = KSPPREONLY;
9073         compute_eigs   = PETSC_FALSE;
9074       } else {
9075         check_ksp_type = KSPGMRES;
9076         compute_eigs   = PETSC_TRUE;
9077       }
9078       PetscCall(KSPSetType(check_ksp, check_ksp_type));
9079       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9080       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9081       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9082       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9083       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9084       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9085       PetscCall(KSPSetFromOptions(check_ksp));
9086       PetscCall(KSPSetUp(check_ksp));
9087       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9088       PetscCall(KSPSetPC(check_ksp, check_pc));
9089       /* create random vec */
9090       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9091       PetscCall(VecSetRandom(check_vec, NULL));
9092       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9093       /* solve coarse problem */
9094       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9095       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9096       /* set eigenvalue estimation if preonly has not been requested */
9097       if (compute_eigs) {
9098         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9099         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9100         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9101         if (neigs) {
9102           lambda_max = eigs_r[neigs - 1];
9103           lambda_min = eigs_r[0];
9104           if (pcbddc->use_coarse_estimates) {
9105             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9106               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9107               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9108             }
9109           }
9110         }
9111       }
9112 
9113       /* check coarse problem residual error */
9114       if (pcbddc->dbg_flag) {
9115         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9116         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9117         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9118         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9119         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9120         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9121         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9122         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9123         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer));
9124         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
9125         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9126         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9127         if (compute_eigs) {
9128           PetscReal          lambda_max_s, lambda_min_s;
9129           KSPConvergedReason reason;
9130           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9131           PetscCall(KSPGetIterationNumber(check_ksp, &its));
9132           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9133           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9134           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9135           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9136         }
9137         PetscCall(PetscViewerFlush(dbg_viewer));
9138         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9139       }
9140       PetscCall(VecDestroy(&check_vec));
9141       PetscCall(VecDestroy(&coarse_vec));
9142       PetscCall(KSPDestroy(&check_ksp));
9143       if (compute_eigs) {
9144         PetscCall(PetscFree(eigs_r));
9145         PetscCall(PetscFree(eigs_c));
9146       }
9147     }
9148   }
9149   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9150   /* print additional info */
9151   if (pcbddc->dbg_flag) {
9152     /* waits until all processes reaches this point */
9153     PetscCall(PetscBarrier((PetscObject)pc));
9154     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9155     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9156   }
9157 
9158   /* free memory */
9159   PetscCall(MatDestroy(&coarse_mat));
9160   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9161   PetscFunctionReturn(PETSC_SUCCESS);
9162 }
9163 
9164 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9165 {
9166   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
9167   PC_IS          *pcis   = (PC_IS *)pc->data;
9168   IS              subset, subset_mult, subset_n;
9169   PetscInt        local_size, coarse_size = 0;
9170   PetscInt       *local_primal_indices = NULL;
9171   const PetscInt *t_local_primal_indices;
9172 
9173   PetscFunctionBegin;
9174   /* Compute global number of coarse dofs */
9175   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9176   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9177   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9178   PetscCall(ISDestroy(&subset_n));
9179   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9180   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9181   PetscCall(ISDestroy(&subset));
9182   PetscCall(ISDestroy(&subset_mult));
9183   PetscCall(ISGetLocalSize(subset_n, &local_size));
9184   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9185   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9186   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9187   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9188   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9189   PetscCall(ISDestroy(&subset_n));
9190 
9191   if (pcbddc->dbg_flag) {
9192     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9193     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9194     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9195     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9196   }
9197 
9198   /* get back data */
9199   *coarse_size_n          = coarse_size;
9200   *local_primal_indices_n = local_primal_indices;
9201   PetscFunctionReturn(PETSC_SUCCESS);
9202 }
9203 
9204 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9205 {
9206   IS           localis_t;
9207   PetscInt     i, lsize, *idxs, n;
9208   PetscScalar *vals;
9209 
9210   PetscFunctionBegin;
9211   /* get indices in local ordering exploiting local to global map */
9212   PetscCall(ISGetLocalSize(globalis, &lsize));
9213   PetscCall(PetscMalloc1(lsize, &vals));
9214   for (i = 0; i < lsize; i++) vals[i] = 1.0;
9215   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9216   PetscCall(VecSet(gwork, 0.0));
9217   PetscCall(VecSet(lwork, 0.0));
9218   if (idxs) { /* multilevel guard */
9219     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9220     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9221   }
9222   PetscCall(VecAssemblyBegin(gwork));
9223   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9224   PetscCall(PetscFree(vals));
9225   PetscCall(VecAssemblyEnd(gwork));
9226   /* now compute set in local ordering */
9227   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9228   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9229   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9230   PetscCall(VecGetSize(lwork, &n));
9231   for (i = 0, lsize = 0; i < n; i++) {
9232     if (PetscRealPart(vals[i]) > 0.5) lsize++;
9233   }
9234   PetscCall(PetscMalloc1(lsize, &idxs));
9235   for (i = 0, lsize = 0; i < n; i++) {
9236     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9237   }
9238   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9239   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9240   *localis = localis_t;
9241   PetscFunctionReturn(PETSC_SUCCESS);
9242 }
9243 
9244 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9245 {
9246   PC_IS   *pcis   = (PC_IS *)pc->data;
9247   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9248   PC_IS   *pcisf;
9249   PC_BDDC *pcbddcf;
9250   PC       pcf;
9251 
9252   PetscFunctionBegin;
9253   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9254   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9255   PetscCall(PCSetType(pcf, PCBDDC));
9256 
9257   pcisf   = (PC_IS *)pcf->data;
9258   pcbddcf = (PC_BDDC *)pcf->data;
9259 
9260   pcisf->is_B_local = pcis->is_B_local;
9261   pcisf->vec1_N     = pcis->vec1_N;
9262   pcisf->BtoNmap    = pcis->BtoNmap;
9263   pcisf->n          = pcis->n;
9264   pcisf->n_B        = pcis->n_B;
9265 
9266   PetscCall(PetscFree(pcbddcf->mat_graph));
9267   PetscCall(PetscFree(pcbddcf->sub_schurs));
9268   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
9269   pcbddcf->sub_schurs            = schurs;
9270   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
9271   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9272   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9273   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
9274   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
9275   pcbddcf->use_faces             = PETSC_TRUE;
9276   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
9277   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
9278   pcbddcf->use_qr_single         = (PetscBool)!constraints;
9279   pcbddcf->fake_change           = PETSC_TRUE;
9280   pcbddcf->dbg_flag              = pcbddc->dbg_flag;
9281 
9282   PetscCall(PCBDDCAdaptiveSelection(pcf));
9283   PetscCall(PCBDDCConstraintsSetUp(pcf));
9284 
9285   *change = pcbddcf->ConstraintMatrix;
9286   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9287   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9288   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9289 
9290   if (schurs) pcbddcf->sub_schurs = NULL;
9291   pcbddcf->ConstraintMatrix = NULL;
9292   pcbddcf->mat_graph        = NULL;
9293   pcisf->is_B_local         = NULL;
9294   pcisf->vec1_N             = NULL;
9295   pcisf->BtoNmap            = NULL;
9296   PetscCall(PCDestroy(&pcf));
9297   PetscFunctionReturn(PETSC_SUCCESS);
9298 }
9299 
9300 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9301 {
9302   PC_IS          *pcis       = (PC_IS *)pc->data;
9303   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
9304   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9305   Mat             S_j;
9306   PetscInt       *used_xadj, *used_adjncy;
9307   PetscBool       free_used_adj;
9308 
9309   PetscFunctionBegin;
9310   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9311   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9312   free_used_adj = PETSC_FALSE;
9313   if (pcbddc->sub_schurs_layers == -1) {
9314     used_xadj   = NULL;
9315     used_adjncy = NULL;
9316   } else {
9317     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9318       used_xadj   = pcbddc->mat_graph->xadj;
9319       used_adjncy = pcbddc->mat_graph->adjncy;
9320     } else if (pcbddc->computed_rowadj) {
9321       used_xadj   = pcbddc->mat_graph->xadj;
9322       used_adjncy = pcbddc->mat_graph->adjncy;
9323     } else {
9324       PetscBool       flg_row = PETSC_FALSE;
9325       const PetscInt *xadj, *adjncy;
9326       PetscInt        nvtxs;
9327 
9328       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9329       if (flg_row) {
9330         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9331         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9332         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9333         free_used_adj = PETSC_TRUE;
9334       } else {
9335         pcbddc->sub_schurs_layers = -1;
9336         used_xadj                 = NULL;
9337         used_adjncy               = NULL;
9338       }
9339       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9340     }
9341   }
9342 
9343   /* setup sub_schurs data */
9344   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9345   if (!sub_schurs->schur_explicit) {
9346     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9347     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9348     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9349   } else {
9350     Mat       change        = NULL;
9351     Vec       scaling       = NULL;
9352     IS        change_primal = NULL, iP;
9353     PetscInt  benign_n;
9354     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
9355     PetscBool need_change       = PETSC_FALSE;
9356     PetscBool discrete_harmonic = PETSC_FALSE;
9357 
9358     if (!pcbddc->use_vertices && reuse_solvers) {
9359       PetscInt n_vertices;
9360 
9361       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9362       reuse_solvers = (PetscBool)!n_vertices;
9363     }
9364     if (!pcbddc->benign_change_explicit) {
9365       benign_n = pcbddc->benign_n;
9366     } else {
9367       benign_n = 0;
9368     }
9369     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9370        We need a global reduction to avoid possible deadlocks.
9371        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9372     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9373       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9374       PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9375       need_change = (PetscBool)(!need_change);
9376     }
9377     /* If the user defines additional constraints, we import them here */
9378     if (need_change) {
9379       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9380       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9381     }
9382     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9383 
9384     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9385     if (iP) {
9386       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9387       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9388       PetscOptionsEnd();
9389     }
9390     if (discrete_harmonic) {
9391       Mat A;
9392       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9393       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9394       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9395       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9396                                      pcbddc->benign_zerodiag_subs, change, change_primal));
9397       PetscCall(MatDestroy(&A));
9398     } else {
9399       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9400                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9401     }
9402     PetscCall(MatDestroy(&change));
9403     PetscCall(ISDestroy(&change_primal));
9404   }
9405   PetscCall(MatDestroy(&S_j));
9406 
9407   /* free adjacency */
9408   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9409   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9410   PetscFunctionReturn(PETSC_SUCCESS);
9411 }
9412 
9413 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9414 {
9415   PC_IS      *pcis   = (PC_IS *)pc->data;
9416   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9417   PCBDDCGraph graph;
9418 
9419   PetscFunctionBegin;
9420   /* attach interface graph for determining subsets */
9421   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9422     IS       verticesIS, verticescomm;
9423     PetscInt vsize, *idxs;
9424 
9425     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9426     PetscCall(ISGetSize(verticesIS, &vsize));
9427     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9428     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9429     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9430     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9431     PetscCall(PCBDDCGraphCreate(&graph));
9432     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9433     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9434     PetscCall(ISDestroy(&verticescomm));
9435     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9436   } else {
9437     graph = pcbddc->mat_graph;
9438   }
9439   /* print some info */
9440   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9441     IS       vertices;
9442     PetscInt nv, nedges, nfaces;
9443     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9444     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9445     PetscCall(ISGetSize(vertices, &nv));
9446     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9447     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9448     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9449     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9450     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9451     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9452     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9453     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9454   }
9455 
9456   /* sub_schurs init */
9457   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9458   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9459 
9460   /* free graph struct */
9461   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9462   PetscFunctionReturn(PETSC_SUCCESS);
9463 }
9464 
9465 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9466 {
9467   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
9468   PetscInt        n     = pc->pmat->rmap->n, ln, ni, st;
9469   const PetscInt *idxs;
9470   IS              gis;
9471 
9472   PetscFunctionBegin;
9473   if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9474   PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9475   PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9476   PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9477   PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9478   PetscCall(ISGetLocalSize(is, &ni));
9479   PetscCall(ISGetIndices(is, &idxs));
9480   for (PetscInt i = 0; i < ni; i++) {
9481     if (idxs[i] < 0 || idxs[i] >= ln) continue;
9482     matis->sf_leafdata[idxs[i]] = 1;
9483   }
9484   PetscCall(ISRestoreIndices(is, &idxs));
9485   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9486   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9487   ln = 0;
9488   for (PetscInt i = 0; i < n; i++) {
9489     if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9490   }
9491   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9492   PetscCall(ISView(gis, viewer));
9493   PetscCall(ISDestroy(&gis));
9494   PetscFunctionReturn(PETSC_SUCCESS);
9495 }
9496 
9497 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9498 {
9499   PetscInt    header[11];
9500   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9501   PetscViewer viewer;
9502   MPI_Comm    comm = PetscObjectComm((PetscObject)pc);
9503 
9504   PetscFunctionBegin;
9505   PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9506   if (load) {
9507     IS  is;
9508     Mat A;
9509 
9510     PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9511     PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9512     PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9513     PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9514     PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9515     PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9516     PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9517     PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9518     PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9519     PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9520     PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9521     if (header[0]) {
9522       PetscCall(ISCreate(comm, &is));
9523       PetscCall(ISLoad(is, viewer));
9524       PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9525       PetscCall(ISDestroy(&is));
9526     }
9527     if (header[1]) {
9528       PetscCall(ISCreate(comm, &is));
9529       PetscCall(ISLoad(is, viewer));
9530       PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9531       PetscCall(ISDestroy(&is));
9532     }
9533     if (header[2]) {
9534       IS *isarray;
9535 
9536       PetscCall(PetscMalloc1(header[2], &isarray));
9537       for (PetscInt i = 0; i < header[2]; i++) {
9538         PetscCall(ISCreate(comm, &isarray[i]));
9539         PetscCall(ISLoad(isarray[i], viewer));
9540       }
9541       PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9542       for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9543       PetscCall(PetscFree(isarray));
9544     }
9545     if (header[3]) {
9546       PetscCall(ISCreate(comm, &is));
9547       PetscCall(ISLoad(is, viewer));
9548       PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9549       PetscCall(ISDestroy(&is));
9550     }
9551     if (header[4]) {
9552       PetscCall(MatCreate(comm, &A));
9553       PetscCall(MatSetType(A, MATAIJ));
9554       PetscCall(MatLoad(A, viewer));
9555       PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9556       PetscCall(MatDestroy(&A));
9557     }
9558     if (header[9]) {
9559       PetscCall(MatCreate(comm, &A));
9560       PetscCall(MatSetType(A, MATIS));
9561       PetscCall(MatLoad(A, viewer));
9562       PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9563       PetscCall(MatDestroy(&A));
9564     }
9565   } else {
9566     header[0]  = (PetscInt) !!pcbddc->DirichletBoundariesLocal;
9567     header[1]  = (PetscInt) !!pcbddc->NeumannBoundariesLocal;
9568     header[2]  = pcbddc->n_ISForDofsLocal;
9569     header[3]  = (PetscInt) !!pcbddc->user_primal_vertices_local;
9570     header[4]  = (PetscInt) !!pcbddc->discretegradient;
9571     header[5]  = pcbddc->nedorder;
9572     header[6]  = pcbddc->nedfield;
9573     header[7]  = (PetscInt)pcbddc->nedglobal;
9574     header[8]  = (PetscInt)pcbddc->conforming;
9575     header[9]  = (PetscInt) !!pcbddc->divudotp;
9576     header[10] = (PetscInt)pcbddc->divudotp_trans;
9577     if (header[4]) header[3] = 0;
9578 
9579     PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9580     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9581     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9582     for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9583     if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9584     if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9585     if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9586   }
9587   PetscCall(PetscViewerDestroy(&viewer));
9588   PetscFunctionReturn(PETSC_SUCCESS);
9589 }
9590 
9591 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9592 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9593 {
9594   Mat         At;
9595   IS          rows;
9596   PetscInt    rst, ren;
9597   PetscLayout rmap;
9598 
9599   PetscFunctionBegin;
9600   rst = ren = 0;
9601   if (ccomm != MPI_COMM_NULL) {
9602     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9603     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9604     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9605     PetscCall(PetscLayoutSetUp(rmap));
9606     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9607   }
9608   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9609   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9610   PetscCall(ISDestroy(&rows));
9611 
9612   if (ccomm != MPI_COMM_NULL) {
9613     Mat_MPIAIJ *a, *b;
9614     IS          from, to;
9615     Vec         gvec;
9616     PetscInt    lsize;
9617 
9618     PetscCall(MatCreate(ccomm, B));
9619     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9620     PetscCall(MatSetType(*B, MATAIJ));
9621     PetscCall(PetscLayoutDestroy(&((*B)->rmap)));
9622     PetscCall(PetscLayoutSetUp((*B)->cmap));
9623     a = (Mat_MPIAIJ *)At->data;
9624     b = (Mat_MPIAIJ *)(*B)->data;
9625     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9626     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9627     PetscCall(PetscObjectReference((PetscObject)a->A));
9628     PetscCall(PetscObjectReference((PetscObject)a->B));
9629     b->A = a->A;
9630     b->B = a->B;
9631 
9632     b->donotstash   = a->donotstash;
9633     b->roworiented  = a->roworiented;
9634     b->rowindices   = NULL;
9635     b->rowvalues    = NULL;
9636     b->getrowactive = PETSC_FALSE;
9637 
9638     (*B)->rmap         = rmap;
9639     (*B)->factortype   = A->factortype;
9640     (*B)->assembled    = PETSC_TRUE;
9641     (*B)->insertmode   = NOT_SET_VALUES;
9642     (*B)->preallocated = PETSC_TRUE;
9643 
9644     if (a->colmap) {
9645 #if defined(PETSC_USE_CTABLE)
9646       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9647 #else
9648       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9649       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9650 #endif
9651     } else b->colmap = NULL;
9652     if (a->garray) {
9653       PetscInt len;
9654       len = a->B->cmap->n;
9655       PetscCall(PetscMalloc1(len + 1, &b->garray));
9656       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9657     } else b->garray = NULL;
9658 
9659     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9660     b->lvec = a->lvec;
9661 
9662     /* cannot use VecScatterCopy */
9663     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9664     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9665     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9666     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9667     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9668     PetscCall(ISDestroy(&from));
9669     PetscCall(ISDestroy(&to));
9670     PetscCall(VecDestroy(&gvec));
9671   }
9672   PetscCall(MatDestroy(&At));
9673   PetscFunctionReturn(PETSC_SUCCESS);
9674 }
9675 
9676 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
9677 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9678 {
9679   PetscBool isaij;
9680   MPI_Comm  comm;
9681 
9682   PetscFunctionBegin;
9683   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9684   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9685   PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9686   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9687   if (isaij) { /* SeqAIJ supports repeated rows */
9688     PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9689   } else {
9690     Mat                A_loc;
9691     Mat_SeqAIJ        *da;
9692     PetscSF            sf;
9693     PetscInt           ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9694     PetscScalar       *daa;
9695     const PetscInt    *idxs;
9696     const PetscSFNode *iremotes;
9697     PetscSFNode       *remotes;
9698 
9699     /* SF for incoming rows */
9700     PetscCall(PetscSFCreate(comm, &sf));
9701     PetscCall(ISGetLocalSize(rows, &ni));
9702     PetscCall(ISGetIndices(rows, &idxs));
9703     PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9704     PetscCall(ISRestoreIndices(rows, &idxs));
9705 
9706     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9707     da = (Mat_SeqAIJ *)A_loc->data;
9708     PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9709     for (PetscInt i = 0; i < m; i++) {
9710       rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9711       rdata[2 * i + 1] = da->i[i];
9712     }
9713     PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9714     PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9715     PetscCall(PetscMalloc1(ni + 1, &di));
9716     di[0] = 0;
9717     for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9718     PetscCall(PetscMalloc1(di[ni], &dj));
9719     PetscCall(PetscMalloc1(di[ni], &daa));
9720     PetscCall(PetscMalloc1(di[ni], &remotes));
9721 
9722     PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9723 
9724     /* SF graph for nonzeros */
9725     c = 0;
9726     for (PetscInt i = 0; i < ni; i++) {
9727       const PetscInt rank  = iremotes[i].rank;
9728       const PetscInt rsize = ldata[2 * i];
9729       for (PetscInt j = 0; j < rsize; j++) {
9730         remotes[c].rank  = rank;
9731         remotes[c].index = ldata[2 * i + 1] + j;
9732         c++;
9733       }
9734     }
9735     PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9736     PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9737     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9738     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9739     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9740     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9741 
9742     PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9743     PetscCall(MatDestroy(&A_loc));
9744     PetscCall(PetscSFDestroy(&sf));
9745     PetscCall(PetscFree(di));
9746     PetscCall(PetscFree(dj));
9747     PetscCall(PetscFree(daa));
9748     PetscCall(PetscFree(remotes));
9749     PetscCall(PetscFree2(ldata, rdata));
9750   }
9751   PetscFunctionReturn(PETSC_SUCCESS);
9752 }
9753