xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision ac84dfd5778759083efa0c46d3820bac8a11500e)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <../src/mat/impls/dense/seq/dense.h>
6 #include <petscdmplex.h>
7 #include <petscblaslapack.h>
8 #include <petsc/private/sfimpl.h>
9 #include <petsc/private/dmpleximpl.h>
10 #include <petscdmda.h>
11 
12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13 
14 /* if range is true,  it returns B s.t. span{B} = range(A)
15    if range is false, it returns B s.t. range(B) _|_ range(A) */
16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17 {
18   PetscScalar *uwork, *data, *U, ds = 0.;
19   PetscReal   *sing;
20   PetscBLASInt bM, bN, lwork, lierr, di = 1;
21   PetscInt     ulw, i, nr, nc, n;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal *rwork2;
24 #endif
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &nr, &nc));
28   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29 
30   /* workspace */
31   if (!work) {
32     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33     PetscCall(PetscMalloc1(ulw, &uwork));
34   } else {
35     ulw   = lw;
36     uwork = work;
37   }
38   n = PetscMin(nr, nc);
39   if (!rwork) {
40     PetscCall(PetscMalloc1(n, &sing));
41   } else {
42     sing = rwork;
43   }
44 
45   /* SVD */
46   PetscCall(PetscMalloc1(nr * nr, &U));
47   PetscCall(PetscBLASIntCast(nr, &bM));
48   PetscCall(PetscBLASIntCast(nc, &bN));
49   PetscCall(PetscBLASIntCast(ulw, &lwork));
50   PetscCall(MatDenseGetArray(A, &data));
51   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52 #if !defined(PETSC_USE_COMPLEX)
53   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54 #else
55   PetscCall(PetscMalloc1(5 * n, &rwork2));
56   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57   PetscCall(PetscFree(rwork2));
58 #endif
59   PetscCall(PetscFPTrapPop());
60   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
61   PetscCall(MatDenseRestoreArray(A, &data));
62   for (i = 0; i < n; i++)
63     if (sing[i] < PETSC_SMALL) break;
64   if (!rwork) PetscCall(PetscFree(sing));
65   if (!work) PetscCall(PetscFree(uwork));
66   /* create B */
67   if (!range) {
68     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69     PetscCall(MatDenseGetArray(*B, &data));
70     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71   } else {
72     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73     PetscCall(MatDenseGetArray(*B, &data));
74     PetscCall(PetscArraycpy(data, U, i * nr));
75   }
76   PetscCall(MatDenseRestoreArray(*B, &data));
77   PetscCall(PetscFree(U));
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* TODO REMOVE */
82 #if defined(PRINT_GDET)
83 static int inc = 0;
84 static int lev = 0;
85 #endif
86 
87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88 {
89   Mat          GE, GEd;
90   PetscInt     rsize, csize, esize;
91   PetscScalar *ptr;
92 
93   PetscFunctionBegin;
94   PetscCall(ISGetSize(edge, &esize));
95   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96   PetscCall(ISGetSize(extrow, &rsize));
97   PetscCall(ISGetSize(extcol, &csize));
98 
99   /* gradients */
100   ptr = work + 5 * esize;
101   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104   PetscCall(MatDestroy(&GE));
105 
106   /* constants */
107   ptr += rsize * csize;
108   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111   PetscCall(MatDestroy(&GE));
112   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113   PetscCall(MatDestroy(&GEd));
114 
115   if (corners) {
116     Mat                GEc;
117     const PetscScalar *vals;
118     PetscScalar        v;
119 
120     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122     PetscCall(MatDenseGetArrayRead(GEd, &vals));
123     /* v       = PetscAbsScalar(vals[0]); */
124     v        = 1.;
125     cvals[0] = vals[0] / v;
126     cvals[1] = vals[1] / v;
127     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128     PetscCall(MatScale(*GKins, 1. / v));
129 #if defined(PRINT_GDET)
130     {
131       PetscViewer viewer;
132       char        filename[256];
133       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137       PetscCall(MatView(GEc, viewer));
138       PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139       PetscCall(MatView(*GKins, viewer));
140       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141       PetscCall(MatView(GEd, viewer));
142       PetscCall(PetscViewerDestroy(&viewer));
143     }
144 #endif
145     PetscCall(MatDestroy(&GEd));
146     PetscCall(MatDestroy(&GEc));
147   }
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
152 
153 PetscErrorCode PCBDDCNedelecSupport(PC pc)
154 {
155   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
156   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
157   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158   PetscSF                sfv;
159   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160   MPI_Comm               comm;
161   IS                     lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162   IS                    *eedges, *extrows, *extcols, *alleedges;
163   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164   PetscScalar           *vals, *work;
165   PetscReal             *rwork;
166   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
167   PetscInt               ne, nv, Lv, order, n, field;
168   PetscInt               i, j, extmem, cum, maxsize, nee;
169   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170   PetscInt              *sfvleaves, *sfvroots;
171   PetscInt              *corners, *cedges;
172   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173   PetscInt              *emarks;
174   PetscBool              print, eerr, done, lrc[2], conforming, global, setprimal;
175 
176   PetscFunctionBegin;
177   /* If the discrete gradient is defined for a subset of dofs and global is true,
178      it assumes G is given in global ordering for all the dofs.
179      Otherwise, the ordering is global for the Nedelec field */
180   order      = pcbddc->nedorder;
181   conforming = pcbddc->conforming;
182   field      = pcbddc->nedfield;
183   global     = pcbddc->nedglobal;
184   setprimal  = PETSC_FALSE;
185   print      = PETSC_FALSE;
186 
187   /* Command line customization */
188   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190   /* print debug info and adaptive order TODO: to be removed */
191   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193   PetscOptionsEnd();
194 
195   /* Return if there are no edges in the decomposition */
196   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199   PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200   lrc[0] = PETSC_FALSE;
201   for (i = 0; i < n; i++) {
202     if (PetscRealPart(vals[i]) > 2.) {
203       lrc[0] = PETSC_TRUE;
204       break;
205     }
206   }
207   PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208   PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
209   if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
210 
211   /* Get Nedelec field */
212   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213   if (pcbddc->n_ISForDofsLocal && field >= 0) {
214     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215     nedfieldlocal = pcbddc->ISForDofsLocal[field];
216     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218     ne            = n;
219     nedfieldlocal = NULL;
220     global        = PETSC_TRUE;
221   } else if (field == PETSC_DECIDE) {
222     PetscInt rst, ren, *idx;
223 
224     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227     for (i = rst; i < ren; i++) {
228       PetscInt nc;
229 
230       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233     }
234     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236     PetscCall(PetscMalloc1(n, &idx));
237     for (i = 0, ne = 0; i < n; i++)
238       if (matis->sf_leafdata[i]) idx[ne++] = i;
239     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240   } else {
241     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242   }
243 
244   /* Sanity checks */
245   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
248 
249   /* Just set primal dofs and return */
250   if (setprimal) {
251     IS        enedfieldlocal;
252     PetscInt *eidxs;
253 
254     PetscCall(PetscMalloc1(ne, &eidxs));
255     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256     if (nedfieldlocal) {
257       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258       for (i = 0, cum = 0; i < ne; i++) {
259         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260       }
261       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262     } else {
263       for (i = 0, cum = 0; i < ne; i++) {
264         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265       }
266     }
267     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270     PetscCall(PetscFree(eidxs));
271     PetscCall(ISDestroy(&nedfieldlocal));
272     PetscCall(ISDestroy(&enedfieldlocal));
273     PetscFunctionReturn(PETSC_SUCCESS);
274   }
275 
276   /* Compute some l2g maps */
277   if (nedfieldlocal) {
278     IS is;
279 
280     /* need to map from the local Nedelec field to local numbering */
281     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286     if (global) {
287       PetscCall(PetscObjectReference((PetscObject)al2g));
288       el2g = al2g;
289     } else {
290       IS gis;
291 
292       PetscCall(ISRenumber(is, NULL, NULL, &gis));
293       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294       PetscCall(ISDestroy(&gis));
295     }
296     PetscCall(ISDestroy(&is));
297   } else {
298     /* one ref for the destruction of al2g, one for el2g */
299     PetscCall(PetscObjectReference((PetscObject)al2g));
300     PetscCall(PetscObjectReference((PetscObject)al2g));
301     el2g = al2g;
302     fl2g = NULL;
303   }
304 
305   /* Start communication to drop connections for interior edges (for cc analysis only) */
306   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308   if (nedfieldlocal) {
309     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312   } else {
313     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314   }
315   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
317 
318   /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319      Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320   if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
321 
322   /* drop connections with interior edges to avoid unneeded communications and memory movements */
323   PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324   PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325   PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326   if (global) {
327     PetscInt rst;
328 
329     PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331       if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332     }
333     PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334     PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335   } else {
336     PetscInt *tbz;
337 
338     PetscCall(PetscMalloc1(ne, &tbz));
339     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342     for (i = 0, cum = 0; i < ne; i++)
343       if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345     PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346     PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347     PetscCall(PetscFree(tbz));
348   }
349 
350   /* Extract subdomain relevant rows of G  */
351   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353   PetscCall(MatAIJExtractRows(G, lned, &lGall));
354   /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356   PetscCall(ISDestroy(&lned));
357   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358   PetscCall(MatDestroy(&lGall));
359   PetscCall(MatISGetLocalMat(lGis, &lG));
360   if (matis->allow_repeated) { /* multi-element support */
361     Mat                   *lGn, B;
362     IS                    *is_rows, *tcols, tmap, nmap;
363     PetscInt               subnv;
364     const PetscInt        *subvidxs;
365     ISLocalToGlobalMapping mapn;
366 
367     PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371       if (fl2g) {
372         PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373       } else {
374         PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375         is_rows[i] = pcbddc->local_subs[i];
376       }
377       PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379       PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380       PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382       PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383       PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384     }
385 
386     /* Create new MATIS with repeated vertices */
387     PetscCall(MatCreate(comm, &B));
388     PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389     PetscCall(MatSetType(B, MATIS));
390     PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391     PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392     PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393     PetscCall(ISDestroy(&tmap));
394     PetscCall(ISGetLocalSize(nmap, &subnv));
395     PetscCall(ISGetIndices(nmap, &subvidxs));
396     PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397     PetscCall(ISRestoreIndices(nmap, &subvidxs));
398     PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399     PetscCall(ISDestroy(&tmap));
400     PetscCall(ISDestroy(&nmap));
401     PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402     PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403     PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405       PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406       PetscCall(ISDestroy(&is_rows[i]));
407       PetscCall(ISDestroy(&tcols[i]));
408     }
409     PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410     PetscCall(PetscFree(lGn));
411     PetscCall(PetscFree(is_rows));
412     PetscCall(PetscFree(tcols));
413     PetscCall(MatISSetLocalMat(B, lG));
414     PetscCall(MatDestroy(&lG));
415 
416     PetscCall(MatDestroy(&lGis));
417     lGis = B;
418 
419     lGis->assembled = PETSC_TRUE;
420   }
421   PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
422 
423   /* SF for nodal dofs communications */
424   PetscCall(MatGetLocalSize(G, NULL, &Lv));
425   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426   PetscCall(PetscObjectReference((PetscObject)vl2g));
427   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428   PetscCall(PetscSFCreate(comm, &sfv));
429   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
432 
433   if (elements_corners) {
434     IS      tmp;
435     Vec     global, local;
436     Mat_IS *tGis = (Mat_IS *)lGis->data;
437 
438     PetscCall(MatCreateVecs(lGis, &global, NULL));
439     PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440     PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441     PetscCall(VecDestroy(&global));
442     PetscCall(VecDestroy(&local));
443     elements_corners = tmp;
444   }
445 
446   /* Destroy temporary G */
447   PetscCall(MatISGetLocalMat(lGis, &lG));
448   PetscCall(PetscObjectReference((PetscObject)lG));
449   PetscCall(MatDestroy(&G));
450   PetscCall(MatDestroy(&lGis));
451 
452   if (print) {
453     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454     PetscCall(MatView(lG, NULL));
455   }
456 
457   /* Save lG for values insertion in change of basis */
458   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
459 
460   /* Analyze the edge-nodes connections (duplicate lG) */
461   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463   PetscCall(PetscBTCreate(nv, &btv));
464   PetscCall(PetscBTCreate(ne, &bte));
465   PetscCall(PetscBTCreate(ne, &btb));
466   PetscCall(PetscBTCreate(ne, &btbd));
467   /* need to import the boundary specification to ensure the
468      proper detection of coarse edges' endpoints */
469   if (pcbddc->DirichletBoundariesLocal) {
470     IS is;
471 
472     if (fl2g) {
473       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474     } else {
475       is = pcbddc->DirichletBoundariesLocal;
476     }
477     PetscCall(ISGetLocalSize(is, &cum));
478     PetscCall(ISGetIndices(is, &idxs));
479     for (i = 0; i < cum; i++) {
480       if (idxs[i] >= 0 && idxs[i] < ne) {
481         PetscCall(PetscBTSet(btb, idxs[i]));
482         PetscCall(PetscBTSet(btbd, idxs[i]));
483       }
484     }
485     PetscCall(ISRestoreIndices(is, &idxs));
486     if (fl2g) PetscCall(ISDestroy(&is));
487   }
488   if (pcbddc->NeumannBoundariesLocal) {
489     IS is;
490 
491     if (fl2g) {
492       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493     } else {
494       is = pcbddc->NeumannBoundariesLocal;
495     }
496     PetscCall(ISGetLocalSize(is, &cum));
497     PetscCall(ISGetIndices(is, &idxs));
498     for (i = 0; i < cum; i++) {
499       if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500     }
501     PetscCall(ISRestoreIndices(is, &idxs));
502     if (fl2g) PetscCall(ISDestroy(&is));
503   }
504 
505   /* Count neighs per dof */
506   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
508 
509   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510      for proper detection of coarse edges' endpoints */
511   PetscCall(PetscBTCreate(ne, &btee));
512   for (i = 0; i < ne; i++) {
513     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514   }
515   PetscCall(PetscMalloc1(ne, &marks));
516   if (!conforming) {
517     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519   }
520   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521   PetscCall(MatSeqAIJGetArray(lGe, &vals));
522   cum = 0;
523   for (i = 0; i < ne; i++) {
524     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525     if (!PetscBTLookup(btee, i)) {
526       marks[cum++] = i;
527       continue;
528     }
529     /* set badly connected edge dofs as primal */
530     if (!conforming) {
531       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532         marks[cum++] = i;
533         PetscCall(PetscBTSet(bte, i));
534         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535       } else {
536         /* every edge dofs should be connected through a certain number of nodal dofs
537            to other edge dofs belonging to coarse edges
538            - at most 2 endpoints
539            - order-1 interior nodal dofs
540            - no undefined nodal dofs (nconn < order)
541         */
542         PetscInt ends = 0, ints = 0, undef = 0;
543         for (j = ii[i]; j < ii[i + 1]; j++) {
544           PetscInt v     = jj[j], k;
545           PetscInt nconn = iit[v + 1] - iit[v];
546           for (k = iit[v]; k < iit[v + 1]; k++)
547             if (!PetscBTLookup(btee, jjt[k])) nconn--;
548           if (nconn > order) ends++;
549           else if (nconn == order) ints++;
550           else undef++;
551         }
552         if (undef || ends > 2 || ints != order - 1) {
553           marks[cum++] = i;
554           PetscCall(PetscBTSet(bte, i));
555           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556         }
557       }
558     }
559     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560     if (!order && ii[i + 1] != ii[i]) {
561       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563     }
564   }
565   PetscCall(PetscBTDestroy(&btee));
566   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568   if (!conforming) {
569     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570     PetscCall(MatDestroy(&lGt));
571   }
572   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
573 
574   /* identify splitpoints and corner candidates */
575   PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576   PetscCall(PetscBTCreate(nv, &btvcand));
577   if (elements_corners) {
578     PetscCall(ISGetLocalSize(elements_corners, &cum));
579     PetscCall(ISGetIndices(elements_corners, &idxs));
580     for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581     PetscCall(ISRestoreIndices(elements_corners, &idxs));
582   }
583 
584   if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585     PetscSF   emlsf, vmlsf;
586     PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587     PetscInt  cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
588 
589     PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591     PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
593 
594     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
596 
597     PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598     for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599     PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600     PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
601 
602     PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603     for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604     PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605     PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
606 
607     PetscCall(PetscMalloc1(ne, &eleaves));
608     PetscCall(PetscMalloc1(nv, &vleaves));
609     for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610     for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611     PetscCall(PetscMalloc1(emnl, &meleaves));
612     PetscCall(PetscMalloc1(vmnl, &mvleaves));
613 
614     PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615     PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616     for (i = 0; i < n_subs; i++) {
617       const PetscInt *idxs;
618       const PetscInt  subid = cum_subs + i;
619       PetscInt        ns;
620 
621       PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622       PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623       for (j = 0; j < ns; j++) {
624         const PetscInt e = idxs[j];
625 
626         eleaves[e] = subid;
627         for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628       }
629       PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630     }
631     PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632     PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633     PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634     PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635     PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636     PetscCall(PetscFree(eleaves));
637     PetscCall(PetscFree(vleaves));
638 
639     PetscCall(PetscMalloc1(ne + 1, &eneighs));
640     eneighs[0] = meleaves;
641     for (i = 0; i < ne; i++) {
642       PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643       eneighs[i + 1] = eneighs[i] + ecount[i];
644     }
645     PetscCall(PetscMalloc1(nv + 1, &vneighs));
646     vneighs[0] = mvleaves;
647     for (i = 0; i < nv; i++) {
648       PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649       vneighs[i + 1] = vneighs[i] + vcount[i];
650     }
651   } else {
652     PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653     PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654   }
655 
656   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657   if (print) {
658     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659     PetscCall(MatView(lGe, NULL));
660     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661     PetscCall(MatView(lGt, NULL));
662   }
663   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664   PetscCall(MatSeqAIJGetArray(lGt, &vals));
665   for (i = 0; i < nv; i++) {
666     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668     if (!order) { /* variable order */
669       PetscReal vorder = 0.;
670 
671       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674       ord = 1;
675     }
676     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677       const PetscInt e = jj[j];
678 
679       if (PetscBTLookup(btbd, e)) {
680         bdir = PETSC_TRUE;
681         break;
682       }
683       if (vc != ecount[e]) {
684         sneighs = PETSC_FALSE;
685       } else {
686         const PetscInt *vn = vneighs[i], *en = eneighs[e];
687 
688         for (PetscInt k = 0; k < vc; k++) {
689           if (vn[k] != en[k]) {
690             sneighs = PETSC_FALSE;
691             break;
692           }
693         }
694       }
695     }
696     if (elements_corners) test = 0;
697     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699       PetscCall(PetscBTSet(btv, i));
700     } else if (test == ord) {
701       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703         PetscCall(PetscBTSet(btv, i));
704       } else if (!elements_corners) {
705         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706         PetscCall(PetscBTSet(btvcand, i));
707       }
708     }
709   }
710   PetscCall(PetscBTDestroy(&btbd));
711 
712   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713   if (order != 1) {
714     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716     for (i = 0; i < nv; i++) {
717       if (PetscBTLookup(btvcand, i)) {
718         PetscBool found = PETSC_FALSE;
719         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720           PetscInt k, e = jj[j];
721           if (PetscBTLookup(bte, e)) continue;
722           for (k = iit[e]; k < iit[e + 1]; k++) {
723             PetscInt v = jjt[k];
724             if (v != i && PetscBTLookup(btvcand, v)) {
725               found = PETSC_TRUE;
726               break;
727             }
728           }
729         }
730         if (!found) {
731           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732           PetscCall(PetscBTClear(btvcand, i));
733         } else {
734           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735         }
736       }
737     }
738     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739   }
740   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742   PetscCall(MatDestroy(&lGe));
743 
744   /* Get the local G^T explicitly */
745   PetscCall(MatDestroy(&lGt));
746   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
748 
749   /* Mark shared nodal dofs */
750   PetscCall(PetscBTCreate(nv, &btvi));
751   for (i = 0; i < nv; i++) {
752     if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753   }
754 
755   if (matis->allow_repeated) {
756     PetscCall(PetscFree(eneighs[0]));
757     PetscCall(PetscFree(vneighs[0]));
758     PetscCall(PetscFree(eneighs));
759     PetscCall(PetscFree(vneighs));
760   }
761   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
763 
764   /* communicate corners and splitpoints */
765   PetscCall(PetscMalloc1(nv, &vmarks));
766   PetscCall(PetscArrayzero(sfvleaves, nv));
767   PetscCall(PetscArrayzero(sfvroots, Lv));
768   for (i = 0; i < nv; i++)
769     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
770 
771   if (print) {
772     IS tbz;
773 
774     cum = 0;
775     for (i = 0; i < nv; i++)
776       if (sfvleaves[i]) vmarks[cum++] = i;
777 
778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780     PetscCall(ISView(tbz, NULL));
781     PetscCall(ISDestroy(&tbz));
782   }
783 
784   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
788 
789   /* Zero rows of lGt corresponding to identified corners
790      and interior nodal dofs */
791   cum = 0;
792   for (i = 0; i < nv; i++) {
793     if (sfvleaves[i]) {
794       vmarks[cum++] = i;
795       PetscCall(PetscBTSet(btv, i));
796     } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797   }
798   PetscCall(PetscBTDestroy(&btvi));
799   if (print) {
800     IS tbz;
801 
802     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804     PetscCall(ISView(tbz, NULL));
805     PetscCall(ISDestroy(&tbz));
806   }
807   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808   PetscCall(PetscFree(vmarks));
809   PetscCall(PetscSFDestroy(&sfv));
810   PetscCall(PetscFree2(sfvleaves, sfvroots));
811 
812   /* Recompute G */
813   PetscCall(MatDestroy(&lG));
814   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815   if (print) {
816     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817     PetscCall(MatView(lG, NULL));
818     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819     PetscCall(MatView(lGt, NULL));
820   }
821 
822   /* Get primal dofs (if any) */
823   cum = 0;
824   for (i = 0; i < ne; i++) {
825     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826   }
827   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829   if (print) {
830     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831     PetscCall(ISView(primals, NULL));
832   }
833   PetscCall(PetscBTDestroy(&bte));
834   /* TODO: what if the user passed in some of them ?  */
835   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836   PetscCall(ISDestroy(&primals));
837 
838   /* Compute edge connectivity */
839   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
840 
841   /* Symbolic conn = lG*lGt */
842   if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843     PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844     PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845     PetscCall(MatProductSetAlgorithm(conn, "default"));
846     PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848     PetscCall(MatProductSetFromOptions(conn));
849     PetscCall(MatProductSymbolic(conn));
850     PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851     if (fl2g) {
852       PetscBT   btf;
853       PetscInt *iia, *jja, *iiu, *jju;
854       PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
855 
856       /* create CSR for all local dofs */
857       PetscCall(PetscMalloc1(n + 1, &iia));
858       if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859         PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860         iiu = pcbddc->mat_graph->xadj;
861         jju = pcbddc->mat_graph->adjncy;
862       } else if (pcbddc->use_local_adj) {
863         rest = PETSC_TRUE;
864         PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865       } else {
866         free = PETSC_TRUE;
867         PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868         iiu[0] = 0;
869         for (i = 0; i < n; i++) {
870           iiu[i + 1] = i + 1;
871           jju[i]     = -1;
872         }
873       }
874 
875       /* import sizes of CSR */
876       iia[0] = 0;
877       for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
878 
879       /* overwrite entries corresponding to the Nedelec field */
880       PetscCall(PetscBTCreate(n, &btf));
881       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882       for (i = 0; i < ne; i++) {
883         PetscCall(PetscBTSet(btf, idxs[i]));
884         iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885       }
886 
887       /* iia in CSR */
888       for (i = 0; i < n; i++) iia[i + 1] += iia[i];
889 
890       /* jja in CSR */
891       PetscCall(PetscMalloc1(iia[n], &jja));
892       for (i = 0; i < n; i++)
893         if (!PetscBTLookup(btf, i))
894           for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
895 
896       /* map edge dofs connectivity */
897       if (jj) {
898         PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899         for (i = 0; i < ne; i++) {
900           PetscInt e = idxs[i];
901           for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902         }
903       }
904       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906       if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907       if (free) PetscCall(PetscFree2(iiu, jju));
908       PetscCall(PetscBTDestroy(&btf));
909     } else {
910       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911     }
912     PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913     PetscCall(MatDestroy(&conn));
914   }
915 
916   /* Analyze interface for edge dofs */
917   PetscCall(PCBDDCAnalyzeInterface(pc));
918   pcbddc->mat_graph->twodim = PETSC_FALSE;
919 
920   /* Get coarse edges in the edge space */
921   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
922 
923   if (fl2g) {
924     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925     PetscCall(PetscMalloc1(nee, &eedges));
926     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927   } else {
928     eedges  = alleedges;
929     primals = allprimals;
930   }
931 
932   /* Mark fine edge dofs with their coarse edge id */
933   PetscCall(PetscArrayzero(marks, ne));
934   PetscCall(ISGetLocalSize(primals, &cum));
935   PetscCall(ISGetIndices(primals, &idxs));
936   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937   PetscCall(ISRestoreIndices(primals, &idxs));
938   if (print) {
939     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940     PetscCall(ISView(primals, NULL));
941   }
942 
943   maxsize = 0;
944   for (i = 0; i < nee; i++) {
945     PetscInt size, mark = i + 1;
946 
947     PetscCall(ISGetLocalSize(eedges[i], &size));
948     PetscCall(ISGetIndices(eedges[i], &idxs));
949     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950     PetscCall(ISRestoreIndices(eedges[i], &idxs));
951     maxsize = PetscMax(maxsize, size);
952   }
953 
954   /* Find coarse edge endpoints */
955   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957   for (i = 0; i < nee; i++) {
958     PetscInt mark = i + 1, size;
959 
960     PetscCall(ISGetLocalSize(eedges[i], &size));
961     if (!size && nedfieldlocal) continue;
962     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963     PetscCall(ISGetIndices(eedges[i], &idxs));
964     if (print) {
965       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966       PetscCall(ISView(eedges[i], NULL));
967     }
968     for (j = 0; j < size; j++) {
969       PetscInt k, ee = idxs[j];
970       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
971       for (k = ii[ee]; k < ii[ee + 1]; k++) {
972         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
973         if (PetscBTLookup(btv, jj[k])) {
974           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976           PetscInt  k2;
977           PetscBool corner = PETSC_FALSE;
978           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980             /* it's a corner if either is connected with an edge dof belonging to a different cc or
981                if the edge dof lie on the natural part of the boundary */
982             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983               corner = PETSC_TRUE;
984               break;
985             }
986           }
987           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
989             PetscCall(PetscBTSet(btv, jj[k]));
990           } else {
991             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
992           }
993         }
994       }
995     }
996     PetscCall(ISRestoreIndices(eedges[i], &idxs));
997   }
998   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000   PetscCall(PetscBTDestroy(&btb));
1001 
1002   /* Reset marked primal dofs */
1003   PetscCall(ISGetLocalSize(primals, &cum));
1004   PetscCall(ISGetIndices(primals, &idxs));
1005   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006   PetscCall(ISRestoreIndices(primals, &idxs));
1007 
1008   /* Now use the initial lG */
1009   PetscCall(MatDestroy(&lG));
1010   PetscCall(MatDestroy(&lGt));
1011   lG = lGinit;
1012   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1013 
1014   /* Compute extended cols indices */
1015   PetscCall(PetscBTCreate(nv, &btvc));
1016   PetscCall(PetscBTCreate(nee, &bter));
1017   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019   i *= maxsize;
1020   PetscCall(PetscCalloc1(nee, &extcols));
1021   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022   eerr = PETSC_FALSE;
1023   for (i = 0; i < nee; i++) {
1024     PetscInt size, found = 0;
1025 
1026     cum = 0;
1027     PetscCall(ISGetLocalSize(eedges[i], &size));
1028     if (!size && nedfieldlocal) continue;
1029     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030     PetscCall(ISGetIndices(eedges[i], &idxs));
1031     PetscCall(PetscBTMemzero(nv, btvc));
1032     for (j = 0; j < size; j++) {
1033       PetscInt k, ee = idxs[j];
1034       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035         PetscInt vv = jj[k];
1036         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037         else if (!PetscBTLookupSet(btvc, vv)) found++;
1038       }
1039     }
1040     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045     /* it may happen that endpoints are not defined at this point
1046        if it is the case, mark this edge for a second pass */
1047     if (cum != size - 1 || found != 2) {
1048       PetscCall(PetscBTSet(bter, i));
1049       if (print) {
1050         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051         PetscCall(ISView(eedges[i], NULL));
1052         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053         PetscCall(ISView(extcols[i], NULL));
1054       }
1055       eerr = PETSC_TRUE;
1056     }
1057   }
1058   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059   PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
1060   if (done) {
1061     PetscInt *newprimals;
1062 
1063     PetscCall(PetscMalloc1(ne, &newprimals));
1064     PetscCall(ISGetLocalSize(primals, &cum));
1065     PetscCall(ISGetIndices(primals, &idxs));
1066     PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067     PetscCall(ISRestoreIndices(primals, &idxs));
1068     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070     for (i = 0; i < nee; i++) {
1071       PetscBool has_candidates = PETSC_FALSE;
1072       if (PetscBTLookup(bter, i)) {
1073         PetscInt size, mark = i + 1;
1074 
1075         PetscCall(ISGetLocalSize(eedges[i], &size));
1076         PetscCall(ISGetIndices(eedges[i], &idxs));
1077         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078         for (j = 0; j < size; j++) {
1079           PetscInt k, ee = idxs[j];
1080           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082             /* set all candidates located on the edge as corners */
1083             if (PetscBTLookup(btvcand, jj[k])) {
1084               PetscInt k2, vv = jj[k];
1085               has_candidates = PETSC_TRUE;
1086               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087               PetscCall(PetscBTSet(btv, vv));
1088               /* set all edge dofs connected to candidate as primals */
1089               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090                 if (marks[jjt[k2]] == mark) {
1091                   PetscInt k3, ee2 = jjt[k2];
1092                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093                   newprimals[cum++] = ee2;
1094                   /* finally set the new corners */
1095                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097                     PetscCall(PetscBTSet(btv, jj[k3]));
1098                   }
1099                 }
1100               }
1101             } else {
1102               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103             }
1104           }
1105         }
1106         if (!has_candidates) { /* circular edge */
1107           PetscInt k, ee = idxs[0], *tmarks;
1108 
1109           PetscCall(PetscCalloc1(ne, &tmarks));
1110           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
1111           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112             PetscInt k2;
1113             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
1114             PetscCall(PetscBTSet(btv, jj[k]));
1115             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116           }
1117           for (j = 0; j < size; j++) {
1118             if (tmarks[idxs[j]] > 1) {
1119               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120               newprimals[cum++] = idxs[j];
1121             }
1122           }
1123           PetscCall(PetscFree(tmarks));
1124         }
1125         PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126       }
1127       PetscCall(ISDestroy(&extcols[i]));
1128     }
1129     PetscCall(PetscFree(extcols));
1130     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132     if (fl2g) {
1133       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134       PetscCall(ISDestroy(&primals));
1135       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136       PetscCall(PetscFree(eedges));
1137     }
1138     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140     PetscCall(PetscFree(newprimals));
1141     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142     PetscCall(ISDestroy(&primals));
1143     PetscCall(PCBDDCAnalyzeInterface(pc));
1144     pcbddc->mat_graph->twodim = PETSC_FALSE;
1145     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146     if (fl2g) {
1147       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148       PetscCall(PetscMalloc1(nee, &eedges));
1149       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150     } else {
1151       eedges  = alleedges;
1152       primals = allprimals;
1153     }
1154     PetscCall(PetscCalloc1(nee, &extcols));
1155 
1156     /* Mark again */
1157     PetscCall(PetscArrayzero(marks, ne));
1158     for (i = 0; i < nee; i++) {
1159       PetscInt size, mark = i + 1;
1160 
1161       PetscCall(ISGetLocalSize(eedges[i], &size));
1162       PetscCall(ISGetIndices(eedges[i], &idxs));
1163       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165     }
1166     if (print) {
1167       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168       PetscCall(ISView(primals, NULL));
1169     }
1170 
1171     /* Recompute extended cols */
1172     eerr = PETSC_FALSE;
1173     for (i = 0; i < nee; i++) {
1174       PetscInt size;
1175 
1176       cum = 0;
1177       PetscCall(ISGetLocalSize(eedges[i], &size));
1178       if (!size && nedfieldlocal) continue;
1179       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180       PetscCall(ISGetIndices(eedges[i], &idxs));
1181       for (j = 0; j < size; j++) {
1182         PetscInt k, ee = idxs[j];
1183         for (k = ii[ee]; k < ii[ee + 1]; k++)
1184           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185       }
1186       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191       if (cum != size - 1) {
1192         if (print) {
1193           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194           PetscCall(ISView(eedges[i], NULL));
1195           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196           PetscCall(ISView(extcols[i], NULL));
1197         }
1198         eerr = PETSC_TRUE;
1199       }
1200     }
1201   }
1202   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203   PetscCall(PetscFree2(extrow, gidxs));
1204   PetscCall(PetscBTDestroy(&bter));
1205   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206   /* an error should not occur at this point */
1207   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1208 
1209   /* Check the number of endpoints */
1210   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211   PetscCall(PetscMalloc1(2 * nee, &corners));
1212   PetscCall(PetscMalloc1(nee, &cedges));
1213   for (i = 0; i < nee; i++) {
1214     PetscInt size, found = 0, gc[2];
1215 
1216     /* init with defaults */
1217     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218     PetscCall(ISGetLocalSize(eedges[i], &size));
1219     if (!size && nedfieldlocal) continue;
1220     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221     PetscCall(ISGetIndices(eedges[i], &idxs));
1222     PetscCall(PetscBTMemzero(nv, btvc));
1223     for (j = 0; j < size; j++) {
1224       PetscInt k, ee = idxs[j];
1225       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226         PetscInt vv = jj[k];
1227         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229           corners[i * 2 + found++] = vv;
1230         }
1231       }
1232     }
1233     if (found != 2) {
1234       PetscInt e;
1235       if (fl2g) {
1236         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237       } else {
1238         e = idxs[0];
1239       }
1240       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241     }
1242 
1243     /* get primal dof index on this coarse edge */
1244     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245     if (gc[0] > gc[1]) {
1246       PetscInt swap      = corners[2 * i];
1247       corners[2 * i]     = corners[2 * i + 1];
1248       corners[2 * i + 1] = swap;
1249     }
1250     cedges[i] = idxs[size - 1];
1251     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253   }
1254   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255   PetscCall(PetscBTDestroy(&btvc));
1256 
1257   if (PetscDefined(USE_DEBUG)) {
1258     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259      not interfere with neighbouring coarse edges */
1260     PetscCall(PetscMalloc1(nee + 1, &emarks));
1261     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262     for (i = 0; i < nv; i++) {
1263       PetscInt emax = 0, eemax = 0;
1264 
1265       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266       PetscCall(PetscArrayzero(emarks, nee + 1));
1267       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268       for (j = 1; j < nee + 1; j++) {
1269         if (emax < emarks[j]) {
1270           emax  = emarks[j];
1271           eemax = j;
1272         }
1273       }
1274       /* not relevant for edges */
1275       if (!eemax) continue;
1276 
1277       for (j = ii[i]; j < ii[i + 1]; j++) {
1278         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279       }
1280     }
1281     PetscCall(PetscFree(emarks));
1282     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283   }
1284 
1285   /* Compute extended rows indices for edge blocks of the change of basis */
1286   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288   extmem *= maxsize;
1289   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290   PetscCall(PetscMalloc1(nee, &extrows));
1291   PetscCall(PetscCalloc1(nee, &extrowcum));
1292   for (i = 0; i < nv; i++) {
1293     PetscInt mark = 0, size, start;
1294 
1295     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296     for (j = ii[i]; j < ii[i + 1]; j++)
1297       if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1298 
1299     /* not relevant */
1300     if (!mark) continue;
1301 
1302     /* import extended row */
1303     mark--;
1304     start = mark * extmem + extrowcum[mark];
1305     size  = ii[i + 1] - ii[i];
1306     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308     extrowcum[mark] += size;
1309   }
1310   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311   PetscCall(MatDestroy(&lGt));
1312   PetscCall(PetscFree(marks));
1313 
1314   /* Compress extrows */
1315   cum = 0;
1316   for (i = 0; i < nee; i++) {
1317     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318     PetscCall(PetscSortRemoveDupsInt(&size, start));
1319     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320     cum = PetscMax(cum, size);
1321   }
1322   PetscCall(PetscFree(extrowcum));
1323   PetscCall(PetscBTDestroy(&btv));
1324   PetscCall(PetscBTDestroy(&btvcand));
1325 
1326   /* Workspace for lapack inner calls and VecSetValues */
1327   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1328 
1329   /* Create change of basis matrix (preallocation can be improved) */
1330   PetscCall(MatCreate(comm, &T));
1331   PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332   PetscCall(MatSetType(T, MATAIJ));
1333   PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1334   PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1335   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1336   PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1337   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1338   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1339 
1340   /* Defaults to identity */
1341   for (i = pc->mat->rmap->rstart; i < pc->mat->rmap->rend; i++) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1342 
1343   /* Create discrete gradient for the coarser level if needed */
1344   PetscCall(MatDestroy(&pcbddc->nedcG));
1345   PetscCall(ISDestroy(&pcbddc->nedclocal));
1346   if (pcbddc->current_level < pcbddc->max_levels) {
1347     ISLocalToGlobalMapping cel2g, cvl2g;
1348     IS                     wis, gwis;
1349     PetscInt               cnv, cne;
1350 
1351     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1352     if (fl2g) {
1353       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1354     } else {
1355       PetscCall(PetscObjectReference((PetscObject)wis));
1356       pcbddc->nedclocal = wis;
1357     }
1358     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1359     PetscCall(ISDestroy(&wis));
1360     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1361     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1362     PetscCall(ISDestroy(&wis));
1363     PetscCall(ISDestroy(&gwis));
1364 
1365     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1366     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1367     PetscCall(ISDestroy(&wis));
1368     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1369     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1370     PetscCall(ISDestroy(&wis));
1371     PetscCall(ISDestroy(&gwis));
1372 
1373     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1374     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1375     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1376     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1377     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1378     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1379     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1380     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1381   }
1382   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1383 
1384 #if defined(PRINT_GDET)
1385   inc = 0;
1386   lev = pcbddc->current_level;
1387 #endif
1388 
1389   /* Insert values in the change of basis matrix */
1390   for (i = 0; i < nee; i++) {
1391     Mat         Gins = NULL, GKins = NULL;
1392     IS          cornersis = NULL;
1393     PetscScalar cvals[2];
1394 
1395     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1396     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1397     if (Gins && GKins) {
1398       const PetscScalar *data;
1399       const PetscInt    *rows, *cols;
1400       PetscInt           nrh, nch, nrc, ncc;
1401 
1402       PetscCall(ISGetIndices(eedges[i], &cols));
1403       /* H1 */
1404       PetscCall(ISGetIndices(extrows[i], &rows));
1405       PetscCall(MatGetSize(Gins, &nrh, &nch));
1406       PetscCall(MatDenseGetArrayRead(Gins, &data));
1407       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1408       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1409       PetscCall(ISRestoreIndices(extrows[i], &rows));
1410       /* complement */
1411       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1412       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1413       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1414       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1415       PetscCall(MatDenseGetArrayRead(GKins, &data));
1416       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1417       PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1418 
1419       /* coarse discrete gradient */
1420       if (pcbddc->nedcG) {
1421         PetscInt cols[2];
1422 
1423         cols[0] = 2 * i;
1424         cols[1] = 2 * i + 1;
1425         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1426       }
1427       PetscCall(ISRestoreIndices(eedges[i], &cols));
1428     }
1429     PetscCall(ISDestroy(&extrows[i]));
1430     PetscCall(ISDestroy(&extcols[i]));
1431     PetscCall(ISDestroy(&cornersis));
1432     PetscCall(MatDestroy(&Gins));
1433     PetscCall(MatDestroy(&GKins));
1434   }
1435 
1436   /* for FDM element-by-element: first dof on the edge only constraint. Why? */
1437   if (elements_corners && pcbddc->mat_graph->multi_element) {
1438     ISLocalToGlobalMapping map;
1439     MatNullSpace           nnsp;
1440     Vec                    quad_vec;
1441 
1442     PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL));
1443     PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp));
1444     PetscCall(VecLockReadPop(quad_vec));
1445     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
1446     PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1447     for (i = 0; i < nee; i++) {
1448       const PetscInt *idxs;
1449       PetscScalar     one = 1.0;
1450 
1451       PetscCall(ISGetLocalSize(alleedges[i], &cum));
1452       if (!cum) continue;
1453       PetscCall(ISGetIndices(alleedges[i], &idxs));
1454       PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES));
1455       PetscCall(ISRestoreIndices(alleedges[i], &idxs));
1456     }
1457     PetscCall(VecLockReadPush(quad_vec));
1458     PetscCall(VecDestroy(&quad_vec));
1459     PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1460     PetscCall(MatNullSpaceDestroy(&nnsp));
1461   }
1462   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1463 
1464   /* Start assembling */
1465   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1466   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1467 
1468   /* Free */
1469   if (fl2g) {
1470     PetscCall(ISDestroy(&primals));
1471     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1472     PetscCall(PetscFree(eedges));
1473   }
1474 
1475   /* hack mat_graph with primal dofs on the coarse edges */
1476   {
1477     PCBDDCGraph graph  = pcbddc->mat_graph;
1478     PetscInt   *oqueue = graph->queue;
1479     PetscInt   *ocptr  = graph->cptr;
1480     PetscInt    ncc, *idxs;
1481 
1482     /* find first primal edge */
1483     if (pcbddc->nedclocal) {
1484       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1485     } else {
1486       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1487       idxs = cedges;
1488     }
1489     cum = 0;
1490     while (cum < nee && cedges[cum] < 0) cum++;
1491 
1492     /* adapt connected components */
1493     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1494     graph->cptr[0] = 0;
1495     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1496       PetscInt lc = ocptr[i + 1] - ocptr[i];
1497       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1498         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1499         graph->queue[graph->cptr[ncc]] = cedges[cum];
1500         ncc++;
1501         lc--;
1502         cum++;
1503         while (cum < nee && cedges[cum] < 0) cum++;
1504       }
1505       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1506       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1507       ncc++;
1508     }
1509     graph->ncc = ncc;
1510     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1511     PetscCall(PetscFree2(ocptr, oqueue));
1512   }
1513   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1514   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1515   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1516 
1517   PetscCall(ISDestroy(&nedfieldlocal));
1518   PetscCall(PetscFree(extrow));
1519   PetscCall(PetscFree2(work, rwork));
1520   PetscCall(PetscFree(corners));
1521   PetscCall(PetscFree(cedges));
1522   PetscCall(PetscFree(extrows));
1523   PetscCall(PetscFree(extcols));
1524   PetscCall(MatDestroy(&lG));
1525 
1526   /* Complete assembling */
1527   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1528   PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1529   if (pcbddc->nedcG) {
1530     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1531     PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1532   }
1533 
1534   PetscCall(ISDestroy(&elements_corners));
1535 
1536   /* set change of basis */
1537   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1538   PetscCall(MatDestroy(&T));
1539   PetscFunctionReturn(PETSC_SUCCESS);
1540 }
1541 
1542 /* the near-null space of BDDC carries information on quadrature weights,
1543    and these can be collinear -> so cheat with MatNullSpaceCreate
1544    and create a suitable set of basis vectors first */
1545 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1546 {
1547   PetscInt i;
1548 
1549   PetscFunctionBegin;
1550   for (i = 0; i < nvecs; i++) {
1551     PetscInt first, last;
1552 
1553     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1554     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1555     if (i >= first && i < last) {
1556       PetscScalar *data;
1557       PetscCall(VecGetArray(quad_vecs[i], &data));
1558       if (!has_const) {
1559         data[i - first] = 1.;
1560       } else {
1561         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1562         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1563       }
1564       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1565     }
1566     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1567   }
1568   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1569   for (i = 0; i < nvecs; i++) { /* reset vectors */
1570     PetscInt first, last;
1571     PetscCall(VecLockReadPop(quad_vecs[i]));
1572     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1573     if (i >= first && i < last) {
1574       PetscScalar *data;
1575       PetscCall(VecGetArray(quad_vecs[i], &data));
1576       if (!has_const) {
1577         data[i - first] = 0.;
1578       } else {
1579         data[2 * i - first]     = 0.;
1580         data[2 * i - first + 1] = 0.;
1581       }
1582       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1583     }
1584     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1585     PetscCall(VecLockReadPush(quad_vecs[i]));
1586   }
1587   PetscFunctionReturn(PETSC_SUCCESS);
1588 }
1589 
1590 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1591 {
1592   Mat                    loc_divudotp;
1593   Vec                    p, v, quad_vec;
1594   ISLocalToGlobalMapping map;
1595   PetscScalar           *array;
1596 
1597   PetscFunctionBegin;
1598   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1599   if (!transpose) {
1600     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1601   } else {
1602     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1603   }
1604   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1605   PetscCall(VecLockReadPop(quad_vec));
1606   PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1607 
1608   /* compute local quad vec */
1609   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1610   if (!transpose) {
1611     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1612   } else {
1613     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1614   }
1615   /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1616   PetscCall(VecSet(p, 1.));
1617   if (!transpose) {
1618     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1619   } else {
1620     PetscCall(MatMult(loc_divudotp, p, v));
1621   }
1622   PetscCall(VecDestroy(&p));
1623   if (vl2l) {
1624     Mat        lA;
1625     VecScatter sc;
1626     Vec        vins;
1627 
1628     PetscCall(MatISGetLocalMat(A, &lA));
1629     PetscCall(MatCreateVecs(lA, &vins, NULL));
1630     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1631     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1632     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1633     PetscCall(VecScatterDestroy(&sc));
1634     PetscCall(VecDestroy(&v));
1635     v = vins;
1636   }
1637 
1638   /* mask summation of interface values */
1639   PetscInt        n, *mmask, *mask, *idxs, nmr, nr;
1640   const PetscInt *degree;
1641   PetscSF         msf;
1642 
1643   PetscCall(VecGetLocalSize(v, &n));
1644   PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1645   PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1646   PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1647   PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1648   PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, &degree));
1649   PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, &degree));
1650   for (PetscInt i = 0, c = 0; i < nr; i++) {
1651     mmask[c] = 1;
1652     c += degree[i];
1653   }
1654   PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1655   PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1656   PetscCall(VecGetArray(v, &array));
1657   for (PetscInt i = 0; i < n; i++) {
1658     array[i] *= mask[i];
1659     idxs[i] = i;
1660   }
1661   PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1662   PetscCall(VecRestoreArray(v, &array));
1663   PetscCall(PetscFree3(mmask, mask, idxs));
1664   PetscCall(VecDestroy(&v));
1665   PetscCall(VecAssemblyBegin(quad_vec));
1666   PetscCall(VecAssemblyEnd(quad_vec));
1667   PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1668   PetscCall(VecLockReadPush(quad_vec));
1669   PetscCall(VecDestroy(&quad_vec));
1670   PetscFunctionReturn(PETSC_SUCCESS);
1671 }
1672 
1673 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1674 {
1675   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1676 
1677   PetscFunctionBegin;
1678   if (primalv) {
1679     if (pcbddc->user_primal_vertices_local) {
1680       IS list[2], newp;
1681 
1682       list[0] = primalv;
1683       list[1] = pcbddc->user_primal_vertices_local;
1684       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1685       PetscCall(ISSortRemoveDups(newp));
1686       PetscCall(ISDestroy(&list[1]));
1687       pcbddc->user_primal_vertices_local = newp;
1688     } else {
1689       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1690     }
1691   }
1692   PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694 
1695 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1696 {
1697   PetscInt f, *comp = (PetscInt *)ctx;
1698 
1699   PetscFunctionBegin;
1700   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1701   PetscFunctionReturn(PETSC_SUCCESS);
1702 }
1703 
1704 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1705 {
1706   Vec       local, global;
1707   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1708   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1709   PetscBool monolithic = PETSC_FALSE;
1710 
1711   PetscFunctionBegin;
1712   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1713   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1714   PetscOptionsEnd();
1715   /* need to convert from global to local topology information and remove references to information in global ordering */
1716   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1717   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1718   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1719   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1720   if (monolithic) { /* just get block size to properly compute vertices */
1721     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1722     goto boundary;
1723   }
1724 
1725   if (pcbddc->user_provided_isfordofs) {
1726     if (pcbddc->n_ISForDofs) {
1727       PetscInt i;
1728 
1729       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1730       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1731         PetscInt bs;
1732 
1733         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1734         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1735         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1736         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1737       }
1738       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1739       pcbddc->n_ISForDofs      = 0;
1740       PetscCall(PetscFree(pcbddc->ISForDofs));
1741     }
1742   } else {
1743     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1744       DM dm;
1745 
1746       PetscCall(MatGetDM(pc->pmat, &dm));
1747       if (!dm) PetscCall(PCGetDM(pc, &dm));
1748       if (dm) {
1749         IS      *fields;
1750         PetscInt nf, i;
1751 
1752         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1753         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1754         for (i = 0; i < nf; i++) {
1755           PetscInt bs;
1756 
1757           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1758           PetscCall(ISGetBlockSize(fields[i], &bs));
1759           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1760           PetscCall(ISDestroy(&fields[i]));
1761         }
1762         PetscCall(PetscFree(fields));
1763         pcbddc->n_ISForDofsLocal = nf;
1764       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1765         PetscContainer c;
1766 
1767         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1768         if (c) {
1769           MatISLocalFields lf;
1770           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1771           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1772         } else { /* fallback, create the default fields if bs > 1 */
1773           PetscInt i, n = matis->A->rmap->n;
1774           PetscCall(MatGetBlockSize(pc->pmat, &i));
1775           if (i > 1) {
1776             pcbddc->n_ISForDofsLocal = i;
1777             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1778             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1779           }
1780         }
1781       }
1782     } else {
1783       PetscInt i;
1784       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1785     }
1786   }
1787 
1788 boundary:
1789   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1790     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1791   } else if (pcbddc->DirichletBoundariesLocal) {
1792     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1793   }
1794   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1795     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1796   } else if (pcbddc->NeumannBoundariesLocal) {
1797     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1798   }
1799   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1800   PetscCall(VecDestroy(&global));
1801   PetscCall(VecDestroy(&local));
1802   /* detect local disconnected subdomains if requested or needed */
1803   if (pcbddc->detect_disconnected || matis->allow_repeated) {
1804     IS        primalv = NULL;
1805     PetscInt  nel;
1806     PetscBool filter = pcbddc->detect_disconnected_filter;
1807 
1808     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1809     PetscCall(PetscFree(pcbddc->local_subs));
1810     PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1811     if (matis->allow_repeated && nel) {
1812       const PetscInt *elsizes;
1813 
1814       pcbddc->n_local_subs = nel;
1815       PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1816       PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1817       for (PetscInt i = 0, c = 0; i < nel; i++) {
1818         PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1819         c += elsizes[i];
1820       }
1821     } else {
1822       PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1823     }
1824     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1825     PetscCall(ISDestroy(&primalv));
1826   }
1827   /* early stage corner detection */
1828   {
1829     DM dm;
1830 
1831     PetscCall(MatGetDM(pc->pmat, &dm));
1832     if (!dm) PetscCall(PCGetDM(pc, &dm));
1833     if (dm) {
1834       PetscBool isda;
1835 
1836       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1837       if (isda) {
1838         ISLocalToGlobalMapping l2l;
1839         IS                     corners;
1840         Mat                    lA;
1841         PetscBool              gl, lo;
1842 
1843         {
1844           Vec                cvec;
1845           const PetscScalar *coords;
1846           PetscInt           dof, n, cdim;
1847           PetscBool          memc = PETSC_TRUE;
1848 
1849           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1850           PetscCall(DMGetCoordinates(dm, &cvec));
1851           PetscCall(VecGetLocalSize(cvec, &n));
1852           PetscCall(VecGetBlockSize(cvec, &cdim));
1853           n /= cdim;
1854           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1855           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1856           PetscCall(VecGetArrayRead(cvec, &coords));
1857 #if defined(PETSC_USE_COMPLEX)
1858           memc = PETSC_FALSE;
1859 #endif
1860           if (dof != 1) memc = PETSC_FALSE;
1861           if (memc) {
1862             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1863           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1864             PetscReal *bcoords = pcbddc->mat_graph->coords;
1865             PetscInt   i, b, d;
1866 
1867             for (i = 0; i < n; i++) {
1868               for (b = 0; b < dof; b++) {
1869                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1870               }
1871             }
1872           }
1873           PetscCall(VecRestoreArrayRead(cvec, &coords));
1874           pcbddc->mat_graph->cdim  = cdim;
1875           pcbddc->mat_graph->cnloc = dof * n;
1876           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1877         }
1878         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1879         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1880         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1881         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1882         lo = (PetscBool)(l2l && corners);
1883         PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1884         if (gl) { /* From PETSc's DMDA */
1885           const PetscInt *idx;
1886           PetscInt        dof, bs, *idxout, n;
1887 
1888           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1889           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1890           PetscCall(ISGetLocalSize(corners, &n));
1891           PetscCall(ISGetIndices(corners, &idx));
1892           if (bs == dof) {
1893             PetscCall(PetscMalloc1(n, &idxout));
1894             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1895           } else { /* the original DMDA local-to-local map have been modified */
1896             PetscInt i, d;
1897 
1898             PetscCall(PetscMalloc1(dof * n, &idxout));
1899             for (i = 0; i < n; i++)
1900               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1901             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
1902 
1903             bs = 1;
1904             n *= dof;
1905           }
1906           PetscCall(ISRestoreIndices(corners, &idx));
1907           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1908           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1909           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1910           PetscCall(ISDestroy(&corners));
1911           pcbddc->corner_selected  = PETSC_TRUE;
1912           pcbddc->corner_selection = PETSC_TRUE;
1913         }
1914         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1915       }
1916     }
1917   }
1918   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1919     DM dm;
1920 
1921     PetscCall(MatGetDM(pc->pmat, &dm));
1922     if (!dm) PetscCall(PCGetDM(pc, &dm));
1923     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1924       Vec          vcoords;
1925       PetscSection section;
1926       PetscReal   *coords;
1927       PetscInt     d, cdim, nl, nf, **ctxs;
1928       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1929       /* debug coordinates */
1930       PetscViewer       viewer;
1931       PetscBool         flg;
1932       PetscViewerFormat format;
1933       const char       *prefix;
1934 
1935       PetscCall(DMGetCoordinateDim(dm, &cdim));
1936       PetscCall(DMGetLocalSection(dm, &section));
1937       PetscCall(PetscSectionGetNumFields(section, &nf));
1938       PetscCall(DMCreateGlobalVector(dm, &vcoords));
1939       PetscCall(VecGetLocalSize(vcoords, &nl));
1940       PetscCall(PetscMalloc1(nl * cdim, &coords));
1941       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1942       PetscCall(PetscMalloc1(nf, &ctxs[0]));
1943       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1944       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1945 
1946       /* debug coordinates */
1947       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1948       PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1949       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1950       for (d = 0; d < cdim; d++) {
1951         PetscInt           i;
1952         const PetscScalar *v;
1953         char               name[16];
1954 
1955         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1956         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d));
1957         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1958         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1959         if (flg) PetscCall(VecView(vcoords, viewer));
1960         PetscCall(VecGetArrayRead(vcoords, &v));
1961         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1962         PetscCall(VecRestoreArrayRead(vcoords, &v));
1963       }
1964       PetscCall(VecDestroy(&vcoords));
1965       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1966       PetscCall(PetscFree(coords));
1967       PetscCall(PetscFree(ctxs[0]));
1968       PetscCall(PetscFree2(funcs, ctxs));
1969       if (flg) {
1970         PetscCall(PetscViewerPopFormat(viewer));
1971         PetscCall(PetscViewerDestroy(&viewer));
1972       }
1973     }
1974   }
1975   PetscFunctionReturn(PETSC_SUCCESS);
1976 }
1977 
1978 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1979 {
1980   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
1981   IS              nis;
1982   const PetscInt *idxs;
1983   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;
1984 
1985   PetscFunctionBegin;
1986   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
1987   if (mop == MPI_LAND) {
1988     /* init rootdata with true */
1989     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1990   } else {
1991     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
1992   }
1993   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
1994   PetscCall(ISGetLocalSize(*is, &nd));
1995   PetscCall(ISGetIndices(*is, &idxs));
1996   for (i = 0; i < nd; i++)
1997     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1998   PetscCall(ISRestoreIndices(*is, &idxs));
1999   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2000   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2001   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2002   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2003   if (mop == MPI_LAND) {
2004     PetscCall(PetscMalloc1(nd, &nidxs));
2005   } else {
2006     PetscCall(PetscMalloc1(n, &nidxs));
2007   }
2008   for (i = 0, nnd = 0; i < n; i++)
2009     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2010   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2011   PetscCall(ISDestroy(is));
2012   *is = nis;
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2017 {
2018   PC_IS   *pcis   = (PC_IS *)pc->data;
2019   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2020 
2021   PetscFunctionBegin;
2022   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2023   if (pcbddc->ChangeOfBasisMatrix) {
2024     Vec swap;
2025 
2026     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2027     swap                = pcbddc->work_change;
2028     pcbddc->work_change = r;
2029     r                   = swap;
2030   }
2031   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2032   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2033   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2034   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2035   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2036   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2037   PetscCall(VecSet(z, 0.));
2038   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2039   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2040   if (pcbddc->ChangeOfBasisMatrix) {
2041     pcbddc->work_change = r;
2042     PetscCall(VecCopy(z, pcbddc->work_change));
2043     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2044   }
2045   PetscFunctionReturn(PETSC_SUCCESS);
2046 }
2047 
2048 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2049 {
2050   PCBDDCBenignMatMult_ctx ctx;
2051   PetscBool               apply_right, apply_left, reset_x;
2052 
2053   PetscFunctionBegin;
2054   PetscCall(MatShellGetContext(A, &ctx));
2055   if (transpose) {
2056     apply_right = ctx->apply_left;
2057     apply_left  = ctx->apply_right;
2058   } else {
2059     apply_right = ctx->apply_right;
2060     apply_left  = ctx->apply_left;
2061   }
2062   reset_x = PETSC_FALSE;
2063   if (apply_right) {
2064     const PetscScalar *ax;
2065     PetscInt           nl, i;
2066 
2067     PetscCall(VecGetLocalSize(x, &nl));
2068     PetscCall(VecGetArrayRead(x, &ax));
2069     PetscCall(PetscArraycpy(ctx->work, ax, nl));
2070     PetscCall(VecRestoreArrayRead(x, &ax));
2071     for (i = 0; i < ctx->benign_n; i++) {
2072       PetscScalar     sum, val;
2073       const PetscInt *idxs;
2074       PetscInt        nz, j;
2075       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2076       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2077       sum = 0.;
2078       if (ctx->apply_p0) {
2079         val = ctx->work[idxs[nz - 1]];
2080         for (j = 0; j < nz - 1; j++) {
2081           sum += ctx->work[idxs[j]];
2082           ctx->work[idxs[j]] += val;
2083         }
2084       } else {
2085         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2086       }
2087       ctx->work[idxs[nz - 1]] -= sum;
2088       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2089     }
2090     PetscCall(VecPlaceArray(x, ctx->work));
2091     reset_x = PETSC_TRUE;
2092   }
2093   if (transpose) {
2094     PetscCall(MatMultTranspose(ctx->A, x, y));
2095   } else {
2096     PetscCall(MatMult(ctx->A, x, y));
2097   }
2098   if (reset_x) PetscCall(VecResetArray(x));
2099   if (apply_left) {
2100     PetscScalar *ay;
2101     PetscInt     i;
2102 
2103     PetscCall(VecGetArray(y, &ay));
2104     for (i = 0; i < ctx->benign_n; i++) {
2105       PetscScalar     sum, val;
2106       const PetscInt *idxs;
2107       PetscInt        nz, j;
2108       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2109       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2110       val = -ay[idxs[nz - 1]];
2111       if (ctx->apply_p0) {
2112         sum = 0.;
2113         for (j = 0; j < nz - 1; j++) {
2114           sum += ay[idxs[j]];
2115           ay[idxs[j]] += val;
2116         }
2117         ay[idxs[nz - 1]] += sum;
2118       } else {
2119         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2120         ay[idxs[nz - 1]] = 0.;
2121       }
2122       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2123     }
2124     PetscCall(VecRestoreArray(y, &ay));
2125   }
2126   PetscFunctionReturn(PETSC_SUCCESS);
2127 }
2128 
2129 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2130 {
2131   PetscFunctionBegin;
2132   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2133   PetscFunctionReturn(PETSC_SUCCESS);
2134 }
2135 
2136 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2137 {
2138   PetscFunctionBegin;
2139   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2140   PetscFunctionReturn(PETSC_SUCCESS);
2141 }
2142 
2143 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2144 {
2145   PC_IS                  *pcis   = (PC_IS *)pc->data;
2146   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
2147   PCBDDCBenignMatMult_ctx ctx;
2148 
2149   PetscFunctionBegin;
2150   if (!restore) {
2151     Mat                A_IB, A_BI;
2152     PetscScalar       *work;
2153     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2154 
2155     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2156     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2157     PetscCall(PetscMalloc1(pcis->n, &work));
2158     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2159     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2160     PetscCall(MatSetType(A_IB, MATSHELL));
2161     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
2162     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
2163     PetscCall(PetscNew(&ctx));
2164     PetscCall(MatShellSetContext(A_IB, ctx));
2165     ctx->apply_left  = PETSC_TRUE;
2166     ctx->apply_right = PETSC_FALSE;
2167     ctx->apply_p0    = PETSC_FALSE;
2168     ctx->benign_n    = pcbddc->benign_n;
2169     if (reuse) {
2170       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2171       ctx->free                 = PETSC_FALSE;
2172     } else { /* TODO: could be optimized for successive solves */
2173       ISLocalToGlobalMapping N_to_D;
2174       PetscInt               i;
2175 
2176       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2177       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2178       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2179       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2180       ctx->free = PETSC_TRUE;
2181     }
2182     ctx->A    = pcis->A_IB;
2183     ctx->work = work;
2184     PetscCall(MatSetUp(A_IB));
2185     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2186     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2187     pcis->A_IB = A_IB;
2188 
2189     /* A_BI as A_IB^T */
2190     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2191     pcbddc->benign_original_mat = pcis->A_BI;
2192     pcis->A_BI                  = A_BI;
2193   } else {
2194     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2195     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2196     PetscCall(MatDestroy(&pcis->A_IB));
2197     pcis->A_IB = ctx->A;
2198     ctx->A     = NULL;
2199     PetscCall(MatDestroy(&pcis->A_BI));
2200     pcis->A_BI                  = pcbddc->benign_original_mat;
2201     pcbddc->benign_original_mat = NULL;
2202     if (ctx->free) {
2203       PetscInt i;
2204       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2205       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2206     }
2207     PetscCall(PetscFree(ctx->work));
2208     PetscCall(PetscFree(ctx));
2209   }
2210   PetscFunctionReturn(PETSC_SUCCESS);
2211 }
2212 
2213 /* used just in bddc debug mode */
2214 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2215 {
2216   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2217   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2218   Mat      An;
2219 
2220   PetscFunctionBegin;
2221   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2222   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2223   if (is1) {
2224     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2225     PetscCall(MatDestroy(&An));
2226   } else {
2227     *B = An;
2228   }
2229   PetscFunctionReturn(PETSC_SUCCESS);
2230 }
2231 
2232 /* TODO: add reuse flag */
2233 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2234 {
2235   Mat             Bt;
2236   PetscScalar    *a, *bdata;
2237   const PetscInt *ii, *ij;
2238   PetscInt        m, n, i, nnz, *bii, *bij;
2239   PetscBool       flg_row;
2240 
2241   PetscFunctionBegin;
2242   PetscCall(MatGetSize(A, &n, &m));
2243   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2244   PetscCall(MatSeqAIJGetArray(A, &a));
2245   nnz = n;
2246   for (i = 0; i < ii[n]; i++) {
2247     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2248   }
2249   PetscCall(PetscMalloc1(n + 1, &bii));
2250   PetscCall(PetscMalloc1(nnz, &bij));
2251   PetscCall(PetscMalloc1(nnz, &bdata));
2252   nnz    = 0;
2253   bii[0] = 0;
2254   for (i = 0; i < n; i++) {
2255     PetscInt j;
2256     for (j = ii[i]; j < ii[i + 1]; j++) {
2257       PetscScalar entry = a[j];
2258       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2259         bij[nnz]   = ij[j];
2260         bdata[nnz] = entry;
2261         nnz++;
2262       }
2263     }
2264     bii[i + 1] = nnz;
2265   }
2266   PetscCall(MatSeqAIJRestoreArray(A, &a));
2267   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2268   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2269   {
2270     Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2271     b->free_a     = PETSC_TRUE;
2272     b->free_ij    = PETSC_TRUE;
2273   }
2274   if (*B == A) PetscCall(MatDestroy(&A));
2275   *B = Bt;
2276   PetscFunctionReturn(PETSC_SUCCESS);
2277 }
2278 
2279 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2280 {
2281   Mat                    B = NULL;
2282   DM                     dm;
2283   IS                     is_dummy, *cc_n;
2284   ISLocalToGlobalMapping l2gmap_dummy;
2285   PCBDDCGraph            graph;
2286   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2287   PetscInt               i, n;
2288   PetscInt              *xadj, *adjncy;
2289   PetscBool              isplex = PETSC_FALSE;
2290 
2291   PetscFunctionBegin;
2292   if (ncc) *ncc = 0;
2293   if (cc) *cc = NULL;
2294   if (primalv) *primalv = NULL;
2295   PetscCall(PCBDDCGraphCreate(&graph));
2296   PetscCall(MatGetDM(pc->pmat, &dm));
2297   if (!dm) PetscCall(PCGetDM(pc, &dm));
2298   if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2299   if (filter) isplex = PETSC_FALSE;
2300 
2301   if (isplex) { /* this code has been modified from plexpartition.c */
2302     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2303     PetscInt       *adj = NULL;
2304     IS              cellNumbering;
2305     const PetscInt *cellNum;
2306     PetscBool       useCone, useClosure;
2307     PetscSection    section;
2308     PetscSegBuffer  adjBuffer;
2309     PetscSF         sfPoint;
2310 
2311     PetscCall(DMConvert(dm, DMPLEX, &dm));
2312     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2313     PetscCall(DMGetPointSF(dm, &sfPoint));
2314     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2315     /* Build adjacency graph via a section/segbuffer */
2316     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2317     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2318     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2319     /* Always use FVM adjacency to create partitioner graph */
2320     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2321     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2322     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2323     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2324     for (n = 0, p = pStart; p < pEnd; p++) {
2325       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2326       if (nroots > 0) {
2327         if (cellNum[p] < 0) continue;
2328       }
2329       adjSize = PETSC_DETERMINE;
2330       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2331       for (a = 0; a < adjSize; ++a) {
2332         const PetscInt point = adj[a];
2333         if (pStart <= point && point < pEnd) {
2334           PetscInt *PETSC_RESTRICT pBuf;
2335           PetscCall(PetscSectionAddDof(section, p, 1));
2336           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2337           *pBuf = point;
2338         }
2339       }
2340       n++;
2341     }
2342     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2343     /* Derive CSR graph from section/segbuffer */
2344     PetscCall(PetscSectionSetUp(section));
2345     PetscCall(PetscSectionGetStorageSize(section, &size));
2346     PetscCall(PetscMalloc1(n + 1, &xadj));
2347     for (idx = 0, p = pStart; p < pEnd; p++) {
2348       if (nroots > 0) {
2349         if (cellNum[p] < 0) continue;
2350       }
2351       PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2352     }
2353     xadj[n] = size;
2354     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2355     /* Clean up */
2356     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2357     PetscCall(PetscSectionDestroy(&section));
2358     PetscCall(PetscFree(adj));
2359     graph->xadj   = xadj;
2360     graph->adjncy = adjncy;
2361   } else {
2362     Mat       A;
2363     PetscBool isseqaij, flg_row;
2364 
2365     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2366     if (!A->rmap->N || !A->cmap->N) {
2367       PetscCall(PCBDDCGraphDestroy(&graph));
2368       PetscFunctionReturn(PETSC_SUCCESS);
2369     }
2370     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2371     if (!isseqaij && filter) {
2372       PetscBool isseqdense;
2373 
2374       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2375       if (!isseqdense) {
2376         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2377       } else { /* TODO: rectangular case and LDA */
2378         PetscScalar *array;
2379         PetscReal    chop = 1.e-6;
2380 
2381         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2382         PetscCall(MatDenseGetArray(B, &array));
2383         PetscCall(MatGetSize(B, &n, NULL));
2384         for (i = 0; i < n; i++) {
2385           PetscInt j;
2386           for (j = i + 1; j < n; j++) {
2387             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2388             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2389             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2390           }
2391         }
2392         PetscCall(MatDenseRestoreArray(B, &array));
2393         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2394       }
2395     } else {
2396       PetscCall(PetscObjectReference((PetscObject)A));
2397       B = A;
2398     }
2399     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2400 
2401     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2402     if (filter) {
2403       PetscScalar *data;
2404       PetscInt     j, cum;
2405 
2406       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2407       PetscCall(MatSeqAIJGetArray(B, &data));
2408       cum = 0;
2409       for (i = 0; i < n; i++) {
2410         PetscInt t;
2411 
2412         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2413           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2414           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2415         }
2416         t                = xadj_filtered[i];
2417         xadj_filtered[i] = cum;
2418         cum += t;
2419       }
2420       PetscCall(MatSeqAIJRestoreArray(B, &data));
2421       graph->xadj   = xadj_filtered;
2422       graph->adjncy = adjncy_filtered;
2423     } else {
2424       graph->xadj   = xadj;
2425       graph->adjncy = adjncy;
2426     }
2427   }
2428   /* compute local connected components using PCBDDCGraph */
2429   graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2430   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2431   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2432   PetscCall(ISDestroy(&is_dummy));
2433   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2434   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2435   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2436   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2437 
2438   /* partial clean up */
2439   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2440   if (B) {
2441     PetscBool flg_row;
2442     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2443     PetscCall(MatDestroy(&B));
2444   }
2445   if (isplex) {
2446     PetscCall(PetscFree(xadj));
2447     PetscCall(PetscFree(adjncy));
2448   }
2449 
2450   /* get back data */
2451   if (isplex) {
2452     if (ncc) *ncc = graph->ncc;
2453     if (cc || primalv) {
2454       Mat          A;
2455       PetscBT      btv, btvt, btvc;
2456       PetscSection subSection;
2457       PetscInt    *ids, cum, cump, *cids, *pids;
2458       PetscInt     dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2459 
2460       PetscCall(DMGetDimension(dm, &dim));
2461       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2462       PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2463       PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2464       PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2465       PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2466       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2467       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2468       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2469       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2470       PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2471 
2472       /* First see if we find corners for the subdomains, i.e. a vertex
2473          shared by at least dim subdomain boundary faces. This does not
2474          cover all the possible cases with simplices but it is enough
2475          for tensor cells */
2476       if (vStart != fStart && dim <= 3) {
2477         for (PetscInt c = cStart; c < cEnd; c++) {
2478           PetscInt        nf, cnt = 0, mcnt = dim, *cfaces;
2479           const PetscInt *faces;
2480 
2481           PetscCall(DMPlexGetConeSize(dm, c, &nf));
2482           PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2483           PetscCall(DMPlexGetCone(dm, c, &faces));
2484           for (PetscInt f = 0; f < nf; f++) {
2485             PetscInt nc, ff;
2486 
2487             PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2488             PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2489             if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2490           }
2491           if (cnt >= mcnt) {
2492             PetscInt size, *closure = NULL;
2493 
2494             PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2495             for (PetscInt k = 0; k < 2 * size; k += 2) {
2496               PetscInt v = closure[k];
2497               if (v >= vStart && v < vEnd) {
2498                 PetscInt vsize, *vclosure = NULL;
2499 
2500                 cnt = 0;
2501                 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2502                 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2503                   PetscInt f = vclosure[vk];
2504                   if (f >= fStart && f < fEnd) {
2505                     PetscInt  nc, ff;
2506                     PetscBool valid = PETSC_FALSE;
2507 
2508                     for (PetscInt fk = 0; fk < nf; fk++)
2509                       if (f == cfaces[fk]) valid = PETSC_TRUE;
2510                     if (!valid) continue;
2511                     PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2512                     PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2513                     if (nc == 1 && f == ff) cnt++;
2514                   }
2515                 }
2516                 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2517                 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2518               }
2519             }
2520             PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2521           }
2522           PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2523         }
2524       }
2525 
2526       cids[0] = 0;
2527       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2528         PetscInt j;
2529 
2530         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2531         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2532           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2533 
2534           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2535           for (k = 0; k < 2 * size; k += 2) {
2536             PetscInt s, pp, p = closure[k], off, dof, cdof;
2537 
2538             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2539             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2540             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2541             for (s = 0; s < dof - cdof; s++) {
2542               if (PetscBTLookupSet(btvt, off + s)) continue;
2543               if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2544               else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2545               else pids[cump++] = off + s; /* cross-vertex */
2546             }
2547             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2548             if (pp != p) {
2549               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2550               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2551               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2552               for (s = 0; s < dof - cdof; s++) {
2553                 if (PetscBTLookupSet(btvt, off + s)) continue;
2554                 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2555                 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2556                 else pids[cump++] = off + s; /* cross-vertex */
2557               }
2558             }
2559           }
2560           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2561         }
2562         cids[i + 1] = cum;
2563         /* mark dofs as already assigned */
2564         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2565       }
2566       if (cc) {
2567         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2568         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2569         *cc = cc_n;
2570       }
2571       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2572       PetscCall(PetscFree3(ids, cids, pids));
2573       PetscCall(PetscBTDestroy(&btv));
2574       PetscCall(PetscBTDestroy(&btvt));
2575       PetscCall(PetscBTDestroy(&btvc));
2576       PetscCall(DMDestroy(&dm));
2577     }
2578   } else {
2579     if (ncc) *ncc = graph->ncc;
2580     if (cc) {
2581       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2582       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2583       *cc = cc_n;
2584     }
2585   }
2586   /* clean up graph */
2587   graph->xadj   = NULL;
2588   graph->adjncy = NULL;
2589   PetscCall(PCBDDCGraphDestroy(&graph));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2594 {
2595   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2596   PC_IS   *pcis   = (PC_IS *)pc->data;
2597   IS       dirIS  = NULL;
2598   PetscInt i;
2599 
2600   PetscFunctionBegin;
2601   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2602   if (zerodiag) {
2603     Mat             A;
2604     Vec             vec3_N;
2605     PetscScalar    *vals;
2606     const PetscInt *idxs;
2607     PetscInt        nz, *count;
2608 
2609     /* p0 */
2610     PetscCall(VecSet(pcis->vec1_N, 0.));
2611     PetscCall(PetscMalloc1(pcis->n, &vals));
2612     PetscCall(ISGetLocalSize(zerodiag, &nz));
2613     PetscCall(ISGetIndices(zerodiag, &idxs));
2614     for (i = 0; i < nz; i++) vals[i] = 1.;
2615     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2616     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2617     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2618     /* v_I */
2619     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2620     for (i = 0; i < nz; i++) vals[i] = 0.;
2621     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2622     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2623     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2624     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2625     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2626     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2627     if (dirIS) {
2628       PetscInt n;
2629 
2630       PetscCall(ISGetLocalSize(dirIS, &n));
2631       PetscCall(ISGetIndices(dirIS, &idxs));
2632       for (i = 0; i < n; i++) vals[i] = 0.;
2633       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2634       PetscCall(ISRestoreIndices(dirIS, &idxs));
2635     }
2636     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2637     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2638     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2639     PetscCall(VecSet(vec3_N, 0.));
2640     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2641     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2642     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2643     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2644     PetscCall(PetscFree(vals));
2645     PetscCall(VecDestroy(&vec3_N));
2646 
2647     /* there should not be any pressure dofs lying on the interface */
2648     PetscCall(PetscCalloc1(pcis->n, &count));
2649     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2650     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2651     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2652     PetscCall(ISGetIndices(zerodiag, &idxs));
2653     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2654     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2655     PetscCall(PetscFree(count));
2656   }
2657   PetscCall(ISDestroy(&dirIS));
2658 
2659   /* check PCBDDCBenignGetOrSetP0 */
2660   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2661   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2662   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2663   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2664   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2665   for (i = 0; i < pcbddc->benign_n; i++) {
2666     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2667     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2668   }
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2673 {
2674   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2675   Mat_IS   *matis     = (Mat_IS *)pc->pmat->data;
2676   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2677   PetscInt  nz, n, benign_n, bsp = 1;
2678   PetscInt *interior_dofs, n_interior_dofs, nneu;
2679   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2680 
2681   PetscFunctionBegin;
2682   if (reuse) goto project_b0;
2683   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2684   PetscCall(MatDestroy(&pcbddc->benign_B0));
2685   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2686   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2687   has_null_pressures = PETSC_TRUE;
2688   have_null          = PETSC_TRUE;
2689   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2690      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2691      Checks if all the pressure dofs in each subdomain have a zero diagonal
2692      If not, a change of basis on pressures is not needed
2693      since the local Schur complements are already SPD
2694   */
2695   if (pcbddc->n_ISForDofsLocal) {
2696     IS        iP = NULL;
2697     PetscInt  p, *pp;
2698     PetscBool flg, blocked = PETSC_FALSE;
2699 
2700     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2701     n = pcbddc->n_ISForDofsLocal;
2702     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2703     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2704     PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL));
2705     PetscOptionsEnd();
2706     if (!flg) {
2707       n     = 1;
2708       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2709     }
2710 
2711     bsp = 0;
2712     for (p = 0; p < n; p++) {
2713       PetscInt bs = 1;
2714 
2715       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2716       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2717       bsp += bs;
2718     }
2719     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2720     bsp = 0;
2721     for (p = 0; p < n; p++) {
2722       const PetscInt *idxs;
2723       PetscInt        b, bs = 1, npl, *bidxs;
2724 
2725       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2726       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2727       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2728       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2729       for (b = 0; b < bs; b++) {
2730         PetscInt i;
2731 
2732         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2733         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2734         bsp++;
2735       }
2736       PetscCall(PetscFree(bidxs));
2737       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2738     }
2739     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2740 
2741     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2742     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2743     if (iP) {
2744       IS newpressures;
2745 
2746       PetscCall(ISDifference(pressures, iP, &newpressures));
2747       PetscCall(ISDestroy(&pressures));
2748       pressures = newpressures;
2749     }
2750     PetscCall(ISSorted(pressures, &sorted));
2751     if (!sorted) PetscCall(ISSort(pressures));
2752     PetscCall(PetscFree(pp));
2753   }
2754 
2755   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2756   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2757   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2758   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2759   PetscCall(ISSorted(zerodiag, &sorted));
2760   if (!sorted) PetscCall(ISSort(zerodiag));
2761   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2762   zerodiag_save = zerodiag;
2763   PetscCall(ISGetLocalSize(zerodiag, &nz));
2764   if (!nz) {
2765     if (n) have_null = PETSC_FALSE;
2766     has_null_pressures = PETSC_FALSE;
2767     PetscCall(ISDestroy(&zerodiag));
2768   }
2769   recompute_zerodiag = PETSC_FALSE;
2770 
2771   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2772   zerodiag_subs   = NULL;
2773   benign_n        = 0;
2774   n_interior_dofs = 0;
2775   interior_dofs   = NULL;
2776   nneu            = 0;
2777   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2778   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2779   if (checkb) { /* need to compute interior nodes */
2780     PetscInt               n, i;
2781     PetscInt              *count;
2782     ISLocalToGlobalMapping mapping;
2783 
2784     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2785     PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2786     PetscCall(PetscMalloc1(n, &interior_dofs));
2787     for (i = 0; i < n; i++)
2788       if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2789     PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2790   }
2791   if (has_null_pressures) {
2792     IS             *subs;
2793     PetscInt        nsubs, i, j, nl;
2794     const PetscInt *idxs;
2795     PetscScalar    *array;
2796     Vec            *work;
2797 
2798     subs  = pcbddc->local_subs;
2799     nsubs = pcbddc->n_local_subs;
2800     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2801     if (checkb) {
2802       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2803       PetscCall(ISGetLocalSize(zerodiag, &nl));
2804       PetscCall(ISGetIndices(zerodiag, &idxs));
2805       /* work[0] = 1_p */
2806       PetscCall(VecSet(work[0], 0.));
2807       PetscCall(VecGetArray(work[0], &array));
2808       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2809       PetscCall(VecRestoreArray(work[0], &array));
2810       /* work[0] = 1_v */
2811       PetscCall(VecSet(work[1], 1.));
2812       PetscCall(VecGetArray(work[1], &array));
2813       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2814       PetscCall(VecRestoreArray(work[1], &array));
2815       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2816     }
2817 
2818     if (nsubs > 1 || bsp > 1) {
2819       IS      *is;
2820       PetscInt b, totb;
2821 
2822       totb  = bsp;
2823       is    = bsp > 1 ? bzerodiag : &zerodiag;
2824       nsubs = PetscMax(nsubs, 1);
2825       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2826       for (b = 0; b < totb; b++) {
2827         for (i = 0; i < nsubs; i++) {
2828           ISLocalToGlobalMapping l2g;
2829           IS                     t_zerodiag_subs;
2830           PetscInt               nl;
2831 
2832           if (subs) {
2833             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2834           } else {
2835             IS tis;
2836 
2837             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2838             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2839             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2840             PetscCall(ISDestroy(&tis));
2841           }
2842           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2843           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2844           if (nl) {
2845             PetscBool valid = PETSC_TRUE;
2846 
2847             if (checkb) {
2848               PetscCall(VecSet(matis->x, 0));
2849               PetscCall(ISGetLocalSize(subs[i], &nl));
2850               PetscCall(ISGetIndices(subs[i], &idxs));
2851               PetscCall(VecGetArray(matis->x, &array));
2852               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2853               PetscCall(VecRestoreArray(matis->x, &array));
2854               PetscCall(ISRestoreIndices(subs[i], &idxs));
2855               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2856               PetscCall(MatMult(matis->A, matis->x, matis->y));
2857               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2858               PetscCall(VecGetArray(matis->y, &array));
2859               for (j = 0; j < n_interior_dofs; j++) {
2860                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2861                   valid = PETSC_FALSE;
2862                   break;
2863                 }
2864               }
2865               PetscCall(VecRestoreArray(matis->y, &array));
2866             }
2867             if (valid && nneu) {
2868               const PetscInt *idxs;
2869               PetscInt        nzb;
2870 
2871               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2872               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2873               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2874               if (nzb) valid = PETSC_FALSE;
2875             }
2876             if (valid && pressures) {
2877               IS       t_pressure_subs, tmp;
2878               PetscInt i1, i2;
2879 
2880               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2881               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2882               PetscCall(ISGetLocalSize(tmp, &i1));
2883               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2884               if (i2 != i1) valid = PETSC_FALSE;
2885               PetscCall(ISDestroy(&t_pressure_subs));
2886               PetscCall(ISDestroy(&tmp));
2887             }
2888             if (valid) {
2889               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2890               benign_n++;
2891             } else recompute_zerodiag = PETSC_TRUE;
2892           }
2893           PetscCall(ISDestroy(&t_zerodiag_subs));
2894           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2895         }
2896       }
2897     } else { /* there's just one subdomain (or zero if they have not been detected */
2898       PetscBool valid = PETSC_TRUE;
2899 
2900       if (nneu) valid = PETSC_FALSE;
2901       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2902       if (valid && checkb) {
2903         PetscCall(MatMult(matis->A, work[0], matis->x));
2904         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2905         PetscCall(VecGetArray(matis->x, &array));
2906         for (j = 0; j < n_interior_dofs; j++) {
2907           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2908             valid = PETSC_FALSE;
2909             break;
2910           }
2911         }
2912         PetscCall(VecRestoreArray(matis->x, &array));
2913       }
2914       if (valid) {
2915         benign_n = 1;
2916         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2917         PetscCall(PetscObjectReference((PetscObject)zerodiag));
2918         zerodiag_subs[0] = zerodiag;
2919       }
2920     }
2921     if (checkb) PetscCall(VecDestroyVecs(2, &work));
2922   }
2923   PetscCall(PetscFree(interior_dofs));
2924 
2925   if (!benign_n) {
2926     PetscInt n;
2927 
2928     PetscCall(ISDestroy(&zerodiag));
2929     recompute_zerodiag = PETSC_FALSE;
2930     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2931     if (n) have_null = PETSC_FALSE;
2932   }
2933 
2934   /* final check for null pressures */
2935   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
2936 
2937   if (recompute_zerodiag) {
2938     PetscCall(ISDestroy(&zerodiag));
2939     if (benign_n == 1) {
2940       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2941       zerodiag = zerodiag_subs[0];
2942     } else {
2943       PetscInt i, nzn, *new_idxs;
2944 
2945       nzn = 0;
2946       for (i = 0; i < benign_n; i++) {
2947         PetscInt ns;
2948         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2949         nzn += ns;
2950       }
2951       PetscCall(PetscMalloc1(nzn, &new_idxs));
2952       nzn = 0;
2953       for (i = 0; i < benign_n; i++) {
2954         PetscInt ns, *idxs;
2955         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2956         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2957         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2958         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2959         nzn += ns;
2960       }
2961       PetscCall(PetscSortInt(nzn, new_idxs));
2962       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2963     }
2964     have_null = PETSC_FALSE;
2965   }
2966 
2967   /* determines if the coarse solver will be singular or not */
2968   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2969 
2970   /* Prepare matrix to compute no-net-flux */
2971   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2972     Mat                    A, loc_divudotp;
2973     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2974     IS                     row, col, isused = NULL;
2975     PetscInt               M, N, n, st, n_isused;
2976 
2977     if (pressures) {
2978       isused = pressures;
2979     } else {
2980       isused = zerodiag_save;
2981     }
2982     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
2983     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2984     PetscCall(MatGetLocalSize(A, &n, NULL));
2985     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
2986     n_isused = 0;
2987     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
2988     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
2989     st = st - n_isused;
2990     if (n) {
2991       const PetscInt *gidxs;
2992 
2993       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
2994       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
2995       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2996       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2997       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
2998       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
2999     } else {
3000       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3001       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3002       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3003     }
3004     PetscCall(MatGetSize(pc->pmat, NULL, &N));
3005     PetscCall(ISGetSize(row, &M));
3006     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3007     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3008     PetscCall(ISDestroy(&row));
3009     PetscCall(ISDestroy(&col));
3010     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3011     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3012     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3013     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3014     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3015     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3016     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3017     PetscCall(MatDestroy(&loc_divudotp));
3018     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3019     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3020   }
3021   PetscCall(ISDestroy(&zerodiag_save));
3022   PetscCall(ISDestroy(&pressures));
3023   if (bzerodiag) {
3024     PetscInt i;
3025 
3026     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3027     PetscCall(PetscFree(bzerodiag));
3028   }
3029   pcbddc->benign_n             = benign_n;
3030   pcbddc->benign_zerodiag_subs = zerodiag_subs;
3031 
3032   /* determines if the problem has subdomains with 0 pressure block */
3033   have_null = (PetscBool)(!!pcbddc->benign_n);
3034   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3035 
3036 project_b0:
3037   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3038   /* change of basis and p0 dofs */
3039   if (pcbddc->benign_n) {
3040     PetscInt i, s, *nnz;
3041 
3042     /* local change of basis for pressures */
3043     PetscCall(MatDestroy(&pcbddc->benign_change));
3044     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3045     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3046     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3047     PetscCall(PetscMalloc1(n, &nnz));
3048     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3049     for (i = 0; i < pcbddc->benign_n; i++) {
3050       const PetscInt *idxs;
3051       PetscInt        nzs, j;
3052 
3053       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3054       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3055       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3056       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
3057       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3058     }
3059     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3060     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3061     PetscCall(PetscFree(nnz));
3062     /* set identity by default */
3063     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3064     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3065     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3066     /* set change on pressures */
3067     for (s = 0; s < pcbddc->benign_n; s++) {
3068       PetscScalar    *array;
3069       const PetscInt *idxs;
3070       PetscInt        nzs;
3071 
3072       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3073       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3074       for (i = 0; i < nzs - 1; i++) {
3075         PetscScalar vals[2];
3076         PetscInt    cols[2];
3077 
3078         cols[0] = idxs[i];
3079         cols[1] = idxs[nzs - 1];
3080         vals[0] = 1.;
3081         vals[1] = 1.;
3082         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3083       }
3084       PetscCall(PetscMalloc1(nzs, &array));
3085       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3086       array[nzs - 1] = 1.;
3087       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3088       /* store local idxs for p0 */
3089       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3090       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3091       PetscCall(PetscFree(array));
3092     }
3093     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3094     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3095 
3096     /* project if needed */
3097     if (pcbddc->benign_change_explicit) {
3098       Mat M;
3099 
3100       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3101       PetscCall(MatDestroy(&pcbddc->local_mat));
3102       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3103       PetscCall(MatDestroy(&M));
3104     }
3105     /* store global idxs for p0 */
3106     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3107   }
3108   *zerodiaglocal = zerodiag;
3109   PetscFunctionReturn(PETSC_SUCCESS);
3110 }
3111 
3112 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3113 {
3114   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
3115   PetscScalar *array;
3116 
3117   PetscFunctionBegin;
3118   if (!pcbddc->benign_sf) {
3119     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3120     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3121   }
3122   if (get) {
3123     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3124     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3125     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3126     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3127   } else {
3128     PetscCall(VecGetArray(v, &array));
3129     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3130     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3131     PetscCall(VecRestoreArray(v, &array));
3132   }
3133   PetscFunctionReturn(PETSC_SUCCESS);
3134 }
3135 
3136 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3137 {
3138   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3139 
3140   PetscFunctionBegin;
3141   /* TODO: add error checking
3142     - avoid nested pop (or push) calls.
3143     - cannot push before pop.
3144     - cannot call this if pcbddc->local_mat is NULL
3145   */
3146   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3147   if (pop) {
3148     if (pcbddc->benign_change_explicit) {
3149       IS       is_p0;
3150       MatReuse reuse;
3151 
3152       /* extract B_0 */
3153       reuse = MAT_INITIAL_MATRIX;
3154       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3155       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3156       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3157       /* remove rows and cols from local problem */
3158       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3159       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3160       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3161       PetscCall(ISDestroy(&is_p0));
3162     } else {
3163       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
3164       PetscScalar *vals;
3165       PetscInt     i, n, *idxs_ins;
3166 
3167       PetscCall(VecGetLocalSize(matis->y, &n));
3168       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3169       if (!pcbddc->benign_B0) {
3170         PetscInt *nnz;
3171         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3172         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3173         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3174         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3175         for (i = 0; i < pcbddc->benign_n; i++) {
3176           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3177           nnz[i] = n - nnz[i];
3178         }
3179         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3180         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3181         PetscCall(PetscFree(nnz));
3182       }
3183 
3184       for (i = 0; i < pcbddc->benign_n; i++) {
3185         PetscScalar *array;
3186         PetscInt    *idxs, j, nz, cum;
3187 
3188         PetscCall(VecSet(matis->x, 0.));
3189         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3190         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3191         for (j = 0; j < nz; j++) vals[j] = 1.;
3192         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3193         PetscCall(VecAssemblyBegin(matis->x));
3194         PetscCall(VecAssemblyEnd(matis->x));
3195         PetscCall(VecSet(matis->y, 0.));
3196         PetscCall(MatMult(matis->A, matis->x, matis->y));
3197         PetscCall(VecGetArray(matis->y, &array));
3198         cum = 0;
3199         for (j = 0; j < n; j++) {
3200           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3201             vals[cum]     = array[j];
3202             idxs_ins[cum] = j;
3203             cum++;
3204           }
3205         }
3206         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3207         PetscCall(VecRestoreArray(matis->y, &array));
3208         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3209       }
3210       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3211       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3212       PetscCall(PetscFree2(idxs_ins, vals));
3213     }
3214   } else { /* push */
3215 
3216     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3217     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3218       PetscScalar *B0_vals;
3219       PetscInt    *B0_cols, B0_ncol;
3220 
3221       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3222       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3223       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3224       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3225       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3226     }
3227     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3228     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3229   }
3230   PetscFunctionReturn(PETSC_SUCCESS);
3231 }
3232 
3233 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3234 {
3235   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3236   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3237   PetscBLASInt    B_neigs, B_ierr, B_lwork;
3238   PetscBLASInt   *B_iwork, *B_ifail;
3239   PetscScalar    *work, lwork;
3240   PetscScalar    *St, *S, *eigv;
3241   PetscScalar    *Sarray, *Starray;
3242   PetscReal      *eigs, thresh, lthresh, uthresh;
3243   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3244   PetscBool       allocated_S_St, upart;
3245 #if defined(PETSC_USE_COMPLEX)
3246   PetscReal *rwork;
3247 #endif
3248 
3249   PetscFunctionBegin;
3250   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3251   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3252   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3253   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3254              sub_schurs->is_posdef);
3255   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3256 
3257   if (pcbddc->dbg_flag) {
3258     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3259     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3260     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3261     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3262     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3263   }
3264 
3265   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3266 
3267   /* max size of subsets */
3268   mss = 0;
3269   for (i = 0; i < sub_schurs->n_subs; i++) {
3270     PetscInt subset_size;
3271 
3272     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3273     mss = PetscMax(mss, subset_size);
3274   }
3275 
3276   /* min/max and threshold */
3277   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3278   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3279   nmax           = PetscMax(nmin, nmax);
3280   allocated_S_St = PETSC_FALSE;
3281   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3282     allocated_S_St = PETSC_TRUE;
3283   }
3284 
3285   /* allocate lapack workspace */
3286   cum = cum2 = 0;
3287   maxneigs   = 0;
3288   for (i = 0; i < sub_schurs->n_subs; i++) {
3289     PetscInt n, subset_size;
3290 
3291     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3292     n = PetscMin(subset_size, nmax);
3293     cum += subset_size;
3294     cum2 += subset_size * n;
3295     maxneigs = PetscMax(maxneigs, n);
3296   }
3297   lwork = 0;
3298   if (mss) {
3299     PetscScalar  sdummy  = 0.;
3300     PetscBLASInt B_itype = 1;
3301     PetscBLASInt B_N, idummy = 0;
3302     PetscReal    rdummy = 0., zero = 0.0;
3303     PetscReal    eps = 0.0; /* dlamch? */
3304 
3305     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3306     PetscCall(PetscBLASIntCast(mss, &B_N));
3307     B_lwork = -1;
3308     /* some implementations may complain about NULL pointers, even if we are querying */
3309     S       = &sdummy;
3310     St      = &sdummy;
3311     eigs    = &rdummy;
3312     eigv    = &sdummy;
3313     B_iwork = &idummy;
3314     B_ifail = &idummy;
3315 #if defined(PETSC_USE_COMPLEX)
3316     rwork = &rdummy;
3317 #endif
3318     thresh = 1.0;
3319     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3320 #if defined(PETSC_USE_COMPLEX)
3321     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3322 #else
3323     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3324 #endif
3325     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr);
3326     PetscCall(PetscFPTrapPop());
3327   }
3328 
3329   nv = 0;
3330   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3331     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3332   }
3333   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3334   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3335   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3336 #if defined(PETSC_USE_COMPLEX)
3337   PetscCall(PetscMalloc1(7 * mss, &rwork));
3338 #endif
3339   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3340                          &pcbddc->adaptive_constraints_data));
3341   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3342 
3343   maxneigs = 0;
3344   cum = cumarray                           = 0;
3345   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3346   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3347   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3348     const PetscInt *idxs;
3349 
3350     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3351     for (cum = 0; cum < nv; cum++) {
3352       pcbddc->adaptive_constraints_n[cum]            = 1;
3353       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3354       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3355       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3356       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3357     }
3358     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3359   }
3360 
3361   if (mss) { /* multilevel */
3362     if (sub_schurs->gdsw) {
3363       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3364       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3365     } else {
3366       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3367       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3368     }
3369   }
3370 
3371   lthresh = pcbddc->adaptive_threshold[0];
3372   uthresh = pcbddc->adaptive_threshold[1];
3373   upart   = pcbddc->use_deluxe_scaling;
3374   for (i = 0; i < sub_schurs->n_subs; i++) {
3375     const PetscInt *idxs;
3376     PetscReal       upper, lower;
3377     PetscInt        j, subset_size, eigs_start = 0;
3378     PetscBLASInt    B_N;
3379     PetscBool       same_data = PETSC_FALSE;
3380     PetscBool       scal      = PETSC_FALSE;
3381 
3382     if (upart) {
3383       upper = PETSC_MAX_REAL;
3384       lower = uthresh;
3385     } else {
3386       if (sub_schurs->gdsw) {
3387         upper = uthresh;
3388         lower = PETSC_MIN_REAL;
3389       } else {
3390         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3391         upper = 1. / uthresh;
3392         lower = 0.;
3393       }
3394     }
3395     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3396     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3397     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3398     /* this is experimental: we assume the dofs have been properly grouped to have
3399        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3400     if (!sub_schurs->is_posdef) {
3401       Mat T;
3402 
3403       for (j = 0; j < subset_size; j++) {
3404         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3405           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3406           PetscCall(MatScale(T, -1.0));
3407           PetscCall(MatDestroy(&T));
3408           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3409           PetscCall(MatScale(T, -1.0));
3410           PetscCall(MatDestroy(&T));
3411           if (sub_schurs->change_primal_sub) {
3412             PetscInt        nz, k;
3413             const PetscInt *idxs;
3414 
3415             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3416             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3417             for (k = 0; k < nz; k++) {
3418               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3419               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3420             }
3421             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3422           }
3423           scal = PETSC_TRUE;
3424           break;
3425         }
3426       }
3427     }
3428 
3429     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3430       if (sub_schurs->is_symmetric) {
3431         PetscInt j, k;
3432         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3433           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3434           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3435         }
3436         for (j = 0; j < subset_size; j++) {
3437           for (k = j; k < subset_size; k++) {
3438             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3439             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3440           }
3441         }
3442       } else {
3443         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3444         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3445       }
3446     } else {
3447       S  = Sarray + cumarray;
3448       St = Starray + cumarray;
3449     }
3450     /* see if we can save some work */
3451     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3452 
3453     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3454       B_neigs = 0;
3455     } else {
3456       PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0;
3457       PetscReal    eps = -1.0; /* dlamch? */
3458       PetscInt     nmin_s;
3459       PetscBool    compute_range;
3460 
3461       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3462       B_neigs       = 0;
3463       compute_range = (PetscBool)!same_data;
3464       if (nmin >= subset_size) compute_range = PETSC_FALSE;
3465 
3466       if (pcbddc->dbg_flag) {
3467         PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3468 
3469         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3470         PetscCall(
3471           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3472       }
3473 
3474       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3475       if (compute_range) {
3476         /* ask for eigenvalues larger than thresh */
3477         if (sub_schurs->is_posdef) {
3478 #if defined(PETSC_USE_COMPLEX)
3479           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3480 #else
3481           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3482 #endif
3483           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3484         } else { /* no theory so far, but it works nicely */
3485           PetscInt  recipe = 0, recipe_m = 1;
3486           PetscReal bb[2];
3487 
3488           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3489           switch (recipe) {
3490           case 0:
3491             if (scal) {
3492               bb[0] = PETSC_MIN_REAL;
3493               bb[1] = lthresh;
3494             } else {
3495               bb[0] = uthresh;
3496               bb[1] = PETSC_MAX_REAL;
3497             }
3498 #if defined(PETSC_USE_COMPLEX)
3499             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3500 #else
3501             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3502 #endif
3503             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3504             break;
3505           case 1:
3506             bb[0] = PETSC_MIN_REAL;
3507             bb[1] = lthresh * lthresh;
3508 #if defined(PETSC_USE_COMPLEX)
3509             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3510 #else
3511             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3512 #endif
3513             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3514             if (!scal) {
3515               PetscBLASInt B_neigs2 = 0;
3516 
3517               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3518               bb[1] = PETSC_MAX_REAL;
3519               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3520               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3521 #if defined(PETSC_USE_COMPLEX)
3522               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3523 #else
3524               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3525 #endif
3526               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3527               B_neigs += B_neigs2;
3528             }
3529             break;
3530           case 2:
3531             if (scal) {
3532               bb[0] = PETSC_MIN_REAL;
3533               bb[1] = 0;
3534 #if defined(PETSC_USE_COMPLEX)
3535               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3536 #else
3537               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3538 #endif
3539               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3540             } else {
3541               PetscBLASInt B_neigs2 = 0;
3542               PetscBool    do_copy  = PETSC_FALSE;
3543 
3544               lthresh = PetscMax(lthresh, 0.0);
3545               if (lthresh > 0.0) {
3546                 bb[0] = PETSC_MIN_REAL;
3547                 bb[1] = lthresh * lthresh;
3548 
3549                 do_copy = PETSC_TRUE;
3550 #if defined(PETSC_USE_COMPLEX)
3551                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3552 #else
3553                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3554 #endif
3555                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3556               }
3557               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3558               bb[1] = PETSC_MAX_REAL;
3559               if (do_copy) {
3560                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3561                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3562               }
3563 #if defined(PETSC_USE_COMPLEX)
3564               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3565 #else
3566               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3567 #endif
3568               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3569               B_neigs += B_neigs2;
3570             }
3571             break;
3572           case 3:
3573             if (scal) {
3574               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3575             } else {
3576               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3577             }
3578             if (!scal) {
3579               bb[0] = uthresh;
3580               bb[1] = PETSC_MAX_REAL;
3581 #if defined(PETSC_USE_COMPLEX)
3582               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3583 #else
3584               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3585 #endif
3586               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3587             }
3588             if (recipe_m > 0 && B_N - B_neigs > 0) {
3589               PetscBLASInt B_neigs2 = 0;
3590 
3591               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3592               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3593               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3594 #if defined(PETSC_USE_COMPLEX)
3595               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3596 #else
3597               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3598 #endif
3599               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3600               B_neigs += B_neigs2;
3601             }
3602             break;
3603           case 4:
3604             bb[0] = PETSC_MIN_REAL;
3605             bb[1] = lthresh;
3606 #if defined(PETSC_USE_COMPLEX)
3607             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3608 #else
3609             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3610 #endif
3611             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3612             {
3613               PetscBLASInt B_neigs2 = 0;
3614 
3615               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3616               bb[1] = PETSC_MAX_REAL;
3617               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3618               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3619 #if defined(PETSC_USE_COMPLEX)
3620               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3621 #else
3622               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3623 #endif
3624               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3625               B_neigs += B_neigs2;
3626             }
3627             break;
3628           case 5: /* same as before: first compute all eigenvalues, then filter */
3629 #if defined(PETSC_USE_COMPLEX)
3630             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3631 #else
3632             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3633 #endif
3634             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3635             {
3636               PetscInt e, k, ne;
3637               for (e = 0, ne = 0; e < B_neigs; e++) {
3638                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3639                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3640                   eigs[ne] = eigs[e];
3641                   ne++;
3642                 }
3643               }
3644               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3645               PetscCall(PetscBLASIntCast(ne, &B_neigs));
3646             }
3647             break;
3648           default:
3649             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3650           }
3651         }
3652       } else if (!same_data) { /* this is just to see all the eigenvalues */
3653         PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3654 #if defined(PETSC_USE_COMPLEX)
3655         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3656 #else
3657         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3658 #endif
3659         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3660       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3661         PetscInt k;
3662         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3663         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3664         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3665         nmin = nmax;
3666         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3667         for (k = 0; k < nmax; k++) {
3668           eigs[k]                     = 1. / PETSC_SMALL;
3669           eigv[k * (subset_size + 1)] = 1.0;
3670         }
3671       }
3672       PetscCall(PetscFPTrapPop());
3673       if (B_ierr) {
3674         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3675         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3676         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3677       }
3678 
3679       if (B_neigs > nmax) {
3680         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3681         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3682         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3683       }
3684 
3685       nmin_s = PetscMin(nmin, B_N);
3686       if (B_neigs < nmin_s) {
3687         PetscBLASInt B_neigs2 = 0;
3688 
3689         if (upart) {
3690           if (scal) {
3691             PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3692             B_IL = B_neigs + 1;
3693           } else {
3694             PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3695             B_IU = B_N - B_neigs;
3696           }
3697         } else {
3698           B_IL = B_neigs + 1;
3699           PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3700         }
3701         if (pcbddc->dbg_flag) {
3702           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3703         }
3704         if (sub_schurs->is_symmetric) {
3705           PetscInt j, k;
3706           for (j = 0; j < subset_size; j++) {
3707             for (k = j; k < subset_size; k++) {
3708               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3709               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3710             }
3711           }
3712         } else {
3713           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3714           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3715         }
3716         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3717 #if defined(PETSC_USE_COMPLEX)
3718         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3719 #else
3720         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3721 #endif
3722         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3723         PetscCall(PetscFPTrapPop());
3724         B_neigs += B_neigs2;
3725       }
3726       if (B_ierr) {
3727         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3728         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3729         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3730       }
3731       if (pcbddc->dbg_flag) {
3732         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3733         for (j = 0; j < B_neigs; j++) {
3734           if (!sub_schurs->gdsw) {
3735             if (eigs[j] == 0.0) {
3736               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3737             } else {
3738               if (upart) {
3739                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3740               } else {
3741                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1 / eigs[j + eigs_start])));
3742               }
3743             }
3744           } else {
3745             double pg = (double)eigs[j + eigs_start];
3746             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3747             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3748           }
3749         }
3750       }
3751     }
3752     /* change the basis back to the original one */
3753     if (sub_schurs->change) {
3754       Mat change, phi, phit;
3755 
3756       if (pcbddc->dbg_flag > 2) {
3757         PetscInt ii;
3758         for (ii = 0; ii < B_neigs; ii++) {
3759           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3760           for (j = 0; j < B_N; j++) {
3761 #if defined(PETSC_USE_COMPLEX)
3762             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3763             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3764             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3765 #else
3766             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3767 #endif
3768           }
3769         }
3770       }
3771       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3772       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3773       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3774       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3775       PetscCall(MatDestroy(&phit));
3776       PetscCall(MatDestroy(&phi));
3777     }
3778     maxneigs                               = PetscMax(B_neigs, maxneigs);
3779     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3780     if (B_neigs) {
3781       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3782 
3783       if (pcbddc->dbg_flag > 1) {
3784         PetscInt ii;
3785         for (ii = 0; ii < B_neigs; ii++) {
3786           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3787           for (j = 0; j < B_N; j++) {
3788 #if defined(PETSC_USE_COMPLEX)
3789             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3790             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3791             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3792 #else
3793             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3794 #endif
3795           }
3796         }
3797       }
3798       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3799       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3800       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3801       cum++;
3802     }
3803     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3804     /* shift for next computation */
3805     cumarray += subset_size * subset_size;
3806   }
3807   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3808 
3809   if (mss) {
3810     if (sub_schurs->gdsw) {
3811       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3812       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3813     } else {
3814       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3815       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3816       /* destroy matrices (junk) */
3817       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3818       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3819     }
3820   }
3821   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3822   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3823 #if defined(PETSC_USE_COMPLEX)
3824   PetscCall(PetscFree(rwork));
3825 #endif
3826   if (pcbddc->dbg_flag) {
3827     PetscInt maxneigs_r;
3828     PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3829     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3830   }
3831   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3832   PetscFunctionReturn(PETSC_SUCCESS);
3833 }
3834 
3835 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3836 {
3837   Mat coarse_submat;
3838 
3839   PetscFunctionBegin;
3840   /* Setup local scatters R_to_B and (optionally) R_to_D */
3841   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3842   PetscCall(PCBDDCSetUpLocalScatters(pc));
3843 
3844   /* Setup local neumann solver ksp_R */
3845   /* PCBDDCSetUpLocalScatters should be called first! */
3846   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3847 
3848   /*
3849      Setup local correction and local part of coarse basis.
3850      Gives back the dense local part of the coarse matrix in column major ordering
3851   */
3852   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3853 
3854   /* Compute total number of coarse nodes and setup coarse solver */
3855   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3856   PetscCall(MatDestroy(&coarse_submat));
3857   PetscFunctionReturn(PETSC_SUCCESS);
3858 }
3859 
3860 PetscErrorCode PCBDDCResetCustomization(PC pc)
3861 {
3862   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3863 
3864   PetscFunctionBegin;
3865   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3866   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3867   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3868   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3869   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3870   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3871   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3872   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3873   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3874   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3875   PetscFunctionReturn(PETSC_SUCCESS);
3876 }
3877 
3878 PetscErrorCode PCBDDCResetTopography(PC pc)
3879 {
3880   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3881   PetscInt i;
3882 
3883   PetscFunctionBegin;
3884   PetscCall(MatDestroy(&pcbddc->nedcG));
3885   PetscCall(ISDestroy(&pcbddc->nedclocal));
3886   PetscCall(MatDestroy(&pcbddc->discretegradient));
3887   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3888   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3889   PetscCall(MatDestroy(&pcbddc->switch_static_change));
3890   PetscCall(VecDestroy(&pcbddc->work_change));
3891   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3892   PetscCall(MatDestroy(&pcbddc->divudotp));
3893   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3894   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3895   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3896   pcbddc->n_local_subs = 0;
3897   PetscCall(PetscFree(pcbddc->local_subs));
3898   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3899   pcbddc->graphanalyzed        = PETSC_FALSE;
3900   pcbddc->recompute_topography = PETSC_TRUE;
3901   pcbddc->corner_selected      = PETSC_FALSE;
3902   PetscFunctionReturn(PETSC_SUCCESS);
3903 }
3904 
3905 PetscErrorCode PCBDDCResetSolvers(PC pc)
3906 {
3907   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3908 
3909   PetscFunctionBegin;
3910   PetscCall(VecDestroy(&pcbddc->coarse_vec));
3911   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3912   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3913   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3914   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3915   PetscCall(VecDestroy(&pcbddc->vec1_P));
3916   PetscCall(VecDestroy(&pcbddc->vec1_C));
3917   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3918   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3919   PetscCall(VecDestroy(&pcbddc->vec1_R));
3920   PetscCall(VecDestroy(&pcbddc->vec2_R));
3921   PetscCall(ISDestroy(&pcbddc->is_R_local));
3922   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3923   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3924   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3925   PetscCall(KSPReset(pcbddc->ksp_D));
3926   PetscCall(KSPReset(pcbddc->ksp_R));
3927   PetscCall(KSPReset(pcbddc->coarse_ksp));
3928   PetscCall(MatDestroy(&pcbddc->local_mat));
3929   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3930   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3931   PetscCall(PetscFree(pcbddc->global_primal_indices));
3932   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3933   PetscCall(MatDestroy(&pcbddc->benign_change));
3934   PetscCall(VecDestroy(&pcbddc->benign_vec));
3935   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3936   PetscCall(MatDestroy(&pcbddc->benign_B0));
3937   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3938   if (pcbddc->benign_zerodiag_subs) {
3939     PetscInt i;
3940     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3941     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3942   }
3943   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3944   PetscFunctionReturn(PETSC_SUCCESS);
3945 }
3946 
3947 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3948 {
3949   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3950   PC_IS   *pcis   = (PC_IS *)pc->data;
3951   VecType  impVecType;
3952   PetscInt n_constraints, n_R, old_size;
3953 
3954   PetscFunctionBegin;
3955   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3956   n_R           = pcis->n - pcbddc->n_vertices;
3957   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3958   /* local work vectors (try to avoid unneeded work)*/
3959   /* R nodes */
3960   old_size = -1;
3961   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3962   if (n_R != old_size) {
3963     PetscCall(VecDestroy(&pcbddc->vec1_R));
3964     PetscCall(VecDestroy(&pcbddc->vec2_R));
3965     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3966     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3967     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3968     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3969   }
3970   /* local primal dofs */
3971   old_size = -1;
3972   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3973   if (pcbddc->local_primal_size != old_size) {
3974     PetscCall(VecDestroy(&pcbddc->vec1_P));
3975     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3976     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3977     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
3978   }
3979   /* local explicit constraints */
3980   old_size = -1;
3981   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
3982   if (n_constraints && n_constraints != old_size) {
3983     PetscCall(VecDestroy(&pcbddc->vec1_C));
3984     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
3985     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
3986     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
3987   }
3988   PetscFunctionReturn(PETSC_SUCCESS);
3989 }
3990 
3991 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
3992 {
3993   PetscBool          flg;
3994   const PetscScalar *a;
3995 
3996   PetscFunctionBegin;
3997   PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
3998   if (flg) {
3999     PetscCall(MatDenseGetArrayRead(S, &a));
4000     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4001     PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4002     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4003     PetscCall(MatDenseRestoreArrayRead(S, &a));
4004   } else {
4005     const PetscInt *ii, *jj;
4006     PetscInt        n;
4007     PetscInt        buf[8192], *bufc = NULL;
4008     PetscBool       freeb = PETSC_FALSE;
4009     Mat             Sm    = S;
4010 
4011     PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4012     if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4013     else PetscCall(PetscObjectReference((PetscObject)S));
4014     PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4015     PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4016     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4017     if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4018       bufc = buf;
4019     } else {
4020       PetscCall(PetscMalloc1(nc, &bufc));
4021       freeb = PETSC_TRUE;
4022     }
4023 
4024     for (PetscInt i = 0; i < n; i++) {
4025       const PetscInt nci = ii[i + 1] - ii[i];
4026 
4027       for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4028       PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4029     }
4030     PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4031     PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4032     PetscCall(MatDestroy(&Sm));
4033     if (freeb) PetscCall(PetscFree(bufc));
4034   }
4035   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4036   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4037   PetscFunctionReturn(PETSC_SUCCESS);
4038 }
4039 
4040 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4041 {
4042   Mat_SeqAIJ        *aij;
4043   PetscInt          *ii, *jj;
4044   PetscScalar       *aa;
4045   PetscInt           nnz = 0, m, nc;
4046   const PetscScalar *a;
4047   const PetscScalar  zero = 0.0;
4048 
4049   PetscFunctionBegin;
4050   PetscCall(MatGetLocalSize(D, &m, &nc));
4051   PetscCall(MatDenseGetArrayRead(D, &a));
4052   PetscCall(PetscMalloc1(m + 1, &ii));
4053   PetscCall(PetscMalloc1(m * nc, &jj));
4054   PetscCall(PetscMalloc1(m * nc, &aa));
4055   ii[0] = 0;
4056   for (PetscInt k = 0; k < m; k++) {
4057     for (PetscInt s = 0; s < nc; s++) {
4058       const PetscInt    c = s + k * nc;
4059       const PetscScalar v = a[k + s * m];
4060 
4061       if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4062       jj[nnz] = j[c];
4063       aa[nnz] = a[k + s * m];
4064       nnz++;
4065     }
4066     ii[k + 1] = nnz;
4067   }
4068 
4069   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4070   PetscCall(MatDenseRestoreArrayRead(D, &a));
4071 
4072   aij          = (Mat_SeqAIJ *)(*mat)->data;
4073   aij->free_a  = PETSC_TRUE;
4074   aij->free_ij = PETSC_TRUE;
4075   PetscFunctionReturn(PETSC_SUCCESS);
4076 }
4077 
4078 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
4079 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4080 {
4081   PetscInt        n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4082   const PetscBool allowzeropivot    = PETSC_FALSE;
4083   PetscBool       zeropivotdetected = PETSC_FALSE;
4084   const PetscReal shift             = 0.0;
4085   PetscInt        ipvt[5], *ii, *jj, *indi, *indj;
4086   PetscScalar     work[25], *v_work = NULL, *aa, *diag;
4087   PetscLogDouble  flops = 0.0;
4088 
4089   PetscFunctionBegin;
4090   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4091   for (PetscInt i = 0; i < nblocks; i++) {
4092     ncnt += bsizes[i];
4093     ncnt2 += PetscSqr(bsizes[i]);
4094   }
4095   PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4096   for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4097   if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4098 
4099   PetscCall(PetscMalloc1(n + 1, &ii));
4100   PetscCall(PetscMalloc1(ncnt2, &jj));
4101   PetscCall(PetscCalloc1(ncnt2, &aa));
4102 
4103   ncnt  = 0;
4104   ii[0] = 0;
4105   indi  = ii;
4106   indj  = jj;
4107   diag  = aa;
4108   for (PetscInt i = 0; i < nblocks; i++) {
4109     const PetscInt bs = bsizes[i];
4110 
4111     for (PetscInt k = 0; k < bs; k++) {
4112       indi[k + 1] = indi[k] + bs;
4113       for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4114     }
4115     PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4116     switch (bs) {
4117     case 1:
4118       *diag = 1.0 / (*diag);
4119       break;
4120     case 2:
4121       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4122       break;
4123     case 3:
4124       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4125       break;
4126     case 4:
4127       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4128       break;
4129     case 5:
4130       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4131       break;
4132     case 6:
4133       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4134       break;
4135     case 7:
4136       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4137       break;
4138     default:
4139       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4140     }
4141     ncnt += bs;
4142     flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4143     diag += bs * bs;
4144     indj += bs * bs;
4145     indi += bs;
4146   }
4147   PetscCall(PetscLogFlops(flops));
4148   PetscCall(PetscFree2(v_work, v_pivots));
4149   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4150   {
4151     Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4152     aij->free_a     = PETSC_TRUE;
4153     aij->free_ij    = PETSC_TRUE;
4154   }
4155   PetscFunctionReturn(PETSC_SUCCESS);
4156 }
4157 
4158 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B)
4159 {
4160   const PetscScalar *rarr;
4161   PetscScalar       *larr;
4162   PetscSF            vsf;
4163   PetscInt           n, rld, lld;
4164 
4165   PetscFunctionBegin;
4166   PetscCall(MatGetSize(A, NULL, &n));
4167   PetscCall(MatDenseGetLDA(A, &rld));
4168   PetscCall(MatDenseGetLDA(B, &lld));
4169   PetscCall(MatDenseGetArrayRead(A, &rarr));
4170   PetscCall(MatDenseGetArrayWrite(B, &larr));
4171   PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf));
4172   PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4173   PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4174   PetscCall(MatDenseRestoreArrayRead(A, &rarr));
4175   PetscCall(MatDenseRestoreArrayWrite(B, &larr));
4176   PetscCall(PetscSFDestroy(&vsf));
4177   PetscFunctionReturn(PETSC_SUCCESS);
4178 }
4179 
4180 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4181 {
4182   PC_IS          *pcis       = (PC_IS *)pc->data;
4183   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
4184   PCBDDCGraph     graph      = pcbddc->mat_graph;
4185   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4186   /* submatrices of local problem */
4187   Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4188   /* submatrices of local coarse problem */
4189   Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4190   /* working matrices */
4191   Mat C_CR;
4192 
4193   /* additional working stuff */
4194   PC              pc_R;
4195   IS              is_R, is_V, is_C;
4196   const PetscInt *idx_V, *idx_C;
4197   Mat             F, Brhs = NULL;
4198   Vec             dummy_vec;
4199   PetscBool       isLU, isCHOL, need_benign_correction, sparserhs;
4200   PetscInt       *idx_V_B;
4201   PetscInt        lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4202   PetscInt        n_eff_vertices, n_eff_constraints;
4203   PetscInt        i, n_R, n_D, n_B;
4204   PetscScalar     one = 1.0, m_one = -1.0;
4205 
4206   /* Multi-element support */
4207   PetscBool multi_element = graph->multi_element;
4208   PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4209   PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4210   IS        is_C_perm = NULL;
4211   PetscInt  n_C_bss = 0, *C_bss = NULL;
4212   Mat       coarse_phi_multi;
4213 
4214   PetscFunctionBegin;
4215   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4216   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4217 
4218   /* Set Non-overlapping dimensions */
4219   n_vertices    = pcbddc->n_vertices;
4220   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4221   n_B           = pcis->n_B;
4222   n_D           = pcis->n - n_B;
4223   n_R           = pcis->n - n_vertices;
4224 
4225   /* vertices in boundary numbering */
4226   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4227   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4228   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4229 
4230   /* these two cases still need to be optimized */
4231   if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4232 
4233   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
4234   if (multi_element) {
4235     PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4236 
4237     PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4238     PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4239     PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4240     PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4241     PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4242 
4243     /* group vertices and constraints by subdomain id */
4244     const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4245     const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4246     PetscInt       *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4247     PetscInt        n_el = PetscMax(graph->n_local_subs, 1);
4248 
4249     PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4250     PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4251     PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4252     for (PetscInt i = 0; i < n_vertices; i++) {
4253       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4254 
4255       V_to_eff_V[i] = count_eff[s];
4256       count_eff[s] += 1;
4257     }
4258     for (PetscInt i = 0; i < n_constraints; i++) {
4259       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4260 
4261       C_to_eff_C[i] = count_eff[s];
4262       count_eff[s] += 1;
4263     }
4264 
4265     /* preallocation */
4266     PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4267     for (PetscInt i = 0; i < n_vertices; i++) {
4268       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4269 
4270       nnz[i] = count_eff[s] + count_eff[s + 1];
4271     }
4272     for (PetscInt i = 0; i < n_constraints; i++) {
4273       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4274 
4275       nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4276     }
4277     PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4278     PetscCall(PetscFree(nnz));
4279 
4280     n_eff_vertices    = 0;
4281     n_eff_constraints = 0;
4282     for (PetscInt i = 0; i < n_el; i++) {
4283       n_eff_vertices       = PetscMax(n_eff_vertices, count_eff[2 * i]);
4284       n_eff_constraints    = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4285       count_eff[2 * i]     = 0;
4286       count_eff[2 * i + 1] = 0;
4287     }
4288 
4289     const PetscInt *idx;
4290     PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4291 
4292     for (PetscInt i = 0; i < n_vertices; i++) {
4293       const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4294       const PetscInt s = 2 * e;
4295 
4296       V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4297       count_eff[s] += 1;
4298     }
4299     for (PetscInt i = 0; i < n_constraints; i++) {
4300       const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4301       const PetscInt s = 2 * e + 1;
4302 
4303       C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4304       count_eff[s] += 1;
4305     }
4306 
4307     PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4308     PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4309     PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4310     PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4311     for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4312     for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4313     for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4314     for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4315 
4316     PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4317     for (PetscInt i = 0; i < n_R; i++) {
4318       const PetscInt e = graph->nodes[idx[i]].local_sub;
4319       const PetscInt s = 2 * e;
4320       PetscInt       j;
4321 
4322       for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4323       for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4324     }
4325     PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4326     PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4327     for (PetscInt i = 0; i < n_B; i++) {
4328       const PetscInt e = graph->nodes[idx[i]].local_sub;
4329       const PetscInt s = 2 * e;
4330       PetscInt       j;
4331 
4332       for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4333       for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4334     }
4335     PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4336 
4337     /* permutation and blocksizes for block invert of S_CC */
4338     PetscInt *idxp;
4339 
4340     PetscCall(PetscMalloc1(n_constraints, &idxp));
4341     PetscCall(PetscMalloc1(n_el, &C_bss));
4342     n_C_bss = 0;
4343     for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4344       const PetscInt nc = count_eff[2 * e + 1];
4345 
4346       if (nc) C_bss[n_C_bss++] = nc;
4347       for (PetscInt c = 0; c < nc; c++) { idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c]; }
4348       cnt += nc;
4349     }
4350 
4351     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4352 
4353     PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4354     PetscCall(PetscFree(count_eff));
4355   } else {
4356     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4357     n_eff_constraints = n_constraints;
4358     n_eff_vertices    = n_vertices;
4359   }
4360 
4361   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4362   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4363   PetscCall(PCSetUp(pc_R));
4364   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4365   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4366   lda_rhs                = n_R;
4367   need_benign_correction = PETSC_FALSE;
4368   if (isLU || isCHOL) {
4369     PetscCall(PCFactorGetMatrix(pc_R, &F));
4370   } else if (sub_schurs && sub_schurs->reuse_solver) {
4371     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4372     MatFactorType      type;
4373 
4374     F = reuse_solver->F;
4375     PetscCall(MatGetFactorType(F, &type));
4376     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4377     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4378     PetscCall(MatGetSize(F, &lda_rhs, NULL));
4379     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4380   } else F = NULL;
4381 
4382   /* determine if we can use a sparse right-hand side */
4383   sparserhs = PETSC_FALSE;
4384   if (F && !multi_element) {
4385     MatSolverType solver;
4386 
4387     PetscCall(MatFactorGetSolverType(F, &solver));
4388     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4389   }
4390 
4391   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4392   dummy_vec = NULL;
4393   if (need_benign_correction && lda_rhs != n_R && F) {
4394     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4395     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4396     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4397   }
4398 
4399   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4400   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4401 
4402   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4403   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4404   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4405   PetscCall(ISGetIndices(is_V, &idx_V));
4406   PetscCall(ISGetIndices(is_C, &idx_C));
4407 
4408   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4409   if (n_constraints) {
4410     Mat C_B;
4411 
4412     /* Extract constraints on R nodes: C_{CR}  */
4413     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4414     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4415 
4416     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4417     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4418     if (!sparserhs) {
4419       PetscScalar *marr;
4420 
4421       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4422       PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4423       for (i = 0; i < n_constraints; i++) {
4424         const PetscScalar *row_cmat_values;
4425         const PetscInt    *row_cmat_indices;
4426         PetscInt           size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4427 
4428         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4429         for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4430         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4431       }
4432       PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4433     } else {
4434       Mat tC_CR;
4435 
4436       PetscCall(MatScale(C_CR, -1.0));
4437       if (lda_rhs != n_R) {
4438         PetscScalar *aa;
4439         PetscInt     r, *ii, *jj;
4440         PetscBool    done;
4441 
4442         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4443         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4444         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4445         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4446         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4447         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4448       } else {
4449         PetscCall(PetscObjectReference((PetscObject)C_CR));
4450         tC_CR = C_CR;
4451       }
4452       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4453       PetscCall(MatDestroy(&tC_CR));
4454     }
4455     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4456     if (F) {
4457       if (need_benign_correction) {
4458         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4459 
4460         /* rhs is already zero on interior dofs, no need to change the rhs */
4461         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4462       }
4463       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4464       if (need_benign_correction) {
4465         PetscScalar       *marr;
4466         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4467 
4468         /* XXX multi_element? */
4469         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4470         if (lda_rhs != n_R) {
4471           for (i = 0; i < n_eff_constraints; i++) {
4472             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4473             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4474             PetscCall(VecResetArray(dummy_vec));
4475           }
4476         } else {
4477           for (i = 0; i < n_eff_constraints; i++) {
4478             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4479             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4480             PetscCall(VecResetArray(pcbddc->vec1_R));
4481           }
4482         }
4483         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4484       }
4485     } else {
4486       const PetscScalar *barr;
4487       PetscScalar       *marr;
4488 
4489       PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4490       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4491       for (i = 0; i < n_eff_constraints; i++) {
4492         PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4493         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4494         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4495         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4496         PetscCall(VecResetArray(pcbddc->vec1_R));
4497         PetscCall(VecResetArray(pcbddc->vec2_R));
4498       }
4499       PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4500       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4501     }
4502     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4503     PetscCall(MatDestroy(&Brhs));
4504     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4505     if (!pcbddc->switch_static) {
4506       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4507       for (i = 0; i < n_eff_constraints; i++) {
4508         Vec r, b;
4509         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
4510         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
4511         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4512         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4513         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
4514         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
4515       }
4516       if (multi_element) {
4517         Mat T;
4518 
4519         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4520         PetscCall(MatDestroy(&local_auxmat2_R));
4521         local_auxmat2_R = T;
4522         PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4523         PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4524         pcbddc->local_auxmat2 = T;
4525       }
4526       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4527     } else {
4528       if (multi_element) {
4529         Mat T;
4530 
4531         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4532         PetscCall(MatDestroy(&local_auxmat2_R));
4533         local_auxmat2_R = T;
4534       }
4535       if (lda_rhs != n_R) {
4536         PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4537       } else {
4538         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4539         pcbddc->local_auxmat2 = local_auxmat2_R;
4540       }
4541       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4542     }
4543     PetscCall(MatScale(S_CC, m_one));
4544     if (multi_element) {
4545       Mat T, T2;
4546       IS  isp, ispi;
4547 
4548       isp = is_C_perm;
4549 
4550       PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4551       PetscCall(MatPermute(S_CC, isp, isp, &T));
4552       PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4553       PetscCall(MatDestroy(&T));
4554       PetscCall(MatDestroy(&S_CC));
4555       PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4556       PetscCall(MatDestroy(&T2));
4557       PetscCall(ISDestroy(&ispi));
4558     } else {
4559       if (isCHOL) {
4560         PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4561       } else {
4562         PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4563       }
4564       PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4565     }
4566     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4567     PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4568     PetscCall(MatDestroy(&C_B));
4569     PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4570   }
4571 
4572   /* Get submatrices from subdomain matrix */
4573   if (n_vertices) {
4574 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4575     PetscBool oldpin;
4576 #endif
4577     IS is_aux;
4578 
4579     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4580       IS tis;
4581 
4582       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4583       PetscCall(ISSort(tis));
4584       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4585       PetscCall(ISDestroy(&tis));
4586     } else {
4587       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4588     }
4589 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4590     oldpin = pcbddc->local_mat->boundtocpu;
4591 #endif
4592     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4593     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4594     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4595     /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4596     PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4597     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4598 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4599     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4600 #endif
4601     PetscCall(ISDestroy(&is_aux));
4602   }
4603   PetscCall(ISDestroy(&is_C_perm));
4604   PetscCall(PetscFree(C_bss));
4605 
4606   p0_lidx_I = NULL;
4607   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4608     const PetscInt *idxs;
4609 
4610     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4611     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4612     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4613     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4614   }
4615 
4616   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4617 
4618   /* Matrices of coarse basis functions (local) */
4619   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4620   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4621   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4622   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4623   if (!multi_element) {
4624     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4625     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4626     coarse_phi_multi = NULL;
4627   } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4628     IS is_rows[2] = {pcbddc->is_R_local, NULL};
4629     IS is_cols[2] = {is_V, is_C};
4630 
4631     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4632     PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4633     PetscCall(ISDestroy(&is_rows[1]));
4634   }
4635 
4636   /* vertices */
4637   if (n_vertices) {
4638     PetscBool restoreavr = PETSC_FALSE;
4639     Mat       A_RRmA_RV  = NULL;
4640 
4641     PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4642     PetscCall(MatDestroy(&A_VV));
4643 
4644     if (n_R) {
4645       Mat A_RV_bcorr = NULL, S_VV;
4646 
4647       PetscCall(MatScale(A_RV, m_one));
4648       if (need_benign_correction) {
4649         ISLocalToGlobalMapping RtoN;
4650         IS                     is_p0;
4651         PetscInt              *idxs_p0, n;
4652 
4653         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4654         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4655         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4656         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4657         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4658         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4659         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4660         PetscCall(ISDestroy(&is_p0));
4661       }
4662 
4663       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4664       if (!sparserhs || need_benign_correction) {
4665         if (lda_rhs == n_R && !multi_element) {
4666           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4667         } else {
4668           Mat             T;
4669           PetscScalar    *av, *array;
4670           const PetscInt *xadj, *adjncy;
4671           PetscInt        n;
4672           PetscBool       flg_row;
4673 
4674           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4675           PetscCall(MatDenseGetArrayWrite(T, &array));
4676           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4677           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4678           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4679           for (i = 0; i < n; i++) {
4680             PetscInt j;
4681             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4682           }
4683           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4684           PetscCall(MatDenseRestoreArrayWrite(T, &array));
4685           PetscCall(MatDestroy(&A_RV));
4686           A_RV = T;
4687         }
4688         if (need_benign_correction) {
4689           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4690           PetscScalar       *marr;
4691 
4692           /* XXX multi_element */
4693           PetscCall(MatDenseGetArray(A_RV, &marr));
4694           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4695 
4696                  | 0 0  0 | (V)
4697              L = | 0 0 -1 | (P-p0)
4698                  | 0 0 -1 | (p0)
4699 
4700           */
4701           for (i = 0; i < reuse_solver->benign_n; i++) {
4702             const PetscScalar *vals;
4703             const PetscInt    *idxs, *idxs_zero;
4704             PetscInt           n, j, nz;
4705 
4706             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4707             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4708             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4709             for (j = 0; j < n; j++) {
4710               PetscScalar val = vals[j];
4711               PetscInt    k, col = idxs[j];
4712               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4713             }
4714             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4715             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4716           }
4717           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4718         }
4719         PetscCall(PetscObjectReference((PetscObject)A_RV));
4720         Brhs = A_RV;
4721       } else {
4722         Mat tA_RVT, A_RVT;
4723 
4724         if (!pcbddc->symmetric_primal) {
4725           /* A_RV already scaled by -1 */
4726           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4727         } else {
4728           restoreavr = PETSC_TRUE;
4729           PetscCall(MatScale(A_VR, -1.0));
4730           PetscCall(PetscObjectReference((PetscObject)A_VR));
4731           A_RVT = A_VR;
4732         }
4733         if (lda_rhs != n_R) {
4734           PetscScalar *aa;
4735           PetscInt     r, *ii, *jj;
4736           PetscBool    done;
4737 
4738           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4739           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4740           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4741           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4742           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4743           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4744         } else {
4745           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4746           tA_RVT = A_RVT;
4747         }
4748         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4749         PetscCall(MatDestroy(&tA_RVT));
4750         PetscCall(MatDestroy(&A_RVT));
4751       }
4752       if (F) {
4753         /* need to correct the rhs */
4754         if (need_benign_correction) {
4755           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4756           PetscScalar       *marr;
4757 
4758           PetscCall(MatDenseGetArray(Brhs, &marr));
4759           if (lda_rhs != n_R) {
4760             for (i = 0; i < n_eff_vertices; i++) {
4761               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4762               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4763               PetscCall(VecResetArray(dummy_vec));
4764             }
4765           } else {
4766             for (i = 0; i < n_eff_vertices; i++) {
4767               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4768               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4769               PetscCall(VecResetArray(pcbddc->vec1_R));
4770             }
4771           }
4772           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4773         }
4774         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4775         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4776         /* need to correct the solution */
4777         if (need_benign_correction) {
4778           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4779           PetscScalar       *marr;
4780 
4781           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4782           if (lda_rhs != n_R) {
4783             for (i = 0; i < n_eff_vertices; i++) {
4784               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4785               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4786               PetscCall(VecResetArray(dummy_vec));
4787             }
4788           } else {
4789             for (i = 0; i < n_eff_vertices; i++) {
4790               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4791               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4792               PetscCall(VecResetArray(pcbddc->vec1_R));
4793             }
4794           }
4795           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4796         }
4797       } else {
4798         const PetscScalar *barr;
4799         PetscScalar       *marr;
4800 
4801         PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4802         PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4803         for (i = 0; i < n_eff_vertices; i++) {
4804           PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4805           PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4806           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4807           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4808           PetscCall(VecResetArray(pcbddc->vec1_R));
4809           PetscCall(VecResetArray(pcbddc->vec2_R));
4810         }
4811         PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4812         PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4813       }
4814       PetscCall(MatDestroy(&A_RV));
4815       PetscCall(MatDestroy(&Brhs));
4816       /* S_VV and S_CV */
4817       if (n_constraints) {
4818         Mat B;
4819 
4820         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4821         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B));
4822 
4823         /* S_CV = pcbddc->local_auxmat1 * B */
4824         if (multi_element) {
4825           Mat T;
4826 
4827           PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4828           PetscCall(MatDestroy(&B));
4829           B = T;
4830         }
4831         PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4832         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4833         PetscCall(MatProductSetFromOptions(S_CV));
4834         PetscCall(MatProductSymbolic(S_CV));
4835         PetscCall(MatProductNumeric(S_CV));
4836         PetscCall(MatProductClear(S_CV));
4837         PetscCall(MatDestroy(&B));
4838 
4839         /* B = local_auxmat2_R * S_CV */
4840         PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4841         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4842         PetscCall(MatProductSetFromOptions(B));
4843         PetscCall(MatProductSymbolic(B));
4844         PetscCall(MatProductNumeric(B));
4845 
4846         PetscCall(MatScale(S_CV, m_one));
4847         PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4848 
4849         if (multi_element) {
4850           Mat T;
4851 
4852           PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4853           PetscCall(MatDestroy(&A_RRmA_RV));
4854           A_RRmA_RV = T;
4855         }
4856         PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4857         PetscCall(MatDestroy(&B));
4858       } else if (multi_element) {
4859         Mat T;
4860 
4861         PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4862         PetscCall(MatDestroy(&A_RRmA_RV));
4863         A_RRmA_RV = T;
4864       }
4865 
4866       if (lda_rhs != n_R) {
4867         Mat T;
4868 
4869         PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4870         PetscCall(MatDestroy(&A_RRmA_RV));
4871         A_RRmA_RV = T;
4872       }
4873 
4874       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4875       if (need_benign_correction) { /* XXX SPARSE */
4876         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4877         PetscScalar       *sums;
4878         const PetscScalar *marr;
4879 
4880         PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
4881         PetscCall(PetscMalloc1(n_vertices, &sums));
4882         for (i = 0; i < reuse_solver->benign_n; i++) {
4883           const PetscScalar *vals;
4884           const PetscInt    *idxs, *idxs_zero;
4885           PetscInt           n, j, nz;
4886 
4887           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4888           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4889           for (j = 0; j < n_vertices; j++) {
4890             sums[j] = 0.;
4891             for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
4892           }
4893           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4894           for (j = 0; j < n; j++) {
4895             PetscScalar val = vals[j];
4896             for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
4897           }
4898           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4899           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4900         }
4901         PetscCall(PetscFree(sums));
4902         PetscCall(MatDestroy(&A_RV_bcorr));
4903         PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
4904       }
4905 
4906       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
4907       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4908       PetscCall(MatDestroy(&S_VV));
4909     }
4910 
4911     /* coarse basis functions */
4912     if (coarse_phi_multi) {
4913       Mat Vid;
4914 
4915       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
4916       PetscCall(MatShift_Basic(Vid, 1.0));
4917       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
4918       PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
4919       PetscCall(MatDestroy(&Vid));
4920     } else {
4921       if (A_RRmA_RV) {
4922         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B));
4923         if (pcbddc->switch_static || pcbddc->dbg_flag) {
4924           PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D));
4925           if (pcbddc->benign_n) {
4926             for (i = 0; i < n_vertices; i++) { PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); }
4927             PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
4928             PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
4929           }
4930         }
4931       }
4932       for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
4933       PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4934       PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4935     }
4936     PetscCall(MatDestroy(&A_RRmA_RV));
4937   }
4938   PetscCall(MatDestroy(&A_RV));
4939   PetscCall(VecDestroy(&dummy_vec));
4940 
4941   if (n_constraints) {
4942     Mat B, B2;
4943 
4944     PetscCall(MatScale(S_CC, m_one));
4945     PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
4946     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4947     PetscCall(MatProductSetFromOptions(B));
4948     PetscCall(MatProductSymbolic(B));
4949     PetscCall(MatProductNumeric(B));
4950 
4951     if (n_vertices) {
4952       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4953         PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
4954       } else {
4955         if (lda_rhs != n_R) {
4956           Mat tB;
4957 
4958           PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
4959           PetscCall(MatDestroy(&B));
4960           B = tB;
4961         }
4962         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
4963       }
4964       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
4965     }
4966 
4967     /* coarse basis functions */
4968     if (coarse_phi_multi) {
4969       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
4970     } else {
4971       PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4972       PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2));
4973       PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
4974       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4975         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4976         PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2));
4977         if (pcbddc->benign_n) {
4978           for (i = 0; i < n_constraints; i++) { PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES)); }
4979         }
4980         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
4981       }
4982     }
4983     PetscCall(MatDestroy(&B));
4984   }
4985 
4986   /* assemble sparse coarse basis functions */
4987   if (coarse_phi_multi) {
4988     Mat T;
4989 
4990     PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
4991     PetscCall(MatDestroy(&coarse_phi_multi));
4992     PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
4993     if (pcbddc->switch_static || pcbddc->dbg_flag) { PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D)); }
4994     PetscCall(MatDestroy(&T));
4995   }
4996   PetscCall(MatDestroy(&local_auxmat2_R));
4997   PetscCall(PetscFree(p0_lidx_I));
4998 
4999   /* coarse matrix entries relative to B_0 */
5000   if (pcbddc->benign_n) {
5001     Mat                B0_B, B0_BPHI;
5002     IS                 is_dummy;
5003     const PetscScalar *data;
5004     PetscInt           j;
5005 
5006     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5007     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5008     PetscCall(ISDestroy(&is_dummy));
5009     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5010     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5011     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5012     for (j = 0; j < pcbddc->benign_n; j++) {
5013       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5014       for (i = 0; i < pcbddc->local_primal_size; i++) {
5015         PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5016         PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5017       }
5018     }
5019     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5020     PetscCall(MatDestroy(&B0_B));
5021     PetscCall(MatDestroy(&B0_BPHI));
5022   }
5023 
5024   /* compute other basis functions for non-symmetric problems */
5025   if (!pcbddc->symmetric_primal) {
5026     Mat          B_V = NULL, B_C = NULL;
5027     PetscScalar *marray, *work;
5028 
5029     /* TODO multi_element MatDenseScatter */
5030     if (n_constraints) {
5031       Mat S_CCT, C_CRT;
5032 
5033       PetscCall(MatScale(S_CC, m_one));
5034       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5035       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5036       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5037       PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5038       PetscCall(MatDestroy(&S_CCT));
5039       if (n_vertices) {
5040         Mat S_VCT;
5041 
5042         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5043         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5044         PetscCall(MatDestroy(&S_VCT));
5045         PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5046       }
5047       PetscCall(MatDestroy(&C_CRT));
5048     } else {
5049       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5050     }
5051     if (n_vertices && n_R) {
5052       PetscScalar    *av, *marray;
5053       const PetscInt *xadj, *adjncy;
5054       PetscInt        n;
5055       PetscBool       flg_row;
5056 
5057       /* B_V = B_V - A_VR^T */
5058       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5059       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5060       PetscCall(MatSeqAIJGetArray(A_VR, &av));
5061       PetscCall(MatDenseGetArray(B_V, &marray));
5062       for (i = 0; i < n; i++) {
5063         PetscInt j;
5064         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5065       }
5066       PetscCall(MatDenseRestoreArray(B_V, &marray));
5067       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5068       PetscCall(MatDestroy(&A_VR));
5069     }
5070 
5071     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5072     PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5073     if (n_vertices) {
5074       PetscCall(MatDenseGetArray(B_V, &marray));
5075       for (i = 0; i < n_vertices; i++) {
5076         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5077         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5078         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5079         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5080         PetscCall(VecResetArray(pcbddc->vec1_R));
5081         PetscCall(VecResetArray(pcbddc->vec2_R));
5082       }
5083       PetscCall(MatDenseRestoreArray(B_V, &marray));
5084     }
5085     if (B_C) {
5086       PetscCall(MatDenseGetArray(B_C, &marray));
5087       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5088         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5089         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5090         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5091         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5092         PetscCall(VecResetArray(pcbddc->vec1_R));
5093         PetscCall(VecResetArray(pcbddc->vec2_R));
5094       }
5095       PetscCall(MatDenseRestoreArray(B_C, &marray));
5096     }
5097     /* coarse basis functions */
5098     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5099     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5100     for (i = 0; i < pcbddc->local_primal_size; i++) {
5101       Vec v;
5102 
5103       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5104       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5105       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5106       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5107       if (i < n_vertices) {
5108         PetscScalar one = 1.0;
5109         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5110         PetscCall(VecAssemblyBegin(v));
5111         PetscCall(VecAssemblyEnd(v));
5112       }
5113       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5114 
5115       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5116         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5117         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5118         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5119         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5120       }
5121       PetscCall(VecResetArray(pcbddc->vec1_R));
5122     }
5123     PetscCall(MatDestroy(&B_V));
5124     PetscCall(MatDestroy(&B_C));
5125     PetscCall(PetscFree(work));
5126   } else {
5127     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5128     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5129     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5130     pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5131   }
5132   PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5133   PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5134 
5135   /* free memory */
5136   PetscCall(PetscFree(V_to_eff_V));
5137   PetscCall(PetscFree(C_to_eff_C));
5138   PetscCall(PetscFree(R_eff_V_J));
5139   PetscCall(PetscFree(R_eff_C_J));
5140   PetscCall(PetscFree(B_eff_V_J));
5141   PetscCall(PetscFree(B_eff_C_J));
5142   PetscCall(ISDestroy(&is_R));
5143   PetscCall(ISRestoreIndices(is_V, &idx_V));
5144   PetscCall(ISRestoreIndices(is_C, &idx_C));
5145   PetscCall(ISDestroy(&is_V));
5146   PetscCall(ISDestroy(&is_C));
5147   PetscCall(PetscFree(idx_V_B));
5148   PetscCall(MatDestroy(&S_CV));
5149   PetscCall(MatDestroy(&S_VC));
5150   PetscCall(MatDestroy(&S_CC));
5151   if (n_vertices) PetscCall(MatDestroy(&A_VR));
5152   if (n_constraints) PetscCall(MatDestroy(&C_CR));
5153   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5154 
5155   /* Checking coarse_sub_mat and coarse basis functions */
5156   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5157   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5158   if (pcbddc->dbg_flag) {
5159     Mat       AUXMAT, TM1, TM2, TM3, TM4;
5160     Mat       coarse_phi_D, coarse_phi_B;
5161     Mat       coarse_psi_D, coarse_psi_B;
5162     Mat       A_II, A_BB, A_IB, A_BI;
5163     Mat       C_B, CPHI;
5164     IS        is_dummy;
5165     Vec       mones;
5166     MatType   checkmattype = MATSEQAIJ;
5167     PetscReal real_value;
5168 
5169     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5170       Mat A;
5171       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5172       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5173       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5174       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5175       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5176       PetscCall(MatDestroy(&A));
5177     } else {
5178       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5179       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5180       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5181       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5182     }
5183     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5184     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5185     if (!pcbddc->symmetric_primal) {
5186       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5187       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5188     }
5189     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5190     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5191     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5192     if (!pcbddc->symmetric_primal) {
5193       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5194       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5195       PetscCall(MatDestroy(&AUXMAT));
5196       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5197       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5198       PetscCall(MatDestroy(&AUXMAT));
5199       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5200       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5201       PetscCall(MatDestroy(&AUXMAT));
5202       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5203       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5204       PetscCall(MatDestroy(&AUXMAT));
5205     } else {
5206       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5207       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5208       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5209       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5210       PetscCall(MatDestroy(&AUXMAT));
5211       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5212       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5213       PetscCall(MatDestroy(&AUXMAT));
5214     }
5215     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5216     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5217     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5218     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5219     if (pcbddc->benign_n) {
5220       Mat                B0_B, B0_BPHI;
5221       const PetscScalar *data2;
5222       PetscScalar       *data;
5223       PetscInt           j;
5224 
5225       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5226       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5227       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5228       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5229       PetscCall(MatDenseGetArray(TM1, &data));
5230       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5231       for (j = 0; j < pcbddc->benign_n; j++) {
5232         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5233         for (i = 0; i < pcbddc->local_primal_size; i++) {
5234           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5235           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5236         }
5237       }
5238       PetscCall(MatDenseRestoreArray(TM1, &data));
5239       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5240       PetscCall(MatDestroy(&B0_B));
5241       PetscCall(ISDestroy(&is_dummy));
5242       PetscCall(MatDestroy(&B0_BPHI));
5243     }
5244     PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5245     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5246     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5247     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5248 
5249     /* check constraints */
5250     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5251     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5252     if (!pcbddc->benign_n) { /* TODO: add benign case */
5253       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5254     } else {
5255       PetscScalar *data;
5256       Mat          tmat;
5257       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5258       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5259       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5260       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5261       PetscCall(MatDestroy(&tmat));
5262     }
5263     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5264     PetscCall(VecSet(mones, -1.0));
5265     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5266     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5267     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5268     if (!pcbddc->symmetric_primal) {
5269       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5270       PetscCall(VecSet(mones, -1.0));
5271       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5272       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5273       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5274     }
5275     PetscCall(MatDestroy(&C_B));
5276     PetscCall(MatDestroy(&CPHI));
5277     PetscCall(ISDestroy(&is_dummy));
5278     PetscCall(VecDestroy(&mones));
5279     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5280     PetscCall(MatDestroy(&A_II));
5281     PetscCall(MatDestroy(&A_BB));
5282     PetscCall(MatDestroy(&A_IB));
5283     PetscCall(MatDestroy(&A_BI));
5284     PetscCall(MatDestroy(&TM1));
5285     PetscCall(MatDestroy(&TM2));
5286     PetscCall(MatDestroy(&TM3));
5287     PetscCall(MatDestroy(&TM4));
5288     PetscCall(MatDestroy(&coarse_phi_D));
5289     PetscCall(MatDestroy(&coarse_phi_B));
5290     if (!pcbddc->symmetric_primal) {
5291       PetscCall(MatDestroy(&coarse_psi_D));
5292       PetscCall(MatDestroy(&coarse_psi_B));
5293     }
5294   }
5295 
5296 #if 0
5297   {
5298     PetscViewer viewer;
5299     char filename[256];
5300 
5301     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5302     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5303     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5304     PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5305     PetscCall(MatView(*coarse_submat,viewer));
5306     if (pcbddc->coarse_phi_B) {
5307       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5308       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5309     }
5310     if (pcbddc->coarse_phi_D) {
5311       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5312       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5313     }
5314     if (pcbddc->coarse_psi_B) {
5315       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5316       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5317     }
5318     if (pcbddc->coarse_psi_D) {
5319       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5320       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5321     }
5322     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5323     PetscCall(MatView(pcbddc->local_mat,viewer));
5324     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5325     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5326     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5327     PetscCall(ISView(pcis->is_I_local,viewer));
5328     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5329     PetscCall(ISView(pcis->is_B_local,viewer));
5330     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5331     PetscCall(ISView(pcbddc->is_R_local,viewer));
5332     PetscCall(PetscViewerDestroy(&viewer));
5333   }
5334 #endif
5335 
5336   /* device support */
5337   {
5338     PetscBool iscuda, iship, iskokkos;
5339     MatType   mtype = NULL;
5340 
5341     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5342     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5343     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5344     if (iskokkos) {
5345       if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5346       else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5347     }
5348     if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5349     else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5350     else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5351     if (mtype) {
5352       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5353       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5354       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5355       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5356       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5357       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5358     }
5359   }
5360   PetscFunctionReturn(PETSC_SUCCESS);
5361 }
5362 
5363 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5364 {
5365   Mat      *work_mat;
5366   IS        isrow_s, iscol_s;
5367   PetscBool rsorted, csorted;
5368   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5369 
5370   PetscFunctionBegin;
5371   PetscCall(ISSorted(isrow, &rsorted));
5372   PetscCall(ISSorted(iscol, &csorted));
5373   PetscCall(ISGetLocalSize(isrow, &rsize));
5374   PetscCall(ISGetLocalSize(iscol, &csize));
5375 
5376   if (!rsorted) {
5377     const PetscInt *idxs;
5378     PetscInt       *idxs_sorted, i;
5379 
5380     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5381     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5382     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5383     PetscCall(ISGetIndices(isrow, &idxs));
5384     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5385     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5386     PetscCall(ISRestoreIndices(isrow, &idxs));
5387     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5388   } else {
5389     PetscCall(PetscObjectReference((PetscObject)isrow));
5390     isrow_s = isrow;
5391   }
5392 
5393   if (!csorted) {
5394     if (isrow == iscol) {
5395       PetscCall(PetscObjectReference((PetscObject)isrow_s));
5396       iscol_s = isrow_s;
5397     } else {
5398       const PetscInt *idxs;
5399       PetscInt       *idxs_sorted, i;
5400 
5401       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5402       PetscCall(PetscMalloc1(csize, &idxs_sorted));
5403       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5404       PetscCall(ISGetIndices(iscol, &idxs));
5405       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5406       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5407       PetscCall(ISRestoreIndices(iscol, &idxs));
5408       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5409     }
5410   } else {
5411     PetscCall(PetscObjectReference((PetscObject)iscol));
5412     iscol_s = iscol;
5413   }
5414 
5415   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5416 
5417   if (!rsorted || !csorted) {
5418     Mat new_mat;
5419     IS  is_perm_r, is_perm_c;
5420 
5421     if (!rsorted) {
5422       PetscInt *idxs_r, i;
5423       PetscCall(PetscMalloc1(rsize, &idxs_r));
5424       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5425       PetscCall(PetscFree(idxs_perm_r));
5426       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5427     } else {
5428       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5429     }
5430     PetscCall(ISSetPermutation(is_perm_r));
5431 
5432     if (!csorted) {
5433       if (isrow_s == iscol_s) {
5434         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5435         is_perm_c = is_perm_r;
5436       } else {
5437         PetscInt *idxs_c, i;
5438         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5439         PetscCall(PetscMalloc1(csize, &idxs_c));
5440         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5441         PetscCall(PetscFree(idxs_perm_c));
5442         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5443       }
5444     } else {
5445       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5446     }
5447     PetscCall(ISSetPermutation(is_perm_c));
5448 
5449     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5450     PetscCall(MatDestroy(&work_mat[0]));
5451     work_mat[0] = new_mat;
5452     PetscCall(ISDestroy(&is_perm_r));
5453     PetscCall(ISDestroy(&is_perm_c));
5454   }
5455 
5456   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5457   *B = work_mat[0];
5458   PetscCall(MatDestroyMatrices(1, &work_mat));
5459   PetscCall(ISDestroy(&isrow_s));
5460   PetscCall(ISDestroy(&iscol_s));
5461   PetscFunctionReturn(PETSC_SUCCESS);
5462 }
5463 
5464 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5465 {
5466   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
5467   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
5468   Mat       new_mat, lA;
5469   IS        is_local, is_global;
5470   PetscInt  local_size;
5471   PetscBool isseqaij, issym, isset;
5472 
5473   PetscFunctionBegin;
5474   PetscCall(MatDestroy(&pcbddc->local_mat));
5475   PetscCall(MatGetSize(matis->A, &local_size, NULL));
5476   if (pcbddc->mat_graph->multi_element) {
5477     Mat     *mats, *bdiags;
5478     IS      *gsubs;
5479     PetscInt nsubs = pcbddc->n_local_subs;
5480 
5481     PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5482     PetscCall(PetscMalloc1(nsubs, &gsubs));
5483     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5484     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5485     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5486     PetscCall(PetscFree(gsubs));
5487 
5488     for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5489     PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5490     PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5491     PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5492     PetscCall(PetscFree(mats));
5493   } else {
5494     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5495     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5496     PetscCall(ISDestroy(&is_local));
5497     PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5498     PetscCall(ISDestroy(&is_global));
5499   }
5500   if (pcbddc->dbg_flag) {
5501     Vec       x, x_change;
5502     PetscReal error;
5503 
5504     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5505     PetscCall(VecSetRandom(x, NULL));
5506     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5507     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5508     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5509     PetscCall(MatMult(new_mat, matis->x, matis->y));
5510     if (!pcbddc->change_interior) {
5511       const PetscScalar *x, *y, *v;
5512       PetscReal          lerror = 0.;
5513       PetscInt           i;
5514 
5515       PetscCall(VecGetArrayRead(matis->x, &x));
5516       PetscCall(VecGetArrayRead(matis->y, &y));
5517       PetscCall(VecGetArrayRead(matis->counter, &v));
5518       for (i = 0; i < local_size; i++)
5519         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5520       PetscCall(VecRestoreArrayRead(matis->x, &x));
5521       PetscCall(VecRestoreArrayRead(matis->y, &y));
5522       PetscCall(VecRestoreArrayRead(matis->counter, &v));
5523       PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5524       if (error > PETSC_SMALL) {
5525         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5526           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5527         } else {
5528           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5529         }
5530       }
5531     }
5532     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5533     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5534     PetscCall(VecAXPY(x, -1.0, x_change));
5535     PetscCall(VecNorm(x, NORM_INFINITY, &error));
5536     if (error > PETSC_SMALL) {
5537       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5538         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5539       } else {
5540         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5541       }
5542     }
5543     PetscCall(VecDestroy(&x));
5544     PetscCall(VecDestroy(&x_change));
5545   }
5546 
5547   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5548   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5549 
5550   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5551   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5552   if (isseqaij) {
5553     PetscCall(MatDestroy(&pcbddc->local_mat));
5554     PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5555     if (lA) {
5556       Mat work;
5557       PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5558       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5559       PetscCall(MatDestroy(&work));
5560     }
5561   } else {
5562     Mat work_mat;
5563 
5564     PetscCall(MatDestroy(&pcbddc->local_mat));
5565     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5566     PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
5567     PetscCall(MatDestroy(&work_mat));
5568     if (lA) {
5569       Mat work;
5570       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5571       PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
5572       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5573       PetscCall(MatDestroy(&work));
5574     }
5575   }
5576   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5577   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5578   PetscCall(MatDestroy(&new_mat));
5579   PetscFunctionReturn(PETSC_SUCCESS);
5580 }
5581 
5582 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5583 {
5584   PC_IS          *pcis        = (PC_IS *)pc->data;
5585   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
5586   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
5587   PetscInt       *idx_R_local = NULL;
5588   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
5589   PetscInt        vbs, bs;
5590   PetscBT         bitmask = NULL;
5591 
5592   PetscFunctionBegin;
5593   /*
5594     No need to setup local scatters if
5595       - primal space is unchanged
5596         AND
5597       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5598         AND
5599       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5600   */
5601   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5602   /* destroy old objects */
5603   PetscCall(ISDestroy(&pcbddc->is_R_local));
5604   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5605   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5606   /* Set Non-overlapping dimensions */
5607   n_B        = pcis->n_B;
5608   n_D        = pcis->n - n_B;
5609   n_vertices = pcbddc->n_vertices;
5610 
5611   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5612 
5613   /* create auxiliary bitmask and allocate workspace */
5614   if (!sub_schurs || !sub_schurs->reuse_solver) {
5615     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5616     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5617     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5618 
5619     for (i = 0, n_R = 0; i < pcis->n; i++) {
5620       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5621     }
5622   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5623     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5624 
5625     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5626     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5627   }
5628 
5629   /* Block code */
5630   vbs = 1;
5631   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5632   if (bs > 1 && !(n_vertices % bs)) {
5633     PetscBool is_blocked = PETSC_TRUE;
5634     PetscInt *vary;
5635     if (!sub_schurs || !sub_schurs->reuse_solver) {
5636       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5637       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5638       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5639       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5640       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5641       for (i = 0; i < pcis->n / bs; i++) {
5642         if (vary[i] != 0 && vary[i] != bs) {
5643           is_blocked = PETSC_FALSE;
5644           break;
5645         }
5646       }
5647       PetscCall(PetscFree(vary));
5648     } else {
5649       /* Verify directly the R set */
5650       for (i = 0; i < n_R / bs; i++) {
5651         PetscInt j, node = idx_R_local[bs * i];
5652         for (j = 1; j < bs; j++) {
5653           if (node != idx_R_local[bs * i + j] - j) {
5654             is_blocked = PETSC_FALSE;
5655             break;
5656           }
5657         }
5658       }
5659     }
5660     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5661       vbs = bs;
5662       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5663     }
5664   }
5665   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5666   if (sub_schurs && sub_schurs->reuse_solver) {
5667     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5668 
5669     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5670     PetscCall(ISDestroy(&reuse_solver->is_R));
5671     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5672     reuse_solver->is_R = pcbddc->is_R_local;
5673   } else {
5674     PetscCall(PetscFree(idx_R_local));
5675   }
5676 
5677   /* print some info if requested */
5678   if (pcbddc->dbg_flag) {
5679     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5680     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5681     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5682     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5683     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5684     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5685                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5686     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5687   }
5688 
5689   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5690   if (!sub_schurs || !sub_schurs->reuse_solver) {
5691     IS        is_aux1, is_aux2;
5692     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5693 
5694     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5695     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5696     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5697     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5698     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5699     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5700     for (i = 0, j = 0; i < n_R; i++) {
5701       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5702     }
5703     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5704     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5705     for (i = 0, j = 0; i < n_B; i++) {
5706       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5707     }
5708     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5709     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5710     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5711     PetscCall(ISDestroy(&is_aux1));
5712     PetscCall(ISDestroy(&is_aux2));
5713 
5714     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5715       PetscCall(PetscMalloc1(n_D, &aux_array1));
5716       for (i = 0, j = 0; i < n_R; i++) {
5717         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5718       }
5719       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5720       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5721       PetscCall(ISDestroy(&is_aux1));
5722     }
5723     PetscCall(PetscBTDestroy(&bitmask));
5724     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5725   } else {
5726     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5727     IS                 tis;
5728     PetscInt           schur_size;
5729 
5730     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5731     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5732     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5733     PetscCall(ISDestroy(&tis));
5734     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5735       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5736       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5737       PetscCall(ISDestroy(&tis));
5738     }
5739   }
5740   PetscFunctionReturn(PETSC_SUCCESS);
5741 }
5742 
5743 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5744 {
5745   MatNullSpace NullSpace;
5746   Mat          dmat;
5747   const Vec   *nullvecs;
5748   Vec          v, v2, *nullvecs2;
5749   VecScatter   sct = NULL;
5750   PetscScalar *ddata;
5751   PetscInt     k, nnsp_size, bsiz, bsiz2, n, N, bs;
5752   PetscBool    nnsp_has_cnst;
5753 
5754   PetscFunctionBegin;
5755   if (!is && !B) { /* MATIS */
5756     Mat_IS *matis = (Mat_IS *)A->data;
5757 
5758     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5759     sct = matis->cctx;
5760     PetscCall(PetscObjectReference((PetscObject)sct));
5761   } else {
5762     PetscCall(MatGetNullSpace(B, &NullSpace));
5763     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5764     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5765   }
5766   PetscCall(MatGetNullSpace(A, &NullSpace));
5767   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5768   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5769 
5770   PetscCall(MatCreateVecs(A, &v, NULL));
5771   PetscCall(MatCreateVecs(B, &v2, NULL));
5772   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5773   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs));
5774   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5775   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5776   PetscCall(VecGetBlockSize(v2, &bs));
5777   PetscCall(VecGetSize(v2, &N));
5778   PetscCall(VecGetLocalSize(v2, &n));
5779   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5780   for (k = 0; k < nnsp_size; k++) {
5781     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5782     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5783     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5784   }
5785   if (nnsp_has_cnst) {
5786     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5787     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5788   }
5789   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5790   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5791 
5792   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5793   PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault));
5794   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5795   PetscCall(MatDestroy(&dmat));
5796 
5797   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5798   PetscCall(PetscFree(nullvecs2));
5799   PetscCall(MatSetNearNullSpace(B, NullSpace));
5800   PetscCall(MatNullSpaceDestroy(&NullSpace));
5801   PetscCall(VecDestroy(&v));
5802   PetscCall(VecDestroy(&v2));
5803   PetscCall(VecScatterDestroy(&sct));
5804   PetscFunctionReturn(PETSC_SUCCESS);
5805 }
5806 
5807 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5808 {
5809   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5810   PC_IS       *pcis   = (PC_IS *)pc->data;
5811   PC           pc_temp;
5812   Mat          A_RR;
5813   MatNullSpace nnsp;
5814   MatReuse     reuse;
5815   PetscScalar  m_one = -1.0;
5816   PetscReal    value;
5817   PetscInt     n_D, n_R;
5818   PetscBool    issbaij, opts, isset, issym;
5819   PetscBool    f = PETSC_FALSE;
5820   char         dir_prefix[256], neu_prefix[256], str_level[16];
5821   size_t       len;
5822 
5823   PetscFunctionBegin;
5824   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5825   /* approximate solver, propagate NearNullSpace if needed */
5826   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5827     MatNullSpace gnnsp1, gnnsp2;
5828     PetscBool    lhas, ghas;
5829 
5830     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5831     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5832     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5833     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5834     PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5835     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5836   }
5837 
5838   /* compute prefixes */
5839   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5840   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5841   if (!pcbddc->current_level) {
5842     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5843     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5844     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5845     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5846   } else {
5847     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
5848     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5849     len -= 15;                                /* remove "pc_bddc_coarse_" */
5850     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5851     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5852     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5853     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5854     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5855     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5856     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5857     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5858     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5859   }
5860 
5861   /* DIRICHLET PROBLEM */
5862   if (dirichlet) {
5863     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5864     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5865       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5866       if (pcbddc->dbg_flag) {
5867         Mat A_IIn;
5868 
5869         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5870         PetscCall(MatDestroy(&pcis->A_II));
5871         pcis->A_II = A_IIn;
5872       }
5873     }
5874     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5875     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
5876 
5877     /* Matrix for Dirichlet problem is pcis->A_II */
5878     n_D  = pcis->n - pcis->n_B;
5879     opts = PETSC_FALSE;
5880     if (!pcbddc->ksp_D) { /* create object if not yet build */
5881       opts = PETSC_TRUE;
5882       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5883       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
5884       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5885       /* default */
5886       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5887       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5888       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5889       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5890       if (issbaij) {
5891         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5892       } else {
5893         PetscCall(PCSetType(pc_temp, PCLU));
5894       }
5895       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5896     }
5897     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5898     PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
5899     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5900     /* Allow user's customization */
5901     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5902     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5903     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5904       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5905     }
5906     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5907     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5908     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5909     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5910       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5911       const PetscInt *idxs;
5912       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
5913 
5914       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5915       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5916       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5917       for (i = 0; i < nl; i++) {
5918         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5919       }
5920       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5921       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5922       PetscCall(PetscFree(scoords));
5923     }
5924     if (sub_schurs && sub_schurs->reuse_solver) {
5925       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5926 
5927       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5928     }
5929 
5930     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5931     if (!n_D) {
5932       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5933       PetscCall(PCSetType(pc_temp, PCNONE));
5934     }
5935     PetscCall(KSPSetUp(pcbddc->ksp_D));
5936     /* set ksp_D into pcis data */
5937     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5938     PetscCall(KSPDestroy(&pcis->ksp_D));
5939     pcis->ksp_D = pcbddc->ksp_D;
5940   }
5941 
5942   /* NEUMANN PROBLEM */
5943   A_RR = NULL;
5944   if (neumann) {
5945     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5946     PetscInt        ibs, mbs;
5947     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
5948     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
5949 
5950     reuse_neumann_solver = PETSC_FALSE;
5951     if (sub_schurs && sub_schurs->reuse_solver) {
5952       IS iP;
5953 
5954       reuse_neumann_solver = PETSC_TRUE;
5955       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
5956       if (iP) reuse_neumann_solver = PETSC_FALSE;
5957     }
5958     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5959     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
5960     if (pcbddc->ksp_R) { /* already created ksp */
5961       PetscInt nn_R;
5962       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
5963       PetscCall(PetscObjectReference((PetscObject)A_RR));
5964       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
5965       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5966         PetscCall(KSPReset(pcbddc->ksp_R));
5967         PetscCall(MatDestroy(&A_RR));
5968         reuse = MAT_INITIAL_MATRIX;
5969       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5970         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5971           PetscCall(MatDestroy(&A_RR));
5972           reuse = MAT_INITIAL_MATRIX;
5973         } else { /* safe to reuse the matrix */
5974           reuse = MAT_REUSE_MATRIX;
5975         }
5976       }
5977       /* last check */
5978       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5979         PetscCall(MatDestroy(&A_RR));
5980         reuse = MAT_INITIAL_MATRIX;
5981       }
5982     } else { /* first time, so we need to create the matrix */
5983       reuse = MAT_INITIAL_MATRIX;
5984     }
5985     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5986        TODO: Get Rid of these conversions */
5987     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
5988     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
5989     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
5990     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5991       if (matis->A == pcbddc->local_mat) {
5992         PetscCall(MatDestroy(&pcbddc->local_mat));
5993         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5994       } else {
5995         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5996       }
5997     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
5998       if (matis->A == pcbddc->local_mat) {
5999         PetscCall(MatDestroy(&pcbddc->local_mat));
6000         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6001       } else {
6002         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6003       }
6004     }
6005     /* extract A_RR */
6006     if (reuse_neumann_solver) {
6007       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6008 
6009       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6010         PetscCall(MatDestroy(&A_RR));
6011         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6012           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6013         } else {
6014           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6015         }
6016       } else {
6017         PetscCall(MatDestroy(&A_RR));
6018         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6019         PetscCall(PetscObjectReference((PetscObject)A_RR));
6020       }
6021     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6022       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6023     }
6024     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6025     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6026     opts = PETSC_FALSE;
6027     if (!pcbddc->ksp_R) { /* create object if not present */
6028       opts = PETSC_TRUE;
6029       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6030       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6031       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6032       /* default */
6033       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6034       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6035       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6036       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6037       if (issbaij) {
6038         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6039       } else {
6040         PetscCall(PCSetType(pc_temp, PCLU));
6041       }
6042       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6043     }
6044     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6045     PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6046     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6047     if (opts) { /* Allow user's customization once */
6048       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6049     }
6050     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6051     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6052       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6053     }
6054     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6055     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6056     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6057     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6058       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6059       const PetscInt *idxs;
6060       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
6061 
6062       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6063       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6064       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6065       for (i = 0; i < nl; i++) {
6066         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6067       }
6068       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6069       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6070       PetscCall(PetscFree(scoords));
6071     }
6072 
6073     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6074     if (!n_R) {
6075       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6076       PetscCall(PCSetType(pc_temp, PCNONE));
6077     }
6078     /* Reuse solver if it is present */
6079     if (reuse_neumann_solver) {
6080       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6081 
6082       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6083     }
6084     PetscCall(KSPSetUp(pcbddc->ksp_R));
6085   }
6086 
6087   if (pcbddc->dbg_flag) {
6088     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6089     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6090     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6091   }
6092   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6093 
6094   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6095   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6096   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6097   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6098   /* check Dirichlet and Neumann solvers */
6099   if (pcbddc->dbg_flag) {
6100     if (dirichlet) { /* Dirichlet */
6101       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6102       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6103       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6104       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6105       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6106       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6107       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6108       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6109     }
6110     if (neumann) { /* Neumann */
6111       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6112       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6113       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6114       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6115       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6116       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6117       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6118       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6119     }
6120   }
6121   /* free Neumann problem's matrix */
6122   PetscCall(MatDestroy(&A_RR));
6123   PetscFunctionReturn(PETSC_SUCCESS);
6124 }
6125 
6126 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6127 {
6128   PC_BDDC        *pcbddc       = (PC_BDDC *)pc->data;
6129   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
6130   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6131 
6132   PetscFunctionBegin;
6133   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6134   if (!pcbddc->switch_static) {
6135     if (applytranspose && pcbddc->local_auxmat1) {
6136       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6137       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6138     }
6139     if (!reuse_solver) {
6140       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6141       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6142     } else {
6143       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6144 
6145       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6146       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6147     }
6148   } else {
6149     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6150     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6151     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6152     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6153     if (applytranspose && pcbddc->local_auxmat1) {
6154       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6155       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6156       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6157       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6158     }
6159   }
6160   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6161   if (!reuse_solver || pcbddc->switch_static) {
6162     if (applytranspose) {
6163       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6164     } else {
6165       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6166     }
6167     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6168   } else {
6169     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6170 
6171     if (applytranspose) {
6172       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6173     } else {
6174       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6175     }
6176   }
6177   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6178   PetscCall(VecSet(inout_B, 0.));
6179   if (!pcbddc->switch_static) {
6180     if (!reuse_solver) {
6181       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6182       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6183     } else {
6184       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6185 
6186       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6187       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6188     }
6189     if (!applytranspose && pcbddc->local_auxmat1) {
6190       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6191       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6192     }
6193   } else {
6194     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6195     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6196     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6197     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6198     if (!applytranspose && pcbddc->local_auxmat1) {
6199       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6200       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6201     }
6202     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6203     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6204     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6205     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6206   }
6207   PetscFunctionReturn(PETSC_SUCCESS);
6208 }
6209 
6210 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
6211 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6212 {
6213   PC_BDDC          *pcbddc = (PC_BDDC *)pc->data;
6214   PC_IS            *pcis   = (PC_IS *)pc->data;
6215   const PetscScalar zero   = 0.0;
6216 
6217   PetscFunctionBegin;
6218   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6219   if (!pcbddc->benign_apply_coarse_only) {
6220     if (applytranspose) {
6221       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6222       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6223     } else {
6224       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6225       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6226     }
6227   } else {
6228     PetscCall(VecSet(pcbddc->vec1_P, zero));
6229   }
6230 
6231   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6232   if (pcbddc->benign_n) {
6233     PetscScalar *array;
6234     PetscInt     j;
6235 
6236     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6237     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6238     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6239   }
6240 
6241   /* start communications from local primal nodes to rhs of coarse solver */
6242   PetscCall(VecSet(pcbddc->coarse_vec, zero));
6243   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6244   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6245 
6246   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6247   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6248   if (pcbddc->coarse_ksp) {
6249     Mat          coarse_mat;
6250     Vec          rhs, sol;
6251     MatNullSpace nullsp;
6252     PetscBool    isbddc = PETSC_FALSE;
6253 
6254     if (pcbddc->benign_have_null) {
6255       PC coarse_pc;
6256 
6257       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6258       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6259       /* we need to propagate to coarser levels the need for a possible benign correction */
6260       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6261         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)coarse_pc->data;
6262         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
6263         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6264       }
6265     }
6266     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6267     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6268     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6269     if (applytranspose) {
6270       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6271       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6272       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6273       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6274       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6275     } else {
6276       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6277       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6278         PC coarse_pc;
6279 
6280         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6281         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6282         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6283         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6284         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6285       } else {
6286         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6287         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6288         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6289       }
6290     }
6291     /* we don't need the benign correction at coarser levels anymore */
6292     if (pcbddc->benign_have_null && isbddc) {
6293       PC       coarse_pc;
6294       PC_BDDC *coarsepcbddc;
6295 
6296       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6297       coarsepcbddc                           = (PC_BDDC *)coarse_pc->data;
6298       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
6299       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6300     }
6301   }
6302   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6303 
6304   /* Local solution on R nodes */
6305   if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6306   /* communications from coarse sol to local primal nodes */
6307   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6308   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6309 
6310   /* Sum contributions from the two levels */
6311   if (!pcbddc->benign_apply_coarse_only) {
6312     if (applytranspose) {
6313       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6314       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6315     } else {
6316       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6317       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6318     }
6319     /* store p0 */
6320     if (pcbddc->benign_n) {
6321       PetscScalar *array;
6322       PetscInt     j;
6323 
6324       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6325       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6326       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6327     }
6328   } else { /* expand the coarse solution */
6329     if (applytranspose) {
6330       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6331     } else {
6332       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6333     }
6334   }
6335   PetscFunctionReturn(PETSC_SUCCESS);
6336 }
6337 
6338 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6339 {
6340   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6341   Vec                from, to;
6342   const PetscScalar *array;
6343 
6344   PetscFunctionBegin;
6345   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6346     from = pcbddc->coarse_vec;
6347     to   = pcbddc->vec1_P;
6348     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6349       Vec tvec;
6350 
6351       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6352       PetscCall(VecResetArray(tvec));
6353       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6354       PetscCall(VecGetArrayRead(tvec, &array));
6355       PetscCall(VecPlaceArray(from, array));
6356       PetscCall(VecRestoreArrayRead(tvec, &array));
6357     }
6358   } else { /* from local to global -> put data in coarse right-hand side */
6359     from = pcbddc->vec1_P;
6360     to   = pcbddc->coarse_vec;
6361   }
6362   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6363   PetscFunctionReturn(PETSC_SUCCESS);
6364 }
6365 
6366 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6367 {
6368   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6369   Vec                from, to;
6370   const PetscScalar *array;
6371 
6372   PetscFunctionBegin;
6373   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6374     from = pcbddc->coarse_vec;
6375     to   = pcbddc->vec1_P;
6376   } else { /* from local to global -> put data in coarse right-hand side */
6377     from = pcbddc->vec1_P;
6378     to   = pcbddc->coarse_vec;
6379   }
6380   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6381   if (smode == SCATTER_FORWARD) {
6382     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6383       Vec tvec;
6384 
6385       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6386       PetscCall(VecGetArrayRead(to, &array));
6387       PetscCall(VecPlaceArray(tvec, array));
6388       PetscCall(VecRestoreArrayRead(to, &array));
6389     }
6390   } else {
6391     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6392       PetscCall(VecResetArray(from));
6393     }
6394   }
6395   PetscFunctionReturn(PETSC_SUCCESS);
6396 }
6397 
6398 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6399 {
6400   PC_IS   *pcis   = (PC_IS *)pc->data;
6401   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6402   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
6403   /* one and zero */
6404   PetscScalar one = 1.0, zero = 0.0;
6405   /* space to store constraints and their local indices */
6406   PetscScalar *constraints_data;
6407   PetscInt    *constraints_idxs, *constraints_idxs_B;
6408   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
6409   PetscInt    *constraints_n;
6410   /* iterators */
6411   PetscInt i, j, k, total_counts, total_counts_cc, cum;
6412   /* BLAS integers */
6413   PetscBLASInt lwork, lierr;
6414   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6415   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6416   /* reuse */
6417   PetscInt  olocal_primal_size, olocal_primal_size_cc;
6418   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6419   /* change of basis */
6420   PetscBool qr_needed;
6421   PetscBT   change_basis, qr_needed_idx;
6422   /* auxiliary stuff */
6423   PetscInt *nnz, *is_indices;
6424   PetscInt  ncc;
6425   /* some quantities */
6426   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
6427   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6428   PetscReal tol; /* tolerance for retaining eigenmodes */
6429 
6430   PetscFunctionBegin;
6431   tol = PetscSqrtReal(PETSC_SMALL);
6432   /* Destroy Mat objects computed previously */
6433   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6434   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6435   PetscCall(MatDestroy(&pcbddc->switch_static_change));
6436   /* save info on constraints from previous setup (if any) */
6437   olocal_primal_size    = pcbddc->local_primal_size;
6438   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6439   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6440   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6441   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6442   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6443   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6444 
6445   if (!pcbddc->adaptive_selection) {
6446     IS           ISForVertices, *ISForFaces, *ISForEdges;
6447     MatNullSpace nearnullsp;
6448     const Vec   *nearnullvecs;
6449     Vec         *localnearnullsp;
6450     PetscScalar *array;
6451     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6452     PetscBool    nnsp_has_cnst;
6453     /* LAPACK working arrays for SVD or POD */
6454     PetscBool    skip_lapack, boolforchange;
6455     PetscScalar *work;
6456     PetscReal   *singular_vals;
6457 #if defined(PETSC_USE_COMPLEX)
6458     PetscReal *rwork;
6459 #endif
6460     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6461     PetscBLASInt dummy_int    = 1;
6462     PetscScalar  dummy_scalar = 1.;
6463     PetscBool    use_pod      = PETSC_FALSE;
6464 
6465     /* MKL SVD with same input gives different results on different processes! */
6466 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6467     use_pod = PETSC_TRUE;
6468 #endif
6469     /* Get index sets for faces, edges and vertices from graph */
6470     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6471     o_nf       = n_ISForFaces;
6472     o_ne       = n_ISForEdges;
6473     n_vertices = 0;
6474     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6475     /* print some info */
6476     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6477       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6478       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6479       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6480       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6481       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6482       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6483       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6484       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6485       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6486     }
6487 
6488     if (!pcbddc->use_vertices) n_vertices = 0;
6489     if (!pcbddc->use_edges) n_ISForEdges = 0;
6490     if (!pcbddc->use_faces) n_ISForFaces = 0;
6491 
6492     /* check if near null space is attached to global mat */
6493     if (pcbddc->use_nnsp) {
6494       PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6495     } else nearnullsp = NULL;
6496 
6497     if (nearnullsp) {
6498       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6499       /* remove any stored info */
6500       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6501       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6502       /* store information for BDDC solver reuse */
6503       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6504       pcbddc->onearnullspace = nearnullsp;
6505       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6506       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6507     } else { /* if near null space is not provided BDDC uses constants by default */
6508       nnsp_size     = 0;
6509       nnsp_has_cnst = PETSC_TRUE;
6510     }
6511     /* get max number of constraints on a single cc */
6512     max_constraints = nnsp_size;
6513     if (nnsp_has_cnst) max_constraints++;
6514 
6515     /*
6516          Evaluate maximum storage size needed by the procedure
6517          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6518          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6519          There can be multiple constraints per connected component
6520                                                                                                                                                            */
6521     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6522     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6523 
6524     total_counts = n_ISForFaces + n_ISForEdges;
6525     total_counts *= max_constraints;
6526     total_counts += n_vertices;
6527     PetscCall(PetscBTCreate(total_counts, &change_basis));
6528 
6529     total_counts           = 0;
6530     max_size_of_constraint = 0;
6531     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6532       IS used_is;
6533       if (i < n_ISForEdges) {
6534         used_is = ISForEdges[i];
6535       } else {
6536         used_is = ISForFaces[i - n_ISForEdges];
6537       }
6538       PetscCall(ISGetSize(used_is, &j));
6539       total_counts += j;
6540       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6541     }
6542     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6543 
6544     /* get local part of global near null space vectors */
6545     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6546     for (k = 0; k < nnsp_size; k++) {
6547       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6548       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6549       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6550     }
6551 
6552     /* whether or not to skip lapack calls */
6553     skip_lapack = PETSC_TRUE;
6554     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6555 
6556     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6557     if (!skip_lapack) {
6558       PetscScalar temp_work;
6559 
6560       if (use_pod) {
6561         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6562         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6563         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6564         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6565 #if defined(PETSC_USE_COMPLEX)
6566         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6567 #endif
6568         /* now we evaluate the optimal workspace using query with lwork=-1 */
6569         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6570         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6571         lwork = -1;
6572         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6573 #if !defined(PETSC_USE_COMPLEX)
6574         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6575 #else
6576         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6577 #endif
6578         PetscCall(PetscFPTrapPop());
6579         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6580       } else {
6581 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6582         /* SVD */
6583         PetscInt max_n, min_n;
6584         max_n = max_size_of_constraint;
6585         min_n = max_constraints;
6586         if (max_size_of_constraint < max_constraints) {
6587           min_n = max_size_of_constraint;
6588           max_n = max_constraints;
6589         }
6590         PetscCall(PetscMalloc1(min_n, &singular_vals));
6591   #if defined(PETSC_USE_COMPLEX)
6592         PetscCall(PetscMalloc1(5 * min_n, &rwork));
6593   #endif
6594         /* now we evaluate the optimal workspace using query with lwork=-1 */
6595         lwork = -1;
6596         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6597         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6598         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6599         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6600   #if !defined(PETSC_USE_COMPLEX)
6601         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6602   #else
6603         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6604   #endif
6605         PetscCall(PetscFPTrapPop());
6606         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6607 #else
6608         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6609 #endif /* on missing GESVD */
6610       }
6611       /* Allocate optimal workspace */
6612       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6613       PetscCall(PetscMalloc1(lwork, &work));
6614     }
6615     /* Now we can loop on constraining sets */
6616     total_counts            = 0;
6617     constraints_idxs_ptr[0] = 0;
6618     constraints_data_ptr[0] = 0;
6619     /* vertices */
6620     if (n_vertices) {
6621       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6622       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6623       for (i = 0; i < n_vertices; i++) {
6624         constraints_n[total_counts]            = 1;
6625         constraints_data[total_counts]         = 1.0;
6626         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6627         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6628         total_counts++;
6629       }
6630       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6631     }
6632 
6633     /* edges and faces */
6634     total_counts_cc = total_counts;
6635     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6636       IS        used_is;
6637       PetscBool idxs_copied = PETSC_FALSE;
6638 
6639       if (ncc < n_ISForEdges) {
6640         used_is       = ISForEdges[ncc];
6641         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6642       } else {
6643         used_is       = ISForFaces[ncc - n_ISForEdges];
6644         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6645       }
6646       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6647 
6648       PetscCall(ISGetSize(used_is, &size_of_constraint));
6649       if (!size_of_constraint) continue;
6650       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6651       if (nnsp_has_cnst) {
6652         PetscScalar quad_value;
6653 
6654         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6655         idxs_copied = PETSC_TRUE;
6656 
6657         if (!pcbddc->use_nnsp_true) {
6658           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6659         } else {
6660           quad_value = 1.0;
6661         }
6662         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6663         temp_constraints++;
6664         total_counts++;
6665       }
6666       for (k = 0; k < nnsp_size; k++) {
6667         PetscReal    real_value;
6668         PetscScalar *ptr_to_data;
6669 
6670         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6671         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6672         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6673         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6674         /* check if array is null on the connected component */
6675         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6676         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6677         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6678           temp_constraints++;
6679           total_counts++;
6680           if (!idxs_copied) {
6681             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6682             idxs_copied = PETSC_TRUE;
6683           }
6684         }
6685       }
6686       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6687       valid_constraints = temp_constraints;
6688       if (!pcbddc->use_nnsp_true && temp_constraints) {
6689         if (temp_constraints == 1) { /* just normalize the constraint */
6690           PetscScalar norm, *ptr_to_data;
6691 
6692           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6693           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6694           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6695           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6696           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6697         } else { /* perform SVD */
6698           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6699 
6700           if (use_pod) {
6701             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6702                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6703                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6704                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6705                   from that computed using LAPACKgesvd
6706                -> This is due to a different computation of eigenvectors in LAPACKheev
6707                -> The quality of the POD-computed basis will be the same */
6708             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6709             /* Store upper triangular part of correlation matrix */
6710             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6711             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6712             for (j = 0; j < temp_constraints; j++) {
6713               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6714             }
6715             /* compute eigenvalues and eigenvectors of correlation matrix */
6716             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6717             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6718 #if !defined(PETSC_USE_COMPLEX)
6719             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6720 #else
6721             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6722 #endif
6723             PetscCall(PetscFPTrapPop());
6724             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6725             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6726             j = 0;
6727             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6728             total_counts      = total_counts - j;
6729             valid_constraints = temp_constraints - j;
6730             /* scale and copy POD basis into used quadrature memory */
6731             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6732             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6733             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6734             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6735             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6736             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6737             if (j < temp_constraints) {
6738               PetscInt ii;
6739               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6740               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6741               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6742               PetscCall(PetscFPTrapPop());
6743               for (k = 0; k < temp_constraints - j; k++) {
6744                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6745               }
6746             }
6747           } else {
6748 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6749             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6750             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6751             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6752             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6753   #if !defined(PETSC_USE_COMPLEX)
6754             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6755   #else
6756             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6757   #endif
6758             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6759             PetscCall(PetscFPTrapPop());
6760             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6761             k = temp_constraints;
6762             if (k > size_of_constraint) k = size_of_constraint;
6763             j = 0;
6764             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6765             valid_constraints = k - j;
6766             total_counts      = total_counts - temp_constraints + valid_constraints;
6767 #else
6768             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6769 #endif /* on missing GESVD */
6770           }
6771         }
6772       }
6773       /* update pointers information */
6774       if (valid_constraints) {
6775         constraints_n[total_counts_cc]            = valid_constraints;
6776         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6777         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6778         /* set change_of_basis flag */
6779         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6780         total_counts_cc++;
6781       }
6782     }
6783     /* free workspace */
6784     if (!skip_lapack) {
6785       PetscCall(PetscFree(work));
6786 #if defined(PETSC_USE_COMPLEX)
6787       PetscCall(PetscFree(rwork));
6788 #endif
6789       PetscCall(PetscFree(singular_vals));
6790       PetscCall(PetscFree(correlation_mat));
6791       PetscCall(PetscFree(temp_basis));
6792     }
6793     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6794     PetscCall(PetscFree(localnearnullsp));
6795     /* free index sets of faces, edges and vertices */
6796     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6797   } else {
6798     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6799 
6800     total_counts = 0;
6801     n_vertices   = 0;
6802     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6803     max_constraints = 0;
6804     total_counts_cc = 0;
6805     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6806       total_counts += pcbddc->adaptive_constraints_n[i];
6807       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6808       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6809     }
6810     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6811     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6812     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6813     constraints_data     = pcbddc->adaptive_constraints_data;
6814     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6815     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6816     total_counts_cc = 0;
6817     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6818       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6819     }
6820 
6821     max_size_of_constraint = 0;
6822     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6823     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6824     /* Change of basis */
6825     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6826     if (pcbddc->use_change_of_basis) {
6827       for (i = 0; i < sub_schurs->n_subs; i++) {
6828         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6829       }
6830     }
6831   }
6832   pcbddc->local_primal_size = total_counts;
6833   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6834 
6835   /* map constraints_idxs in boundary numbering */
6836   if (pcbddc->use_change_of_basis) {
6837     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6838     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6839   }
6840 
6841   /* Create constraint matrix */
6842   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6843   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6844   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6845 
6846   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6847   /* determine if a QR strategy is needed for change of basis */
6848   qr_needed = pcbddc->use_qr_single;
6849   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6850   total_primal_vertices        = 0;
6851   pcbddc->local_primal_size_cc = 0;
6852   for (i = 0; i < total_counts_cc; i++) {
6853     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6854     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6855       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6856       pcbddc->local_primal_size_cc += 1;
6857     } else if (PetscBTLookup(change_basis, i)) {
6858       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6859       pcbddc->local_primal_size_cc += constraints_n[i];
6860       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6861         PetscCall(PetscBTSet(qr_needed_idx, i));
6862         qr_needed = PETSC_TRUE;
6863       }
6864     } else {
6865       pcbddc->local_primal_size_cc += 1;
6866     }
6867   }
6868   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6869   pcbddc->n_vertices = total_primal_vertices;
6870   /* permute indices in order to have a sorted set of vertices */
6871   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6872   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6873   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6874   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6875 
6876   /* nonzero structure of constraint matrix */
6877   /* and get reference dof for local constraints */
6878   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6879   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6880 
6881   j            = total_primal_vertices;
6882   total_counts = total_primal_vertices;
6883   cum          = total_primal_vertices;
6884   for (i = n_vertices; i < total_counts_cc; i++) {
6885     if (!PetscBTLookup(change_basis, i)) {
6886       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6887       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6888       cum++;
6889       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6890       for (k = 0; k < constraints_n[i]; k++) {
6891         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6892         nnz[j + k]                                        = size_of_constraint;
6893       }
6894       j += constraints_n[i];
6895     }
6896   }
6897   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6898   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6899   PetscCall(PetscFree(nnz));
6900 
6901   /* set values in constraint matrix */
6902   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6903   total_counts = total_primal_vertices;
6904   for (i = n_vertices; i < total_counts_cc; i++) {
6905     if (!PetscBTLookup(change_basis, i)) {
6906       PetscInt *cols;
6907 
6908       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6909       cols               = constraints_idxs + constraints_idxs_ptr[i];
6910       for (k = 0; k < constraints_n[i]; k++) {
6911         PetscInt     row = total_counts + k;
6912         PetscScalar *vals;
6913 
6914         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6915         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6916       }
6917       total_counts += constraints_n[i];
6918     }
6919   }
6920   /* assembling */
6921   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6922   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6923   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
6924 
6925   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6926   if (pcbddc->use_change_of_basis) {
6927     /* dual and primal dofs on a single cc */
6928     PetscInt dual_dofs, primal_dofs;
6929     /* working stuff for GEQRF */
6930     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6931     PetscBLASInt lqr_work;
6932     /* working stuff for UNGQR */
6933     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6934     PetscBLASInt lgqr_work;
6935     /* working stuff for TRTRS */
6936     PetscScalar *trs_rhs = NULL;
6937     PetscBLASInt Blas_NRHS;
6938     /* pointers for values insertion into change of basis matrix */
6939     PetscInt    *start_rows, *start_cols;
6940     PetscScalar *start_vals;
6941     /* working stuff for values insertion */
6942     PetscBT   is_primal;
6943     PetscInt *aux_primal_numbering_B;
6944     /* matrix sizes */
6945     PetscInt global_size, local_size;
6946     /* temporary change of basis */
6947     Mat localChangeOfBasisMatrix;
6948     /* extra space for debugging */
6949     PetscScalar *dbg_work = NULL;
6950 
6951     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
6952     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
6953     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
6954     /* nonzeros for local mat */
6955     PetscCall(PetscMalloc1(pcis->n, &nnz));
6956     if (!pcbddc->benign_change || pcbddc->fake_change) {
6957       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6958     } else {
6959       const PetscInt *ii;
6960       PetscInt        n;
6961       PetscBool       flg_row;
6962       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6963       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6964       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6965     }
6966     for (i = n_vertices; i < total_counts_cc; i++) {
6967       if (PetscBTLookup(change_basis, i)) {
6968         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6969         if (PetscBTLookup(qr_needed_idx, i)) {
6970           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6971         } else {
6972           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6973           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6974         }
6975       }
6976     }
6977     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
6978     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6979     PetscCall(PetscFree(nnz));
6980     /* Set interior change in the matrix */
6981     if (!pcbddc->benign_change || pcbddc->fake_change) {
6982       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
6983     } else {
6984       const PetscInt *ii, *jj;
6985       PetscScalar    *aa;
6986       PetscInt        n;
6987       PetscBool       flg_row;
6988       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6989       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
6990       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
6991       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
6992       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6993     }
6994 
6995     if (pcbddc->dbg_flag) {
6996       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6997       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
6998     }
6999 
7000     /* Now we loop on the constraints which need a change of basis */
7001     /*
7002        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7003        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7004 
7005        Basic blocks of change of basis matrix T computed:
7006 
7007           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7008 
7009             | 1        0   ...        0         s_1/S |
7010             | 0        1   ...        0         s_2/S |
7011             |              ...                        |
7012             | 0        ...            1     s_{n-1}/S |
7013             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
7014 
7015             with S = \sum_{i=1}^n s_i^2
7016             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7017                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7018 
7019           - QR decomposition of constraints otherwise
7020     */
7021     if (qr_needed && max_size_of_constraint) {
7022       /* space to store Q */
7023       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7024       /* array to store scaling factors for reflectors */
7025       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7026       /* first we issue queries for optimal work */
7027       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7028       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7029       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7030       lqr_work = -1;
7031       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7032       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7033       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7034       PetscCall(PetscMalloc1(lqr_work, &qr_work));
7035       lgqr_work = -1;
7036       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7037       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7038       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7039       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7040       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7041       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7042       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7043       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7044       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7045       /* array to store rhs and solution of triangular solver */
7046       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7047       /* allocating workspace for check */
7048       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7049     }
7050     /* array to store whether a node is primal or not */
7051     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7052     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7053     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7054     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7055     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7056     PetscCall(PetscFree(aux_primal_numbering_B));
7057 
7058     /* loop on constraints and see whether or not they need a change of basis and compute it */
7059     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7060       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7061       if (PetscBTLookup(change_basis, total_counts)) {
7062         /* get constraint info */
7063         primal_dofs = constraints_n[total_counts];
7064         dual_dofs   = size_of_constraint - primal_dofs;
7065 
7066         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7067 
7068         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7069 
7070           /* copy quadrature constraints for change of basis check */
7071           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7072           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7073           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7074 
7075           /* compute QR decomposition of constraints */
7076           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7077           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7078           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7079           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7080           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7081           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7082           PetscCall(PetscFPTrapPop());
7083 
7084           /* explicitly compute R^-T */
7085           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7086           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7087           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7088           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7089           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7090           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7091           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7092           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7093           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr);
7094           PetscCall(PetscFPTrapPop());
7095 
7096           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7097           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7098           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7099           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7100           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7101           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7102           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7103           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7104           PetscCall(PetscFPTrapPop());
7105 
7106           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7107              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7108              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7109           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7110           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7111           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7112           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7113           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7114           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7115           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7116           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7117           PetscCall(PetscFPTrapPop());
7118           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7119 
7120           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7121           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7122           /* insert cols for primal dofs */
7123           for (j = 0; j < primal_dofs; j++) {
7124             start_vals = &qr_basis[j * size_of_constraint];
7125             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7126             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7127           }
7128           /* insert cols for dual dofs */
7129           for (j = 0, k = 0; j < dual_dofs; k++) {
7130             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7131               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7132               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7133               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7134               j++;
7135             }
7136           }
7137 
7138           /* check change of basis */
7139           if (pcbddc->dbg_flag) {
7140             PetscInt  ii, jj;
7141             PetscBool valid_qr = PETSC_TRUE;
7142             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7143             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7144             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7145             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7146             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7147             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7148             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7149             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7150             PetscCall(PetscFPTrapPop());
7151             for (jj = 0; jj < size_of_constraint; jj++) {
7152               for (ii = 0; ii < primal_dofs; ii++) {
7153                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7154                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7155               }
7156             }
7157             if (!valid_qr) {
7158               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7159               for (jj = 0; jj < size_of_constraint; jj++) {
7160                 for (ii = 0; ii < primal_dofs; ii++) {
7161                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7162                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7163                   }
7164                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7165                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7166                   }
7167                 }
7168               }
7169             } else {
7170               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7171             }
7172           }
7173         } else { /* simple transformation block */
7174           PetscInt    row, col;
7175           PetscScalar val, norm;
7176 
7177           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7178           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7179           for (j = 0; j < size_of_constraint; j++) {
7180             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7181             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7182             if (!PetscBTLookup(is_primal, row_B)) {
7183               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7184               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7185               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7186             } else {
7187               for (k = 0; k < size_of_constraint; k++) {
7188                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7189                 if (row != col) {
7190                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7191                 } else {
7192                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7193                 }
7194                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7195               }
7196             }
7197           }
7198           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7199         }
7200       } else {
7201         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7202       }
7203     }
7204 
7205     /* free workspace */
7206     if (qr_needed) {
7207       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7208       PetscCall(PetscFree(trs_rhs));
7209       PetscCall(PetscFree(qr_tau));
7210       PetscCall(PetscFree(qr_work));
7211       PetscCall(PetscFree(gqr_work));
7212       PetscCall(PetscFree(qr_basis));
7213     }
7214     PetscCall(PetscBTDestroy(&is_primal));
7215     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7216     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7217 
7218     /* assembling of global change of variable */
7219     if (!pcbddc->fake_change) {
7220       Mat      tmat;
7221       PetscInt bs;
7222 
7223       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7224       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7225       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7226       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7227       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7228       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7229       PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix));
7230       PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ));
7231       PetscCall(MatGetBlockSize(pc->pmat, &bs));
7232       PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs));
7233       PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size));
7234       PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE));
7235       PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7236       PetscCall(MatDestroy(&tmat));
7237       PetscCall(VecSet(pcis->vec1_global, 0.0));
7238       PetscCall(VecSet(pcis->vec1_N, 1.0));
7239       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7240       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7241       PetscCall(VecReciprocal(pcis->vec1_global));
7242       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7243 
7244       /* check */
7245       if (pcbddc->dbg_flag) {
7246         PetscReal error;
7247         Vec       x, x_change;
7248 
7249         PetscCall(VecDuplicate(pcis->vec1_global, &x));
7250         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7251         PetscCall(VecSetRandom(x, NULL));
7252         PetscCall(VecCopy(x, pcis->vec1_global));
7253         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7254         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7255         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7256         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7257         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7258         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7259         PetscCall(VecAXPY(x, -1.0, x_change));
7260         PetscCall(VecNorm(x, NORM_INFINITY, &error));
7261         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7262         PetscCall(VecDestroy(&x));
7263         PetscCall(VecDestroy(&x_change));
7264       }
7265       /* adapt sub_schurs computed (if any) */
7266       if (pcbddc->use_deluxe_scaling) {
7267         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7268 
7269         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7270         if (sub_schurs && sub_schurs->S_Ej_all) {
7271           Mat S_new, tmat;
7272           IS  is_all_N, is_V_Sall = NULL;
7273 
7274           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7275           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7276           if (pcbddc->deluxe_zerorows) {
7277             ISLocalToGlobalMapping NtoSall;
7278             IS                     is_V;
7279             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7280             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7281             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7282             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7283             PetscCall(ISDestroy(&is_V));
7284           }
7285           PetscCall(ISDestroy(&is_all_N));
7286           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7287           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7288           PetscCall(PetscObjectReference((PetscObject)S_new));
7289           if (pcbddc->deluxe_zerorows) {
7290             const PetscScalar *array;
7291             const PetscInt    *idxs_V, *idxs_all;
7292             PetscInt           i, n_V;
7293 
7294             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7295             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7296             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7297             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7298             PetscCall(VecGetArrayRead(pcis->D, &array));
7299             for (i = 0; i < n_V; i++) {
7300               PetscScalar val;
7301               PetscInt    idx;
7302 
7303               idx = idxs_V[i];
7304               val = array[idxs_all[idxs_V[i]]];
7305               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7306             }
7307             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7308             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7309             PetscCall(VecRestoreArrayRead(pcis->D, &array));
7310             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7311             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7312           }
7313           sub_schurs->S_Ej_all = S_new;
7314           PetscCall(MatDestroy(&S_new));
7315           if (sub_schurs->sum_S_Ej_all) {
7316             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7317             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7318             PetscCall(PetscObjectReference((PetscObject)S_new));
7319             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7320             sub_schurs->sum_S_Ej_all = S_new;
7321             PetscCall(MatDestroy(&S_new));
7322           }
7323           PetscCall(ISDestroy(&is_V_Sall));
7324           PetscCall(MatDestroy(&tmat));
7325         }
7326         /* destroy any change of basis context in sub_schurs */
7327         if (sub_schurs && sub_schurs->change) {
7328           PetscInt i;
7329 
7330           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7331           PetscCall(PetscFree(sub_schurs->change));
7332         }
7333       }
7334       if (pcbddc->switch_static) { /* need to save the local change */
7335         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7336       } else {
7337         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7338       }
7339       /* determine if any process has changed the pressures locally */
7340       pcbddc->change_interior = pcbddc->benign_have_null;
7341     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7342       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7343       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7344       pcbddc->use_qr_single    = qr_needed;
7345     }
7346   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7347     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7348       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7349       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7350     } else {
7351       Mat benign_global = NULL;
7352       if (pcbddc->benign_have_null) {
7353         Mat M;
7354 
7355         pcbddc->change_interior = PETSC_TRUE;
7356         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7357         PetscCall(VecReciprocal(pcis->vec1_N));
7358         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7359         if (pcbddc->benign_change) {
7360           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7361           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7362         } else {
7363           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7364           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7365         }
7366         PetscCall(MatISSetLocalMat(benign_global, M));
7367         PetscCall(MatDestroy(&M));
7368         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7369         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7370       }
7371       if (pcbddc->user_ChangeOfBasisMatrix) {
7372         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7373         PetscCall(MatDestroy(&benign_global));
7374       } else if (pcbddc->benign_have_null) {
7375         pcbddc->ChangeOfBasisMatrix = benign_global;
7376       }
7377     }
7378     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7379       IS              is_global;
7380       const PetscInt *gidxs;
7381 
7382       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7383       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7384       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7385       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7386       PetscCall(ISDestroy(&is_global));
7387     }
7388   }
7389   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7390 
7391   if (!pcbddc->fake_change) {
7392     /* add pressure dofs to set of primal nodes for numbering purposes */
7393     for (i = 0; i < pcbddc->benign_n; i++) {
7394       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
7395       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7396       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
7397       pcbddc->local_primal_size_cc++;
7398       pcbddc->local_primal_size++;
7399     }
7400 
7401     /* check if a new primal space has been introduced (also take into account benign trick) */
7402     pcbddc->new_primal_space_local = PETSC_TRUE;
7403     if (olocal_primal_size == pcbddc->local_primal_size) {
7404       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7405       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7406       if (!pcbddc->new_primal_space_local) {
7407         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7408         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7409       }
7410     }
7411     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7412     PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7413   }
7414   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7415 
7416   /* flush dbg viewer */
7417   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7418 
7419   /* free workspace */
7420   PetscCall(PetscBTDestroy(&qr_needed_idx));
7421   PetscCall(PetscBTDestroy(&change_basis));
7422   if (!pcbddc->adaptive_selection) {
7423     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7424     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7425   } else {
7426     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7427     PetscCall(PetscFree(constraints_n));
7428     PetscCall(PetscFree(constraints_idxs_B));
7429   }
7430   PetscFunctionReturn(PETSC_SUCCESS);
7431 }
7432 
7433 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7434 {
7435   ISLocalToGlobalMapping map;
7436   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7437   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
7438   PetscInt               i, N;
7439   PetscBool              rcsr = PETSC_FALSE;
7440 
7441   PetscFunctionBegin;
7442   if (pcbddc->recompute_topography) {
7443     pcbddc->graphanalyzed = PETSC_FALSE;
7444     /* Reset previously computed graph */
7445     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7446     /* Init local Graph struct */
7447     PetscCall(MatGetSize(pc->pmat, &N, NULL));
7448     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7449     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7450 
7451     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7452     /* Check validity of the csr graph passed in by the user */
7453     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7454                pcbddc->mat_graph->nvtxs);
7455 
7456     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7457     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7458       PetscInt *xadj, *adjncy;
7459       PetscInt  nvtxs;
7460       PetscBool flg_row;
7461       Mat       A;
7462 
7463       PetscCall(PetscObjectReference((PetscObject)matis->A));
7464       A = matis->A;
7465       for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7466         Mat AtA;
7467 
7468         PetscCall(MatProductCreate(A, A, NULL, &AtA));
7469         PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7470         PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7471         PetscCall(MatProductSetFromOptions(AtA));
7472         PetscCall(MatProductSymbolic(AtA));
7473         PetscCall(MatProductClear(AtA));
7474         /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7475         AtA->assembled = PETSC_TRUE;
7476         PetscCall(MatDestroy(&A));
7477         A = AtA;
7478       }
7479       PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7480       if (flg_row) {
7481         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7482         pcbddc->computed_rowadj = PETSC_TRUE;
7483         PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7484         rcsr = PETSC_TRUE;
7485       }
7486       PetscCall(MatDestroy(&A));
7487     }
7488     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7489 
7490     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7491       PetscReal   *lcoords;
7492       PetscInt     n;
7493       MPI_Datatype dimrealtype;
7494       PetscMPIInt  cdimi;
7495 
7496       /* TODO: support for blocked */
7497       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7498       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7499       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7500       PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi));
7501       PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype));
7502       PetscCallMPI(MPI_Type_commit(&dimrealtype));
7503       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7504       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7505       PetscCallMPI(MPI_Type_free(&dimrealtype));
7506       PetscCall(PetscFree(pcbddc->mat_graph->coords));
7507 
7508       pcbddc->mat_graph->coords = lcoords;
7509       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7510       pcbddc->mat_graph->cnloc  = n;
7511     }
7512     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7513                pcbddc->mat_graph->nvtxs);
7514     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7515 
7516     /* attach info on disconnected subdomains if present */
7517     if (pcbddc->n_local_subs) {
7518       PetscInt *local_subs, n, totn;
7519 
7520       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7521       PetscCall(PetscMalloc1(n, &local_subs));
7522       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7523       for (i = 0; i < pcbddc->n_local_subs; i++) {
7524         const PetscInt *idxs;
7525         PetscInt        nl, j;
7526 
7527         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7528         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7529         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7530         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7531       }
7532       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7533       pcbddc->mat_graph->n_local_subs = totn + 1;
7534       pcbddc->mat_graph->local_subs   = local_subs;
7535     }
7536 
7537     /* Setup of Graph */
7538     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7539   }
7540 
7541   if (!pcbddc->graphanalyzed) {
7542     /* Graph's connected components analysis */
7543     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7544     pcbddc->graphanalyzed   = PETSC_TRUE;
7545     pcbddc->corner_selected = pcbddc->corner_selection;
7546   }
7547   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7548   PetscFunctionReturn(PETSC_SUCCESS);
7549 }
7550 
7551 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7552 {
7553   PetscInt     i, j, n;
7554   PetscScalar *alphas;
7555   PetscReal    norm, *onorms;
7556 
7557   PetscFunctionBegin;
7558   n = *nio;
7559   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7560   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7561   PetscCall(VecNormalize(vecs[0], &norm));
7562   if (norm < PETSC_SMALL) {
7563     onorms[0] = 0.0;
7564     PetscCall(VecSet(vecs[0], 0.0));
7565   } else {
7566     onorms[0] = norm;
7567   }
7568 
7569   for (i = 1; i < n; i++) {
7570     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7571     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7572     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7573     PetscCall(VecNormalize(vecs[i], &norm));
7574     if (norm < PETSC_SMALL) {
7575       onorms[i] = 0.0;
7576       PetscCall(VecSet(vecs[i], 0.0));
7577     } else {
7578       onorms[i] = norm;
7579     }
7580   }
7581   /* push nonzero vectors at the beginning */
7582   for (i = 0; i < n; i++) {
7583     if (onorms[i] == 0.0) {
7584       for (j = i + 1; j < n; j++) {
7585         if (onorms[j] != 0.0) {
7586           PetscCall(VecCopy(vecs[j], vecs[i]));
7587           onorms[i] = onorms[j];
7588           onorms[j] = 0.0;
7589           break;
7590         }
7591       }
7592     }
7593   }
7594   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7595   PetscCall(PetscFree2(alphas, onorms));
7596   PetscFunctionReturn(PETSC_SUCCESS);
7597 }
7598 
7599 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7600 {
7601   ISLocalToGlobalMapping mapping;
7602   Mat                    A;
7603   PetscInt               n_neighs, *neighs, *n_shared, **shared;
7604   PetscMPIInt            size, rank, color;
7605   PetscInt              *xadj, *adjncy;
7606   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7607   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
7608   PetscInt               void_procs, *procs_candidates = NULL;
7609   PetscInt               xadj_count, *count;
7610   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
7611   PetscSubcomm           psubcomm;
7612   MPI_Comm               subcomm;
7613 
7614   PetscFunctionBegin;
7615   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7616   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7617   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7618   PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7619   PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7620   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7621 
7622   if (have_void) *have_void = PETSC_FALSE;
7623   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7624   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7625   PetscCall(MatISGetLocalMat(mat, &A));
7626   PetscCall(MatGetLocalSize(A, &n, NULL));
7627   im_active = !!n;
7628   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7629   void_procs = size - active_procs;
7630   /* get ranks of non-active processes in mat communicator */
7631   if (void_procs) {
7632     PetscInt ncand;
7633 
7634     if (have_void) *have_void = PETSC_TRUE;
7635     PetscCall(PetscMalloc1(size, &procs_candidates));
7636     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7637     for (i = 0, ncand = 0; i < size; i++) {
7638       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7639     }
7640     /* force n_subdomains to be not greater that the number of non-active processes */
7641     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7642   }
7643 
7644   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7645      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7646   PetscCall(MatGetSize(mat, &N, NULL));
7647   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7648     PetscInt issize, isidx, dest;
7649     if (*n_subdomains == 1) dest = 0;
7650     else dest = rank;
7651     if (im_active) {
7652       issize = 1;
7653       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7654         isidx = procs_candidates[dest];
7655       } else {
7656         isidx = dest;
7657       }
7658     } else {
7659       issize = 0;
7660       isidx  = -1;
7661     }
7662     if (*n_subdomains != 1) *n_subdomains = active_procs;
7663     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7664     PetscCall(PetscFree(procs_candidates));
7665     PetscFunctionReturn(PETSC_SUCCESS);
7666   }
7667   PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7668   PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7669   threshold = PetscMax(threshold, 2);
7670 
7671   /* Get info on mapping */
7672   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7673   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7674 
7675   /* build local CSR graph of subdomains' connectivity */
7676   PetscCall(PetscMalloc1(2, &xadj));
7677   xadj[0] = 0;
7678   xadj[1] = PetscMax(n_neighs - 1, 0);
7679   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7680   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7681   PetscCall(PetscCalloc1(n, &count));
7682   for (i = 1; i < n_neighs; i++)
7683     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7684 
7685   xadj_count = 0;
7686   for (i = 1; i < n_neighs; i++) {
7687     for (j = 0; j < n_shared[i]; j++) {
7688       if (count[shared[i][j]] < threshold) {
7689         adjncy[xadj_count]     = neighs[i];
7690         adjncy_wgt[xadj_count] = n_shared[i];
7691         xadj_count++;
7692         break;
7693       }
7694     }
7695   }
7696   xadj[1] = xadj_count;
7697   PetscCall(PetscFree(count));
7698   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7699   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7700 
7701   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7702 
7703   /* Restrict work on active processes only */
7704   PetscCall(PetscMPIIntCast(im_active, &color));
7705   if (void_procs) {
7706     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7707     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7708     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7709     subcomm = PetscSubcommChild(psubcomm);
7710   } else {
7711     psubcomm = NULL;
7712     subcomm  = PetscObjectComm((PetscObject)mat);
7713   }
7714 
7715   v_wgt = NULL;
7716   if (!color) {
7717     PetscCall(PetscFree(xadj));
7718     PetscCall(PetscFree(adjncy));
7719     PetscCall(PetscFree(adjncy_wgt));
7720   } else {
7721     Mat             subdomain_adj;
7722     IS              new_ranks, new_ranks_contig;
7723     MatPartitioning partitioner;
7724     PetscInt        rstart, rend;
7725     PetscMPIInt     irstart = 0, irend = 0;
7726     PetscInt       *is_indices, *oldranks;
7727     PetscMPIInt     size;
7728     PetscBool       aggregate;
7729 
7730     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7731     if (void_procs) {
7732       PetscInt prank = rank;
7733       PetscCall(PetscMalloc1(size, &oldranks));
7734       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7735       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7736       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7737     } else {
7738       oldranks = NULL;
7739     }
7740     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7741     if (aggregate) { /* TODO: all this part could be made more efficient */
7742       PetscInt     lrows, row, ncols, *cols;
7743       PetscMPIInt  nrank;
7744       PetscScalar *vals;
7745 
7746       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7747       lrows = 0;
7748       if (nrank < redprocs) {
7749         lrows = size / redprocs;
7750         if (nrank < size % redprocs) lrows++;
7751       }
7752       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7753       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7754       PetscCall(PetscMPIIntCast(rstart, &irstart));
7755       PetscCall(PetscMPIIntCast(rend, &irend));
7756       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7757       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7758       row   = nrank;
7759       ncols = xadj[1] - xadj[0];
7760       cols  = adjncy;
7761       PetscCall(PetscMalloc1(ncols, &vals));
7762       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7763       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7764       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7765       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7766       PetscCall(PetscFree(xadj));
7767       PetscCall(PetscFree(adjncy));
7768       PetscCall(PetscFree(adjncy_wgt));
7769       PetscCall(PetscFree(vals));
7770       if (use_vwgt) {
7771         Vec                v;
7772         const PetscScalar *array;
7773         PetscInt           nl;
7774 
7775         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7776         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7777         PetscCall(VecAssemblyBegin(v));
7778         PetscCall(VecAssemblyEnd(v));
7779         PetscCall(VecGetLocalSize(v, &nl));
7780         PetscCall(VecGetArrayRead(v, &array));
7781         PetscCall(PetscMalloc1(nl, &v_wgt));
7782         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7783         PetscCall(VecRestoreArrayRead(v, &array));
7784         PetscCall(VecDestroy(&v));
7785       }
7786     } else {
7787       PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7788       if (use_vwgt) {
7789         PetscCall(PetscMalloc1(1, &v_wgt));
7790         v_wgt[0] = n;
7791       }
7792     }
7793     /* PetscCall(MatView(subdomain_adj,0)); */
7794 
7795     /* Partition */
7796     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7797 #if defined(PETSC_HAVE_PTSCOTCH)
7798     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7799 #elif defined(PETSC_HAVE_PARMETIS)
7800     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7801 #else
7802     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7803 #endif
7804     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7805     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7806     *n_subdomains = PetscMin(size, *n_subdomains);
7807     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7808     PetscCall(MatPartitioningSetFromOptions(partitioner));
7809     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7810     /* PetscCall(MatPartitioningView(partitioner,0)); */
7811 
7812     /* renumber new_ranks to avoid "holes" in new set of processors */
7813     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7814     PetscCall(ISDestroy(&new_ranks));
7815     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7816     if (!aggregate) {
7817       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7818         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7819         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7820       } else if (oldranks) {
7821         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7822       } else {
7823         ranks_send_to_idx[0] = is_indices[0];
7824       }
7825     } else {
7826       PetscInt     idx = 0;
7827       PetscMPIInt  tag;
7828       MPI_Request *reqs;
7829 
7830       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7831       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7832       for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7833       PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7834       PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7835       PetscCall(PetscFree(reqs));
7836       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7837         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7838         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7839       } else if (oldranks) {
7840         ranks_send_to_idx[0] = oldranks[idx];
7841       } else {
7842         ranks_send_to_idx[0] = idx;
7843       }
7844     }
7845     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7846     /* clean up */
7847     PetscCall(PetscFree(oldranks));
7848     PetscCall(ISDestroy(&new_ranks_contig));
7849     PetscCall(MatDestroy(&subdomain_adj));
7850     PetscCall(MatPartitioningDestroy(&partitioner));
7851   }
7852   PetscCall(PetscSubcommDestroy(&psubcomm));
7853   PetscCall(PetscFree(procs_candidates));
7854 
7855   /* assemble parallel IS for sends */
7856   i = 1;
7857   if (!color) i = 0;
7858   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7859   PetscFunctionReturn(PETSC_SUCCESS);
7860 }
7861 
7862 typedef enum {
7863   MATDENSE_PRIVATE = 0,
7864   MATAIJ_PRIVATE,
7865   MATBAIJ_PRIVATE,
7866   MATSBAIJ_PRIVATE
7867 } MatTypePrivate;
7868 
7869 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7870 {
7871   Mat                    local_mat;
7872   IS                     is_sends_internal;
7873   PetscInt               rows, cols, new_local_rows;
7874   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7875   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7876   ISLocalToGlobalMapping l2gmap;
7877   PetscInt              *l2gmap_indices;
7878   const PetscInt        *is_indices;
7879   MatType                new_local_type;
7880   /* buffers */
7881   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7882   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7883   PetscInt          *recv_buffer_idxs_local;
7884   PetscScalar       *ptr_vals, *recv_buffer_vals;
7885   const PetscScalar *send_buffer_vals;
7886   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7887   /* MPI */
7888   MPI_Comm     comm, comm_n;
7889   PetscSubcomm subcomm;
7890   PetscMPIInt  n_sends, n_recvs, size;
7891   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7892   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7893   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7894   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7895   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7896 
7897   PetscFunctionBegin;
7898   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7899   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7900   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7901   PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
7902   PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
7903   PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
7904   PetscValidLogicalCollectiveBool(mat, reuse, 6);
7905   PetscValidLogicalCollectiveInt(mat, nis, 8);
7906   PetscValidLogicalCollectiveInt(mat, nvecs, 10);
7907   if (nvecs) {
7908     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7909     PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
7910   }
7911   /* further checks */
7912   PetscCall(MatISGetLocalMat(mat, &local_mat));
7913   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7914   /* XXX hack for multi_element */
7915   if (!isdense) PetscCall(MatConvert(local_mat, MATDENSE, MAT_INPLACE_MATRIX, &local_mat));
7916   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7917   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7918 
7919   PetscCall(MatGetSize(local_mat, &rows, &cols));
7920   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7921   if (reuse && *mat_n) {
7922     PetscInt mrows, mcols, mnrows, mncols;
7923     PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
7924     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7925     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7926     PetscCall(MatGetSize(mat, &mrows, &mcols));
7927     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7928     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7929     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7930   }
7931   PetscCall(MatGetBlockSize(local_mat, &bs));
7932   PetscValidLogicalCollectiveInt(mat, bs, 1);
7933 
7934   /* prepare IS for sending if not provided */
7935   if (!is_sends) {
7936     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7937     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7938   } else {
7939     PetscCall(PetscObjectReference((PetscObject)is_sends));
7940     is_sends_internal = is_sends;
7941   }
7942 
7943   /* get comm */
7944   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
7945 
7946   /* compute number of sends */
7947   PetscCall(ISGetLocalSize(is_sends_internal, &i));
7948   PetscCall(PetscMPIIntCast(i, &n_sends));
7949 
7950   /* compute number of receives */
7951   PetscCallMPI(MPI_Comm_size(comm, &size));
7952   PetscCall(PetscMalloc1(size, &iflags));
7953   PetscCall(PetscArrayzero(iflags, size));
7954   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
7955   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7956   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
7957   PetscCall(PetscFree(iflags));
7958 
7959   /* restrict comm if requested */
7960   subcomm     = NULL;
7961   destroy_mat = PETSC_FALSE;
7962   if (restrict_comm) {
7963     PetscMPIInt color, subcommsize;
7964 
7965     color = 0;
7966     if (restrict_full) {
7967       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7968     } else {
7969       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7970     }
7971     PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
7972     subcommsize = size - subcommsize;
7973     /* check if reuse has been requested */
7974     if (reuse) {
7975       if (*mat_n) {
7976         PetscMPIInt subcommsize2;
7977         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
7978         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
7979         comm_n = PetscObjectComm((PetscObject)*mat_n);
7980       } else {
7981         comm_n = PETSC_COMM_SELF;
7982       }
7983     } else { /* MAT_INITIAL_MATRIX */
7984       PetscMPIInt rank;
7985 
7986       PetscCallMPI(MPI_Comm_rank(comm, &rank));
7987       PetscCall(PetscSubcommCreate(comm, &subcomm));
7988       PetscCall(PetscSubcommSetNumber(subcomm, 2));
7989       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
7990       comm_n = PetscSubcommChild(subcomm);
7991     }
7992     /* flag to destroy *mat_n if not significative */
7993     if (color) destroy_mat = PETSC_TRUE;
7994   } else {
7995     comm_n = comm;
7996   }
7997 
7998   /* prepare send/receive buffers */
7999   PetscCall(PetscMalloc1(size, &ilengths_idxs));
8000   PetscCall(PetscArrayzero(ilengths_idxs, size));
8001   PetscCall(PetscMalloc1(size, &ilengths_vals));
8002   PetscCall(PetscArrayzero(ilengths_vals, size));
8003   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8004 
8005   /* Get data from local matrices */
8006   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8007   /* TODO: See below some guidelines on how to prepare the local buffers */
8008   /*
8009        send_buffer_vals should contain the raw values of the local matrix
8010        send_buffer_idxs should contain:
8011        - MatType_PRIVATE type
8012        - PetscInt        size_of_l2gmap
8013        - PetscInt        global_row_indices[size_of_l2gmap]
8014        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8015     */
8016   {
8017     ISLocalToGlobalMapping mapping;
8018 
8019     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8020     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8021     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8022     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8023     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8024     send_buffer_idxs[1] = i;
8025     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8026     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8027     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8028     PetscCall(PetscMPIIntCast(i, &len));
8029     for (i = 0; i < n_sends; i++) {
8030       ilengths_vals[is_indices[i]] = len * len;
8031       ilengths_idxs[is_indices[i]] = len + 2;
8032     }
8033   }
8034   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8035   /* additional is (if any) */
8036   if (nis) {
8037     PetscMPIInt psum;
8038     PetscInt    j;
8039     for (j = 0, psum = 0; j < nis; j++) {
8040       PetscInt plen;
8041       PetscCall(ISGetLocalSize(isarray[j], &plen));
8042       PetscCall(PetscMPIIntCast(plen, &len));
8043       psum += len + 1; /* indices + length */
8044     }
8045     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8046     for (j = 0, psum = 0; j < nis; j++) {
8047       PetscInt        plen;
8048       const PetscInt *is_array_idxs;
8049       PetscCall(ISGetLocalSize(isarray[j], &plen));
8050       send_buffer_idxs_is[psum] = plen;
8051       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8052       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8053       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8054       psum += plen + 1; /* indices + length */
8055     }
8056     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8057     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8058   }
8059   PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8060 
8061   buf_size_idxs    = 0;
8062   buf_size_vals    = 0;
8063   buf_size_idxs_is = 0;
8064   buf_size_vecs    = 0;
8065   for (i = 0; i < n_recvs; i++) {
8066     buf_size_idxs += olengths_idxs[i];
8067     buf_size_vals += olengths_vals[i];
8068     if (nis) buf_size_idxs_is += olengths_idxs_is[i];
8069     if (nvecs) buf_size_vecs += olengths_idxs[i];
8070   }
8071   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8072   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8073   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8074   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8075 
8076   /* get new tags for clean communications */
8077   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8078   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8079   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8080   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8081 
8082   /* allocate for requests */
8083   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8084   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8085   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8086   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8087   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8088   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8089   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8090   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8091 
8092   /* communications */
8093   ptr_idxs    = recv_buffer_idxs;
8094   ptr_vals    = recv_buffer_vals;
8095   ptr_idxs_is = recv_buffer_idxs_is;
8096   ptr_vecs    = recv_buffer_vecs;
8097   for (i = 0; i < n_recvs; i++) {
8098     PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i]));
8099     PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i]));
8100     ptr_idxs += olengths_idxs[i];
8101     ptr_vals += olengths_vals[i];
8102     if (nis) {
8103       PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i]));
8104       ptr_idxs_is += olengths_idxs_is[i];
8105     }
8106     if (nvecs) {
8107       PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i]));
8108       ptr_vecs += olengths_idxs[i] - 2;
8109     }
8110   }
8111   for (i = 0; i < n_sends; i++) {
8112     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8113     PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8114     PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8115     if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8116     if (nvecs) {
8117       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8118       PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8119     }
8120   }
8121   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8122   PetscCall(ISDestroy(&is_sends_internal));
8123 
8124   /* assemble new l2g map */
8125   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8126   ptr_idxs       = recv_buffer_idxs;
8127   new_local_rows = 0;
8128   for (i = 0; i < n_recvs; i++) {
8129     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8130     ptr_idxs += olengths_idxs[i];
8131   }
8132   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8133   ptr_idxs       = recv_buffer_idxs;
8134   new_local_rows = 0;
8135   for (i = 0; i < n_recvs; i++) {
8136     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8137     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8138     ptr_idxs += olengths_idxs[i];
8139   }
8140   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8141   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8142   PetscCall(PetscFree(l2gmap_indices));
8143 
8144   /* infer new local matrix type from received local matrices type */
8145   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8146   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8147   if (n_recvs) {
8148     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8149     ptr_idxs                              = recv_buffer_idxs;
8150     for (i = 0; i < n_recvs; i++) {
8151       if ((PetscInt)new_local_type_private != *ptr_idxs) {
8152         new_local_type_private = MATAIJ_PRIVATE;
8153         break;
8154       }
8155       ptr_idxs += olengths_idxs[i];
8156     }
8157     switch (new_local_type_private) {
8158     case MATDENSE_PRIVATE:
8159       new_local_type = MATSEQAIJ;
8160       bs             = 1;
8161       break;
8162     case MATAIJ_PRIVATE:
8163       new_local_type = MATSEQAIJ;
8164       bs             = 1;
8165       break;
8166     case MATBAIJ_PRIVATE:
8167       new_local_type = MATSEQBAIJ;
8168       break;
8169     case MATSBAIJ_PRIVATE:
8170       new_local_type = MATSEQSBAIJ;
8171       break;
8172     default:
8173       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8174     }
8175   } else { /* by default, new_local_type is seqaij */
8176     new_local_type = MATSEQAIJ;
8177     bs             = 1;
8178   }
8179 
8180   /* create MATIS object if needed */
8181   if (!reuse) {
8182     PetscCall(MatGetSize(mat, &rows, &cols));
8183     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8184   } else {
8185     /* it also destroys the local matrices */
8186     if (*mat_n) {
8187       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8188     } else { /* this is a fake object */
8189       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8190     }
8191   }
8192   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8193   PetscCall(MatSetType(local_mat, new_local_type));
8194 
8195   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8196 
8197   /* Global to local map of received indices */
8198   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8199   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8200   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8201 
8202   /* restore attributes -> type of incoming data and its size */
8203   buf_size_idxs = 0;
8204   for (i = 0; i < n_recvs; i++) {
8205     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
8206     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8207     buf_size_idxs += olengths_idxs[i];
8208   }
8209   PetscCall(PetscFree(recv_buffer_idxs));
8210 
8211   /* set preallocation */
8212   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8213   if (!newisdense) {
8214     PetscInt *new_local_nnz = NULL;
8215 
8216     ptr_idxs = recv_buffer_idxs_local;
8217     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8218     for (i = 0; i < n_recvs; i++) {
8219       PetscInt j;
8220       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8221         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8222       } else {
8223         /* TODO */
8224       }
8225       ptr_idxs += olengths_idxs[i];
8226     }
8227     if (new_local_nnz) {
8228       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8229       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8230       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8231       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8232       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8233       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8234     } else {
8235       PetscCall(MatSetUp(local_mat));
8236     }
8237     PetscCall(PetscFree(new_local_nnz));
8238   } else {
8239     PetscCall(MatSetUp(local_mat));
8240   }
8241 
8242   /* set values */
8243   ptr_vals = recv_buffer_vals;
8244   ptr_idxs = recv_buffer_idxs_local;
8245   for (i = 0; i < n_recvs; i++) {
8246     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8247       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8248       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8249       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8250       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8251       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8252     } else {
8253       /* TODO */
8254     }
8255     ptr_idxs += olengths_idxs[i];
8256     ptr_vals += olengths_vals[i];
8257   }
8258   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8259   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8260   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8261   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8262   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8263   PetscCall(PetscFree(recv_buffer_vals));
8264 
8265 #if 0
8266   if (!restrict_comm) { /* check */
8267     Vec       lvec,rvec;
8268     PetscReal infty_error;
8269 
8270     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8271     PetscCall(VecSetRandom(rvec,NULL));
8272     PetscCall(MatMult(mat,rvec,lvec));
8273     PetscCall(VecScale(lvec,-1.0));
8274     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8275     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8276     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8277     PetscCall(VecDestroy(&rvec));
8278     PetscCall(VecDestroy(&lvec));
8279   }
8280 #endif
8281 
8282   /* assemble new additional is (if any) */
8283   if (nis) {
8284     PetscInt **temp_idxs, *count_is, j, psum;
8285 
8286     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8287     PetscCall(PetscCalloc1(nis, &count_is));
8288     ptr_idxs = recv_buffer_idxs_is;
8289     psum     = 0;
8290     for (i = 0; i < n_recvs; i++) {
8291       for (j = 0; j < nis; j++) {
8292         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8293         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
8294         psum += plen;
8295         ptr_idxs += plen + 1; /* shift pointer to received data */
8296       }
8297     }
8298     PetscCall(PetscMalloc1(nis, &temp_idxs));
8299     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8300     for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8301     PetscCall(PetscArrayzero(count_is, nis));
8302     ptr_idxs = recv_buffer_idxs_is;
8303     for (i = 0; i < n_recvs; i++) {
8304       for (j = 0; j < nis; j++) {
8305         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8306         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8307         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
8308         ptr_idxs += plen + 1; /* shift pointer to received data */
8309       }
8310     }
8311     for (i = 0; i < nis; i++) {
8312       PetscCall(ISDestroy(&isarray[i]));
8313       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8314       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8315     }
8316     PetscCall(PetscFree(count_is));
8317     PetscCall(PetscFree(temp_idxs[0]));
8318     PetscCall(PetscFree(temp_idxs));
8319   }
8320   /* free workspace */
8321   PetscCall(PetscFree(recv_buffer_idxs_is));
8322   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8323   PetscCall(PetscFree(send_buffer_idxs));
8324   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8325   if (isdense) {
8326     PetscCall(MatISGetLocalMat(mat, &local_mat));
8327     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8328     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8329   } else {
8330     /* PetscCall(PetscFree(send_buffer_vals)); */
8331   }
8332   if (nis) {
8333     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8334     PetscCall(PetscFree(send_buffer_idxs_is));
8335   }
8336 
8337   if (nvecs) {
8338     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8339     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8340     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8341     PetscCall(VecDestroy(&nnsp_vec[0]));
8342     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8343     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8344     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8345     /* set values */
8346     ptr_vals = recv_buffer_vecs;
8347     ptr_idxs = recv_buffer_idxs_local;
8348     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8349     for (i = 0; i < n_recvs; i++) {
8350       PetscInt j;
8351       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8352       ptr_idxs += olengths_idxs[i];
8353       ptr_vals += olengths_idxs[i] - 2;
8354     }
8355     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8356     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8357     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8358   }
8359 
8360   PetscCall(PetscFree(recv_buffer_vecs));
8361   PetscCall(PetscFree(recv_buffer_idxs_local));
8362   PetscCall(PetscFree(recv_req_idxs));
8363   PetscCall(PetscFree(recv_req_vals));
8364   PetscCall(PetscFree(recv_req_vecs));
8365   PetscCall(PetscFree(recv_req_idxs_is));
8366   PetscCall(PetscFree(send_req_idxs));
8367   PetscCall(PetscFree(send_req_vals));
8368   PetscCall(PetscFree(send_req_vecs));
8369   PetscCall(PetscFree(send_req_idxs_is));
8370   PetscCall(PetscFree(ilengths_vals));
8371   PetscCall(PetscFree(ilengths_idxs));
8372   PetscCall(PetscFree(olengths_vals));
8373   PetscCall(PetscFree(olengths_idxs));
8374   PetscCall(PetscFree(onodes));
8375   if (nis) {
8376     PetscCall(PetscFree(ilengths_idxs_is));
8377     PetscCall(PetscFree(olengths_idxs_is));
8378     PetscCall(PetscFree(onodes_is));
8379   }
8380   PetscCall(PetscSubcommDestroy(&subcomm));
8381   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8382     PetscCall(MatDestroy(mat_n));
8383     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8384     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8385       PetscCall(VecDestroy(&nnsp_vec[0]));
8386     }
8387     *mat_n = NULL;
8388   }
8389   PetscFunctionReturn(PETSC_SUCCESS);
8390 }
8391 
8392 /* temporary hack into ksp private data structure */
8393 #include <petsc/private/kspimpl.h>
8394 
8395 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8396 {
8397   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
8398   PC_IS                 *pcis   = (PC_IS *)pc->data;
8399   PCBDDCGraph            graph  = pcbddc->mat_graph;
8400   Mat                    coarse_mat, coarse_mat_is;
8401   Mat                    coarsedivudotp = NULL;
8402   Mat                    coarseG, t_coarse_mat_is;
8403   MatNullSpace           CoarseNullSpace = NULL;
8404   ISLocalToGlobalMapping coarse_islg;
8405   IS                     coarse_is, *isarray, corners;
8406   PetscInt               i, im_active = -1, active_procs = -1;
8407   PetscInt               nis, nisdofs, nisneu, nisvert;
8408   PetscInt               coarse_eqs_per_proc, coarsening_ratio;
8409   PC                     pc_temp;
8410   PCType                 coarse_pc_type;
8411   KSPType                coarse_ksp_type;
8412   PetscBool              multilevel_requested, multilevel_allowed;
8413   PetscBool              coarse_reuse, multi_element = graph->multi_element;
8414   PetscInt               ncoarse, nedcfield;
8415   PetscBool              compute_vecs = PETSC_FALSE;
8416   PetscScalar           *array;
8417   MatReuse               coarse_mat_reuse;
8418   PetscBool              restr, full_restr, have_void;
8419   PetscMPIInt            size;
8420 
8421   PetscFunctionBegin;
8422   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8423   /* Assign global numbering to coarse dofs */
8424   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8425     PetscInt ocoarse_size;
8426     compute_vecs = PETSC_TRUE;
8427 
8428     pcbddc->new_primal_space = PETSC_TRUE;
8429     ocoarse_size             = pcbddc->coarse_size;
8430     PetscCall(PetscFree(pcbddc->global_primal_indices));
8431     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8432     /* see if we can avoid some work */
8433     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8434       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8435       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8436         PetscCall(KSPReset(pcbddc->coarse_ksp));
8437         coarse_reuse = PETSC_FALSE;
8438       } else { /* we can safely reuse already computed coarse matrix */
8439         coarse_reuse = PETSC_TRUE;
8440       }
8441     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8442       coarse_reuse = PETSC_FALSE;
8443     }
8444     /* reset any subassembling information */
8445     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8446   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8447     coarse_reuse = PETSC_TRUE;
8448   }
8449   if (coarse_reuse && pcbddc->coarse_ksp) {
8450     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8451     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8452     coarse_mat_reuse = MAT_REUSE_MATRIX;
8453   } else {
8454     coarse_mat       = NULL;
8455     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8456   }
8457 
8458   /* creates temporary l2gmap and IS for coarse indexes */
8459   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8460   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8461 
8462   /* creates temporary MATIS object for coarse matrix */
8463   PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8464   PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8465   PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8466   PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_TRUE));
8467   PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8468   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8469   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8470   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8471   PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8472 
8473   /* count "active" (i.e. with positive local size) and "void" processes */
8474   im_active = !!pcis->n;
8475   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8476 
8477   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8478   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8479   /* full_restr : just use the receivers from the subassembling pattern */
8480   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8481   coarse_mat_is        = NULL;
8482   multilevel_allowed   = PETSC_FALSE;
8483   multilevel_requested = PETSC_FALSE;
8484   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8485   if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8486   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8487   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8488   coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8489   if (multilevel_requested) {
8490     ncoarse    = active_procs / coarsening_ratio;
8491     restr      = PETSC_FALSE;
8492     full_restr = PETSC_FALSE;
8493   } else {
8494     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8495     restr      = PETSC_TRUE;
8496     full_restr = PETSC_TRUE;
8497   }
8498   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8499   ncoarse = PetscMax(1, ncoarse);
8500   if (!pcbddc->coarse_subassembling) {
8501     if (coarsening_ratio > 1) {
8502       if (multilevel_requested) {
8503         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8504       } else {
8505         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8506       }
8507     } else {
8508       PetscMPIInt rank;
8509 
8510       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8511       have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE;
8512       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8513     }
8514   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8515     PetscInt psum;
8516     if (pcbddc->coarse_ksp) psum = 1;
8517     else psum = 0;
8518     PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8519     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8520   }
8521   /* determine if we can go multilevel */
8522   if (multilevel_requested) {
8523     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8524     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
8525   }
8526   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8527 
8528   /* dump subassembling pattern */
8529   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8530   /* compute dofs splitting and neumann boundaries for coarse dofs */
8531   nedcfield = -1;
8532   corners   = NULL;
8533   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8534     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
8535     const PetscInt        *idxs;
8536     ISLocalToGlobalMapping tmap;
8537 
8538     /* create map between primal indices (in local representative ordering) and local primal numbering */
8539     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8540     /* allocate space for temporary storage */
8541     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8542     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8543     /* allocate for IS array */
8544     nisdofs = pcbddc->n_ISForDofsLocal;
8545     if (pcbddc->nedclocal) {
8546       if (pcbddc->nedfield > -1) {
8547         nedcfield = pcbddc->nedfield;
8548       } else {
8549         nedcfield = 0;
8550         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8551         nisdofs = 1;
8552       }
8553     }
8554     nisneu  = !!pcbddc->NeumannBoundariesLocal;
8555     nisvert = 0; /* nisvert is not used */
8556     nis     = nisdofs + nisneu + nisvert;
8557     PetscCall(PetscMalloc1(nis, &isarray));
8558     /* dofs splitting */
8559     for (i = 0; i < nisdofs; i++) {
8560       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8561       if (nedcfield != i) {
8562         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8563         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8564         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8565         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8566       } else {
8567         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8568         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8569         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8570         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8571         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8572       }
8573       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8574       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8575       /* PetscCall(ISView(isarray[i],0)); */
8576     }
8577     /* neumann boundaries */
8578     if (pcbddc->NeumannBoundariesLocal) {
8579       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8580       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8581       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8582       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8583       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8584       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8585       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8586       /* PetscCall(ISView(isarray[nisdofs],0)); */
8587     }
8588     /* coordinates */
8589     if (pcbddc->corner_selected) {
8590       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8591       PetscCall(ISGetLocalSize(corners, &tsize));
8592       PetscCall(ISGetIndices(corners, &idxs));
8593       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8594       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8595       PetscCall(ISRestoreIndices(corners, &idxs));
8596       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8597       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8598       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8599     }
8600     PetscCall(PetscFree(tidxs));
8601     PetscCall(PetscFree(tidxs2));
8602     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8603   } else {
8604     nis     = 0;
8605     nisdofs = 0;
8606     nisneu  = 0;
8607     nisvert = 0;
8608     isarray = NULL;
8609   }
8610   /* destroy no longer needed map */
8611   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8612 
8613   /* subassemble */
8614   if (multilevel_allowed) {
8615     Vec       vp[1];
8616     PetscInt  nvecs = 0;
8617     PetscBool reuse;
8618 
8619     vp[0] = NULL;
8620     /* XXX HDIV also */
8621     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8622       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8623       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8624       PetscCall(VecSetType(vp[0], VECSTANDARD));
8625       nvecs = 1;
8626 
8627       if (pcbddc->divudotp) {
8628         Mat      B, loc_divudotp;
8629         Vec      v, p;
8630         IS       dummy;
8631         PetscInt np;
8632 
8633         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8634         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8635         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8636         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8637         PetscCall(MatCreateVecs(B, &v, &p));
8638         PetscCall(VecSet(p, 1.));
8639         PetscCall(MatMultTranspose(B, p, v));
8640         PetscCall(VecDestroy(&p));
8641         PetscCall(MatDestroy(&B));
8642         PetscCall(VecGetArray(vp[0], &array));
8643         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8644         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8645         PetscCall(VecResetArray(pcbddc->vec1_P));
8646         PetscCall(VecRestoreArray(vp[0], &array));
8647         PetscCall(ISDestroy(&dummy));
8648         PetscCall(VecDestroy(&v));
8649       }
8650     }
8651     if (coarse_mat) reuse = PETSC_TRUE;
8652     else reuse = PETSC_FALSE;
8653     if (multi_element) {
8654       /* XXX divudotp */
8655       PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, PETSC_FALSE));
8656       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8657       coarse_mat_is = t_coarse_mat_is;
8658     } else {
8659       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8660       if (reuse) {
8661         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8662       } else {
8663         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8664       }
8665       if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8666         PetscScalar       *arraym;
8667         const PetscScalar *arrayv;
8668         PetscInt           nl;
8669         PetscCall(VecGetLocalSize(vp[0], &nl));
8670         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8671         PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8672         PetscCall(VecGetArrayRead(vp[0], &arrayv));
8673         PetscCall(PetscArraycpy(arraym, arrayv, nl));
8674         PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8675         PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8676         PetscCall(VecDestroy(&vp[0]));
8677       } else {
8678         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8679       }
8680     }
8681   } else {
8682     if (ncoarse != size) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8683     else {
8684       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8685       coarse_mat_is = t_coarse_mat_is;
8686     }
8687   }
8688   if (coarse_mat_is || coarse_mat) {
8689     if (!multilevel_allowed) {
8690       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8691     } else {
8692       /* if this matrix is present, it means we are not reusing the coarse matrix */
8693       if (coarse_mat_is) {
8694         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8695         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8696         coarse_mat = coarse_mat_is;
8697       }
8698     }
8699   }
8700   PetscCall(MatDestroy(&t_coarse_mat_is));
8701   PetscCall(MatDestroy(&coarse_mat_is));
8702 
8703   /* create local to global scatters for coarse problem */
8704   if (compute_vecs) {
8705     PetscInt lrows;
8706     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8707     if (coarse_mat) {
8708       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8709     } else {
8710       lrows = 0;
8711     }
8712     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8713     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8714     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8715     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8716     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8717   }
8718   PetscCall(ISDestroy(&coarse_is));
8719 
8720   /* set defaults for coarse KSP and PC */
8721   if (multilevel_allowed) {
8722     coarse_ksp_type = KSPRICHARDSON;
8723     coarse_pc_type  = PCBDDC;
8724   } else {
8725     coarse_ksp_type = KSPPREONLY;
8726     coarse_pc_type  = PCREDUNDANT;
8727   }
8728 
8729   /* print some info if requested */
8730   if (pcbddc->dbg_flag) {
8731     if (!multilevel_allowed) {
8732       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8733       if (multilevel_requested) {
8734         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8735       } else if (pcbddc->max_levels) {
8736         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8737       }
8738       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8739     }
8740   }
8741 
8742   /* communicate coarse discrete gradient */
8743   coarseG = NULL;
8744   if (pcbddc->nedcG && multilevel_allowed) {
8745     MPI_Comm ccomm;
8746     if (coarse_mat) {
8747       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8748     } else {
8749       ccomm = MPI_COMM_NULL;
8750     }
8751     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8752   }
8753 
8754   /* create the coarse KSP object only once with defaults */
8755   if (coarse_mat) {
8756     PetscBool   isredundant, isbddc, force, valid;
8757     PetscViewer dbg_viewer = NULL;
8758     PetscBool   isset, issym, isher, isspd;
8759 
8760     if (pcbddc->dbg_flag) {
8761       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8762       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8763     }
8764     if (!pcbddc->coarse_ksp) {
8765       char   prefix[256], str_level[16];
8766       size_t len;
8767 
8768       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8769       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8770       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8771       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8772       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8773       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8774       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8775       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8776       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8777       /* TODO is this logic correct? should check for coarse_mat type */
8778       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8779       /* prefix */
8780       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8781       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8782       if (!pcbddc->current_level) {
8783         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8784         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8785       } else {
8786         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8787         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8788         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8789         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8790         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8791         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
8792         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8793       }
8794       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8795       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8796       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8797       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8798       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8799       /* allow user customization */
8800       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8801       /* get some info after set from options */
8802       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8803       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8804       force = PETSC_FALSE;
8805       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8806       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8807       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8808       if (multilevel_allowed && !force && !valid) {
8809         isbddc = PETSC_TRUE;
8810         PetscCall(PCSetType(pc_temp, PCBDDC));
8811         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8812         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8813         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8814         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8815           PetscObjectOptionsBegin((PetscObject)pc_temp);
8816           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8817           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8818           PetscOptionsEnd();
8819           pc_temp->setfromoptionscalled++;
8820         }
8821       }
8822     }
8823     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8824     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8825     if (nisdofs) {
8826       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8827       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8828     }
8829     if (nisneu) {
8830       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8831       PetscCall(ISDestroy(&isarray[nisdofs]));
8832     }
8833     if (nisvert) {
8834       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8835       PetscCall(ISDestroy(&isarray[nis - 1]));
8836     }
8837     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8838 
8839     /* get some info after set from options */
8840     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8841 
8842     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8843     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8844     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8845     force = PETSC_FALSE;
8846     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8847     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8848     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8849     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8850     if (isredundant) {
8851       KSP inner_ksp;
8852       PC  inner_pc;
8853 
8854       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8855       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8856     }
8857 
8858     /* parameters which miss an API */
8859     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8860     if (isbddc) {
8861       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8862 
8863       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8864       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8865       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8866       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8867       if (pcbddc_coarse->benign_saddle_point) {
8868         Mat                    coarsedivudotp_is;
8869         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8870         IS                     row, col;
8871         const PetscInt        *gidxs;
8872         PetscInt               n, st, M, N;
8873 
8874         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8875         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8876         st = st - n;
8877         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8878         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8879         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8880         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8881         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8882         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8883         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8884         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8885         PetscCall(ISGetSize(row, &M));
8886         PetscCall(MatGetSize(coarse_mat, &N, NULL));
8887         PetscCall(ISDestroy(&row));
8888         PetscCall(ISDestroy(&col));
8889         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8890         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8891         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8892         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8893         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8894         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8895         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8896         PetscCall(MatDestroy(&coarsedivudotp));
8897         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8898         PetscCall(MatDestroy(&coarsedivudotp_is));
8899         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8900         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8901       }
8902     }
8903 
8904     /* propagate symmetry info of coarse matrix */
8905     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8906     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8907     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8908     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8909     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8910     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8911     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
8912 
8913     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8914     /* set operators */
8915     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8916     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8917     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8918     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8919   }
8920   PetscCall(MatDestroy(&coarseG));
8921   PetscCall(PetscFree(isarray));
8922 #if 0
8923   {
8924     PetscViewer viewer;
8925     char filename[256];
8926     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
8927     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8928     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8929     PetscCall(MatView(coarse_mat,viewer));
8930     PetscCall(PetscViewerPopFormat(viewer));
8931     PetscCall(PetscViewerDestroy(&viewer));
8932   }
8933 #endif
8934 
8935   if (corners) {
8936     Vec             gv;
8937     IS              is;
8938     const PetscInt *idxs;
8939     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8940     PetscScalar    *coords;
8941 
8942     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8943     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8944     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8945     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8946     PetscCall(VecSetBlockSize(gv, cdim));
8947     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8948     PetscCall(VecSetType(gv, VECSTANDARD));
8949     PetscCall(VecSetFromOptions(gv));
8950     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8951 
8952     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8953     PetscCall(ISGetLocalSize(is, &n));
8954     PetscCall(ISGetIndices(is, &idxs));
8955     PetscCall(PetscMalloc1(n * cdim, &coords));
8956     for (i = 0; i < n; i++) {
8957       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8958     }
8959     PetscCall(ISRestoreIndices(is, &idxs));
8960     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8961 
8962     PetscCall(ISGetLocalSize(corners, &n));
8963     PetscCall(ISGetIndices(corners, &idxs));
8964     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
8965     PetscCall(ISRestoreIndices(corners, &idxs));
8966     PetscCall(PetscFree(coords));
8967     PetscCall(VecAssemblyBegin(gv));
8968     PetscCall(VecAssemblyEnd(gv));
8969     PetscCall(VecGetArray(gv, &coords));
8970     if (pcbddc->coarse_ksp) {
8971       PC        coarse_pc;
8972       PetscBool isbddc;
8973 
8974       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
8975       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
8976       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8977         PetscReal *realcoords;
8978 
8979         PetscCall(VecGetLocalSize(gv, &n));
8980 #if defined(PETSC_USE_COMPLEX)
8981         PetscCall(PetscMalloc1(n, &realcoords));
8982         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8983 #else
8984         realcoords = coords;
8985 #endif
8986         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
8987 #if defined(PETSC_USE_COMPLEX)
8988         PetscCall(PetscFree(realcoords));
8989 #endif
8990       }
8991     }
8992     PetscCall(VecRestoreArray(gv, &coords));
8993     PetscCall(VecDestroy(&gv));
8994   }
8995   PetscCall(ISDestroy(&corners));
8996 
8997   if (pcbddc->coarse_ksp) {
8998     Vec crhs, csol;
8999 
9000     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9001     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9002     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9003     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9004   }
9005   PetscCall(MatDestroy(&coarsedivudotp));
9006 
9007   /* compute null space for coarse solver if the benign trick has been requested */
9008   if (pcbddc->benign_null) {
9009     PetscCall(VecSet(pcbddc->vec1_P, 0.));
9010     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9011     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9012     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9013     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9014     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9015     if (coarse_mat) {
9016       Vec          nullv;
9017       PetscScalar *array, *array2;
9018       PetscInt     nl;
9019 
9020       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9021       PetscCall(VecGetLocalSize(nullv, &nl));
9022       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9023       PetscCall(VecGetArray(nullv, &array2));
9024       PetscCall(PetscArraycpy(array2, array, nl));
9025       PetscCall(VecRestoreArray(nullv, &array2));
9026       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9027       PetscCall(VecNormalize(nullv, NULL));
9028       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9029       PetscCall(VecDestroy(&nullv));
9030     }
9031   }
9032   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9033 
9034   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9035   if (pcbddc->coarse_ksp) {
9036     PetscBool ispreonly;
9037 
9038     if (CoarseNullSpace) {
9039       PetscBool isnull;
9040 
9041       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9042       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9043       /* TODO: add local nullspaces (if any) */
9044     }
9045     /* setup coarse ksp */
9046     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9047     /* Check coarse problem if in debug mode or if solving with an iterative method */
9048     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9049     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9050       KSP         check_ksp;
9051       KSPType     check_ksp_type;
9052       PC          check_pc;
9053       Vec         check_vec, coarse_vec;
9054       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9055       PetscInt    its;
9056       PetscBool   compute_eigs;
9057       PetscReal  *eigs_r, *eigs_c;
9058       PetscInt    neigs;
9059       const char *prefix;
9060 
9061       /* Create ksp object suitable for estimation of extreme eigenvalues */
9062       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9063       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9064       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9065       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9066       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9067       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9068       /* prevent from setup unneeded object */
9069       PetscCall(KSPGetPC(check_ksp, &check_pc));
9070       PetscCall(PCSetType(check_pc, PCNONE));
9071       if (ispreonly) {
9072         check_ksp_type = KSPPREONLY;
9073         compute_eigs   = PETSC_FALSE;
9074       } else {
9075         check_ksp_type = KSPGMRES;
9076         compute_eigs   = PETSC_TRUE;
9077       }
9078       PetscCall(KSPSetType(check_ksp, check_ksp_type));
9079       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9080       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9081       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9082       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9083       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9084       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9085       PetscCall(KSPSetFromOptions(check_ksp));
9086       PetscCall(KSPSetUp(check_ksp));
9087       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9088       PetscCall(KSPSetPC(check_ksp, check_pc));
9089       /* create random vec */
9090       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9091       PetscCall(VecSetRandom(check_vec, NULL));
9092       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9093       /* solve coarse problem */
9094       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9095       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9096       /* set eigenvalue estimation if preonly has not been requested */
9097       if (compute_eigs) {
9098         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9099         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9100         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9101         if (neigs) {
9102           lambda_max = eigs_r[neigs - 1];
9103           lambda_min = eigs_r[0];
9104           if (pcbddc->use_coarse_estimates) {
9105             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9106               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9107               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9108             }
9109           }
9110         }
9111       }
9112 
9113       /* check coarse problem residual error */
9114       if (pcbddc->dbg_flag) {
9115         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9116         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9117         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9118         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9119         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9120         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9121         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9122         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9123         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9124         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
9125         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9126         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9127         if (compute_eigs) {
9128           PetscReal          lambda_max_s, lambda_min_s;
9129           KSPConvergedReason reason;
9130           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9131           PetscCall(KSPGetIterationNumber(check_ksp, &its));
9132           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9133           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9134           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9135           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9136         }
9137         PetscCall(PetscViewerFlush(dbg_viewer));
9138         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9139       }
9140       PetscCall(VecDestroy(&check_vec));
9141       PetscCall(VecDestroy(&coarse_vec));
9142       PetscCall(KSPDestroy(&check_ksp));
9143       if (compute_eigs) {
9144         PetscCall(PetscFree(eigs_r));
9145         PetscCall(PetscFree(eigs_c));
9146       }
9147     }
9148   }
9149   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9150   /* print additional info */
9151   if (pcbddc->dbg_flag) {
9152     /* waits until all processes reaches this point */
9153     PetscCall(PetscBarrier((PetscObject)pc));
9154     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9155     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9156   }
9157 
9158   /* free memory */
9159   PetscCall(MatDestroy(&coarse_mat));
9160   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9161   PetscFunctionReturn(PETSC_SUCCESS);
9162 }
9163 
9164 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9165 {
9166   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
9167   PC_IS          *pcis   = (PC_IS *)pc->data;
9168   IS              subset, subset_mult, subset_n;
9169   PetscInt        local_size, coarse_size = 0;
9170   PetscInt       *local_primal_indices = NULL;
9171   const PetscInt *t_local_primal_indices;
9172 
9173   PetscFunctionBegin;
9174   /* Compute global number of coarse dofs */
9175   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9176   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9177   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9178   PetscCall(ISDestroy(&subset_n));
9179   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9180   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9181   PetscCall(ISDestroy(&subset));
9182   PetscCall(ISDestroy(&subset_mult));
9183   PetscCall(ISGetLocalSize(subset_n, &local_size));
9184   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9185   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9186   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9187   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9188   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9189   PetscCall(ISDestroy(&subset_n));
9190 
9191   if (pcbddc->dbg_flag) {
9192     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9193     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9194     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9195     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9196   }
9197 
9198   /* get back data */
9199   *coarse_size_n          = coarse_size;
9200   *local_primal_indices_n = local_primal_indices;
9201   PetscFunctionReturn(PETSC_SUCCESS);
9202 }
9203 
9204 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9205 {
9206   IS           localis_t;
9207   PetscInt     i, lsize, *idxs, n;
9208   PetscScalar *vals;
9209 
9210   PetscFunctionBegin;
9211   /* get indices in local ordering exploiting local to global map */
9212   PetscCall(ISGetLocalSize(globalis, &lsize));
9213   PetscCall(PetscMalloc1(lsize, &vals));
9214   for (i = 0; i < lsize; i++) vals[i] = 1.0;
9215   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9216   PetscCall(VecSet(gwork, 0.0));
9217   PetscCall(VecSet(lwork, 0.0));
9218   if (idxs) { /* multilevel guard */
9219     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9220     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9221   }
9222   PetscCall(VecAssemblyBegin(gwork));
9223   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9224   PetscCall(PetscFree(vals));
9225   PetscCall(VecAssemblyEnd(gwork));
9226   /* now compute set in local ordering */
9227   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9228   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9229   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9230   PetscCall(VecGetSize(lwork, &n));
9231   for (i = 0, lsize = 0; i < n; i++) {
9232     if (PetscRealPart(vals[i]) > 0.5) lsize++;
9233   }
9234   PetscCall(PetscMalloc1(lsize, &idxs));
9235   for (i = 0, lsize = 0; i < n; i++) {
9236     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9237   }
9238   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9239   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9240   *localis = localis_t;
9241   PetscFunctionReturn(PETSC_SUCCESS);
9242 }
9243 
9244 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9245 {
9246   PC_IS   *pcis   = (PC_IS *)pc->data;
9247   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9248   PC_IS   *pcisf;
9249   PC_BDDC *pcbddcf;
9250   PC       pcf;
9251 
9252   PetscFunctionBegin;
9253   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9254   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9255   PetscCall(PCSetType(pcf, PCBDDC));
9256 
9257   pcisf   = (PC_IS *)pcf->data;
9258   pcbddcf = (PC_BDDC *)pcf->data;
9259 
9260   pcisf->is_B_local = pcis->is_B_local;
9261   pcisf->vec1_N     = pcis->vec1_N;
9262   pcisf->BtoNmap    = pcis->BtoNmap;
9263   pcisf->n          = pcis->n;
9264   pcisf->n_B        = pcis->n_B;
9265 
9266   PetscCall(PetscFree(pcbddcf->mat_graph));
9267   PetscCall(PetscFree(pcbddcf->sub_schurs));
9268   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
9269   pcbddcf->sub_schurs            = schurs;
9270   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
9271   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9272   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9273   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
9274   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
9275   pcbddcf->use_faces             = PETSC_TRUE;
9276   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
9277   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
9278   pcbddcf->use_qr_single         = (PetscBool)!constraints;
9279   pcbddcf->fake_change           = PETSC_TRUE;
9280   pcbddcf->dbg_flag              = pcbddc->dbg_flag;
9281 
9282   PetscCall(PCBDDCAdaptiveSelection(pcf));
9283   PetscCall(PCBDDCConstraintsSetUp(pcf));
9284 
9285   *change = pcbddcf->ConstraintMatrix;
9286   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9287   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9288   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9289 
9290   if (schurs) pcbddcf->sub_schurs = NULL;
9291   pcbddcf->ConstraintMatrix = NULL;
9292   pcbddcf->mat_graph        = NULL;
9293   pcisf->is_B_local         = NULL;
9294   pcisf->vec1_N             = NULL;
9295   pcisf->BtoNmap            = NULL;
9296   PetscCall(PCDestroy(&pcf));
9297   PetscFunctionReturn(PETSC_SUCCESS);
9298 }
9299 
9300 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9301 {
9302   PC_IS          *pcis       = (PC_IS *)pc->data;
9303   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
9304   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9305   Mat             S_j;
9306   PetscInt       *used_xadj, *used_adjncy;
9307   PetscBool       free_used_adj;
9308 
9309   PetscFunctionBegin;
9310   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9311   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9312   free_used_adj = PETSC_FALSE;
9313   if (pcbddc->sub_schurs_layers == -1) {
9314     used_xadj   = NULL;
9315     used_adjncy = NULL;
9316   } else {
9317     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9318       used_xadj   = pcbddc->mat_graph->xadj;
9319       used_adjncy = pcbddc->mat_graph->adjncy;
9320     } else if (pcbddc->computed_rowadj) {
9321       used_xadj   = pcbddc->mat_graph->xadj;
9322       used_adjncy = pcbddc->mat_graph->adjncy;
9323     } else {
9324       PetscBool       flg_row = PETSC_FALSE;
9325       const PetscInt *xadj, *adjncy;
9326       PetscInt        nvtxs;
9327 
9328       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9329       if (flg_row) {
9330         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9331         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9332         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9333         free_used_adj = PETSC_TRUE;
9334       } else {
9335         pcbddc->sub_schurs_layers = -1;
9336         used_xadj                 = NULL;
9337         used_adjncy               = NULL;
9338       }
9339       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9340     }
9341   }
9342 
9343   /* setup sub_schurs data */
9344   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9345   if (!sub_schurs->schur_explicit) {
9346     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9347     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9348     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9349   } else {
9350     Mat       change        = NULL;
9351     Vec       scaling       = NULL;
9352     IS        change_primal = NULL, iP;
9353     PetscInt  benign_n;
9354     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
9355     PetscBool need_change       = PETSC_FALSE;
9356     PetscBool discrete_harmonic = PETSC_FALSE;
9357 
9358     if (!pcbddc->use_vertices && reuse_solvers) {
9359       PetscInt n_vertices;
9360 
9361       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9362       reuse_solvers = (PetscBool)!n_vertices;
9363     }
9364     if (!pcbddc->benign_change_explicit) {
9365       benign_n = pcbddc->benign_n;
9366     } else {
9367       benign_n = 0;
9368     }
9369     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9370        We need a global reduction to avoid possible deadlocks.
9371        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9372     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9373       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9374       PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9375       need_change = (PetscBool)(!need_change);
9376     }
9377     /* If the user defines additional constraints, we import them here */
9378     if (need_change) {
9379       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9380       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9381     }
9382     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9383 
9384     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9385     if (iP) {
9386       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9387       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9388       PetscOptionsEnd();
9389     }
9390     if (discrete_harmonic) {
9391       Mat A;
9392       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9393       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9394       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9395       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9396                                      pcbddc->benign_zerodiag_subs, change, change_primal));
9397       PetscCall(MatDestroy(&A));
9398     } else {
9399       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9400                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9401     }
9402     PetscCall(MatDestroy(&change));
9403     PetscCall(ISDestroy(&change_primal));
9404   }
9405   PetscCall(MatDestroy(&S_j));
9406 
9407   /* free adjacency */
9408   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9409   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9410   PetscFunctionReturn(PETSC_SUCCESS);
9411 }
9412 
9413 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9414 {
9415   PC_IS      *pcis   = (PC_IS *)pc->data;
9416   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9417   PCBDDCGraph graph;
9418 
9419   PetscFunctionBegin;
9420   /* attach interface graph for determining subsets */
9421   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9422     IS       verticesIS, verticescomm;
9423     PetscInt vsize, *idxs;
9424 
9425     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9426     PetscCall(ISGetSize(verticesIS, &vsize));
9427     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9428     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9429     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9430     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9431     PetscCall(PCBDDCGraphCreate(&graph));
9432     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9433     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9434     PetscCall(ISDestroy(&verticescomm));
9435     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9436   } else {
9437     graph = pcbddc->mat_graph;
9438   }
9439   /* print some info */
9440   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9441     IS       vertices;
9442     PetscInt nv, nedges, nfaces;
9443     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9444     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9445     PetscCall(ISGetSize(vertices, &nv));
9446     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9447     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9448     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9449     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9450     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9451     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9452     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9453     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9454   }
9455 
9456   /* sub_schurs init */
9457   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9458   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9459 
9460   /* free graph struct */
9461   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9462   PetscFunctionReturn(PETSC_SUCCESS);
9463 }
9464 
9465 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9466 {
9467   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
9468   PetscInt        n     = pc->pmat->rmap->n, ln, ni, st;
9469   const PetscInt *idxs;
9470   IS              gis;
9471 
9472   PetscFunctionBegin;
9473   if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9474   PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9475   PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9476   PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9477   PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9478   PetscCall(ISGetLocalSize(is, &ni));
9479   PetscCall(ISGetIndices(is, &idxs));
9480   for (PetscInt i = 0; i < ni; i++) {
9481     if (idxs[i] < 0 || idxs[i] >= ln) continue;
9482     matis->sf_leafdata[idxs[i]] = 1;
9483   }
9484   PetscCall(ISRestoreIndices(is, &idxs));
9485   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9486   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9487   ln = 0;
9488   for (PetscInt i = 0; i < n; i++) {
9489     if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9490   }
9491   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9492   PetscCall(ISView(gis, viewer));
9493   PetscCall(ISDestroy(&gis));
9494   PetscFunctionReturn(PETSC_SUCCESS);
9495 }
9496 
9497 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9498 {
9499   PetscInt    header[11];
9500   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9501   PetscViewer viewer;
9502   MPI_Comm    comm = PetscObjectComm((PetscObject)pc);
9503 
9504   PetscFunctionBegin;
9505   PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9506   if (load) {
9507     IS  is;
9508     Mat A;
9509 
9510     PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9511     PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9512     PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9513     PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9514     PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9515     PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9516     PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9517     PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9518     PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9519     PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9520     PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9521     if (header[0]) {
9522       PetscCall(ISCreate(comm, &is));
9523       PetscCall(ISLoad(is, viewer));
9524       PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9525       PetscCall(ISDestroy(&is));
9526     }
9527     if (header[1]) {
9528       PetscCall(ISCreate(comm, &is));
9529       PetscCall(ISLoad(is, viewer));
9530       PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9531       PetscCall(ISDestroy(&is));
9532     }
9533     if (header[2]) {
9534       IS *isarray;
9535 
9536       PetscCall(PetscMalloc1(header[2], &isarray));
9537       for (PetscInt i = 0; i < header[2]; i++) {
9538         PetscCall(ISCreate(comm, &isarray[i]));
9539         PetscCall(ISLoad(isarray[i], viewer));
9540       }
9541       PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9542       for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9543       PetscCall(PetscFree(isarray));
9544     }
9545     if (header[3]) {
9546       PetscCall(ISCreate(comm, &is));
9547       PetscCall(ISLoad(is, viewer));
9548       PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9549       PetscCall(ISDestroy(&is));
9550     }
9551     if (header[4]) {
9552       PetscCall(MatCreate(comm, &A));
9553       PetscCall(MatSetType(A, MATAIJ));
9554       PetscCall(MatLoad(A, viewer));
9555       PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9556       PetscCall(MatDestroy(&A));
9557     }
9558     if (header[9]) {
9559       PetscCall(MatCreate(comm, &A));
9560       PetscCall(MatSetType(A, MATIS));
9561       PetscCall(MatLoad(A, viewer));
9562       PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9563       PetscCall(MatDestroy(&A));
9564     }
9565   } else {
9566     header[0]  = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9567     header[1]  = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9568     header[2]  = pcbddc->n_ISForDofsLocal;
9569     header[3]  = (PetscInt)!!pcbddc->user_primal_vertices_local;
9570     header[4]  = (PetscInt)!!pcbddc->discretegradient;
9571     header[5]  = pcbddc->nedorder;
9572     header[6]  = pcbddc->nedfield;
9573     header[7]  = (PetscInt)pcbddc->nedglobal;
9574     header[8]  = (PetscInt)pcbddc->conforming;
9575     header[9]  = (PetscInt)!!pcbddc->divudotp;
9576     header[10] = (PetscInt)pcbddc->divudotp_trans;
9577     if (header[4]) header[3] = 0;
9578 
9579     PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9580     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9581     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9582     for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9583     if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9584     if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9585     if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9586   }
9587   PetscCall(PetscViewerDestroy(&viewer));
9588   PetscFunctionReturn(PETSC_SUCCESS);
9589 }
9590 
9591 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9592 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9593 {
9594   Mat         At;
9595   IS          rows;
9596   PetscInt    rst, ren;
9597   PetscLayout rmap;
9598 
9599   PetscFunctionBegin;
9600   rst = ren = 0;
9601   if (ccomm != MPI_COMM_NULL) {
9602     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9603     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9604     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9605     PetscCall(PetscLayoutSetUp(rmap));
9606     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9607   }
9608   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9609   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9610   PetscCall(ISDestroy(&rows));
9611 
9612   if (ccomm != MPI_COMM_NULL) {
9613     Mat_MPIAIJ *a, *b;
9614     IS          from, to;
9615     Vec         gvec;
9616     PetscInt    lsize;
9617 
9618     PetscCall(MatCreate(ccomm, B));
9619     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9620     PetscCall(MatSetType(*B, MATAIJ));
9621     PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9622     PetscCall(PetscLayoutSetUp((*B)->cmap));
9623     a = (Mat_MPIAIJ *)At->data;
9624     b = (Mat_MPIAIJ *)(*B)->data;
9625     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9626     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9627     PetscCall(PetscObjectReference((PetscObject)a->A));
9628     PetscCall(PetscObjectReference((PetscObject)a->B));
9629     b->A = a->A;
9630     b->B = a->B;
9631 
9632     b->donotstash   = a->donotstash;
9633     b->roworiented  = a->roworiented;
9634     b->rowindices   = NULL;
9635     b->rowvalues    = NULL;
9636     b->getrowactive = PETSC_FALSE;
9637 
9638     (*B)->rmap         = rmap;
9639     (*B)->factortype   = A->factortype;
9640     (*B)->assembled    = PETSC_TRUE;
9641     (*B)->insertmode   = NOT_SET_VALUES;
9642     (*B)->preallocated = PETSC_TRUE;
9643 
9644     if (a->colmap) {
9645 #if defined(PETSC_USE_CTABLE)
9646       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9647 #else
9648       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9649       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9650 #endif
9651     } else b->colmap = NULL;
9652     if (a->garray) {
9653       PetscInt len;
9654       len = a->B->cmap->n;
9655       PetscCall(PetscMalloc1(len + 1, &b->garray));
9656       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9657     } else b->garray = NULL;
9658 
9659     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9660     b->lvec = a->lvec;
9661 
9662     /* cannot use VecScatterCopy */
9663     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9664     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9665     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9666     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9667     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9668     PetscCall(ISDestroy(&from));
9669     PetscCall(ISDestroy(&to));
9670     PetscCall(VecDestroy(&gvec));
9671   }
9672   PetscCall(MatDestroy(&At));
9673   PetscFunctionReturn(PETSC_SUCCESS);
9674 }
9675 
9676 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
9677 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9678 {
9679   PetscBool isaij;
9680   MPI_Comm  comm;
9681 
9682   PetscFunctionBegin;
9683   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9684   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9685   PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9686   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9687   if (isaij) { /* SeqAIJ supports repeated rows */
9688     PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9689   } else {
9690     Mat                A_loc;
9691     Mat_SeqAIJ        *da;
9692     PetscSF            sf;
9693     PetscInt           ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9694     PetscScalar       *daa;
9695     const PetscInt    *idxs;
9696     const PetscSFNode *iremotes;
9697     PetscSFNode       *remotes;
9698 
9699     /* SF for incoming rows */
9700     PetscCall(PetscSFCreate(comm, &sf));
9701     PetscCall(ISGetLocalSize(rows, &ni));
9702     PetscCall(ISGetIndices(rows, &idxs));
9703     PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9704     PetscCall(ISRestoreIndices(rows, &idxs));
9705 
9706     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9707     da = (Mat_SeqAIJ *)A_loc->data;
9708     PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9709     for (PetscInt i = 0; i < m; i++) {
9710       rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9711       rdata[2 * i + 1] = da->i[i];
9712     }
9713     PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9714     PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9715     PetscCall(PetscMalloc1(ni + 1, &di));
9716     di[0] = 0;
9717     for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9718     PetscCall(PetscMalloc1(di[ni], &dj));
9719     PetscCall(PetscMalloc1(di[ni], &daa));
9720     PetscCall(PetscMalloc1(di[ni], &remotes));
9721 
9722     PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9723 
9724     /* SF graph for nonzeros */
9725     c = 0;
9726     for (PetscInt i = 0; i < ni; i++) {
9727       const PetscInt rank  = iremotes[i].rank;
9728       const PetscInt rsize = ldata[2 * i];
9729       for (PetscInt j = 0; j < rsize; j++) {
9730         remotes[c].rank  = rank;
9731         remotes[c].index = ldata[2 * i + 1] + j;
9732         c++;
9733       }
9734     }
9735     PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9736     PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9737     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9738     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9739     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9740     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9741 
9742     PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9743     PetscCall(MatDestroy(&A_loc));
9744     PetscCall(PetscSFDestroy(&sf));
9745     PetscCall(PetscFree(di));
9746     PetscCall(PetscFree(dj));
9747     PetscCall(PetscFree(daa));
9748     PetscCall(PetscFree(remotes));
9749     PetscCall(PetscFree2(ldata, rdata));
9750   }
9751   PetscFunctionReturn(PETSC_SUCCESS);
9752 }
9753