xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 7b103a856c2b08bbed78a8bb48cd10d38d731eac)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <../src/mat/impls/dense/seq/dense.h>
6 #include <petscdmplex.h>
7 #include <petscblaslapack.h>
8 #include <petsc/private/sfimpl.h>
9 #include <petsc/private/dmpleximpl.h>
10 #include <petscdmda.h>
11 
12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13 
14 /* if range is true,  it returns B s.t. span{B} = range(A)
15    if range is false, it returns B s.t. range(B) _|_ range(A) */
16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17 {
18   PetscScalar *uwork, *data, *U, ds = 0.;
19   PetscReal   *sing;
20   PetscBLASInt bM, bN, lwork, lierr, di = 1;
21   PetscInt     ulw, i, nr, nc, n;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal *rwork2;
24 #endif
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &nr, &nc));
28   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29 
30   /* workspace */
31   if (!work) {
32     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33     PetscCall(PetscMalloc1(ulw, &uwork));
34   } else {
35     ulw   = lw;
36     uwork = work;
37   }
38   n = PetscMin(nr, nc);
39   if (!rwork) {
40     PetscCall(PetscMalloc1(n, &sing));
41   } else {
42     sing = rwork;
43   }
44 
45   /* SVD */
46   PetscCall(PetscMalloc1(nr * nr, &U));
47   PetscCall(PetscBLASIntCast(nr, &bM));
48   PetscCall(PetscBLASIntCast(nc, &bN));
49   PetscCall(PetscBLASIntCast(ulw, &lwork));
50   PetscCall(MatDenseGetArray(A, &data));
51   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52 #if !defined(PETSC_USE_COMPLEX)
53   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54 #else
55   PetscCall(PetscMalloc1(5 * n, &rwork2));
56   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57   PetscCall(PetscFree(rwork2));
58 #endif
59   PetscCall(PetscFPTrapPop());
60   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
61   PetscCall(MatDenseRestoreArray(A, &data));
62   for (i = 0; i < n; i++)
63     if (sing[i] < PETSC_SMALL) break;
64   if (!rwork) PetscCall(PetscFree(sing));
65   if (!work) PetscCall(PetscFree(uwork));
66   /* create B */
67   if (!range) {
68     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69     PetscCall(MatDenseGetArray(*B, &data));
70     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71   } else {
72     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73     PetscCall(MatDenseGetArray(*B, &data));
74     PetscCall(PetscArraycpy(data, U, i * nr));
75   }
76   PetscCall(MatDenseRestoreArray(*B, &data));
77   PetscCall(PetscFree(U));
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* TODO REMOVE */
82 #if defined(PRINT_GDET)
83 static int inc = 0;
84 static int lev = 0;
85 #endif
86 
87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88 {
89   Mat          GE, GEd;
90   PetscInt     rsize, csize, esize;
91   PetscScalar *ptr;
92 
93   PetscFunctionBegin;
94   PetscCall(ISGetSize(edge, &esize));
95   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96   PetscCall(ISGetSize(extrow, &rsize));
97   PetscCall(ISGetSize(extcol, &csize));
98 
99   /* gradients */
100   ptr = work + 5 * esize;
101   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104   PetscCall(MatDestroy(&GE));
105 
106   /* constants */
107   ptr += rsize * csize;
108   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111   PetscCall(MatDestroy(&GE));
112   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113   PetscCall(MatDestroy(&GEd));
114 
115   if (corners) {
116     Mat                GEc;
117     const PetscScalar *vals;
118     PetscScalar        v;
119 
120     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122     PetscCall(MatDenseGetArrayRead(GEd, &vals));
123     /* v       = PetscAbsScalar(vals[0]); */
124     v        = 1.;
125     cvals[0] = vals[0] / v;
126     cvals[1] = vals[1] / v;
127     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128     PetscCall(MatScale(*GKins, 1. / v));
129 #if defined(PRINT_GDET)
130     {
131       PetscViewer viewer;
132       char        filename[256];
133       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137       PetscCall(MatView(GEc, viewer));
138       PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139       PetscCall(MatView(*GKins, viewer));
140       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141       PetscCall(MatView(GEd, viewer));
142       PetscCall(PetscViewerDestroy(&viewer));
143     }
144 #endif
145     PetscCall(MatDestroy(&GEd));
146     PetscCall(MatDestroy(&GEc));
147   }
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
152 
153 PetscErrorCode PCBDDCNedelecSupport(PC pc)
154 {
155   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
156   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
157   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158   PetscSF                sfv;
159   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160   MPI_Comm               comm;
161   IS                     lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162   IS                    *eedges, *extrows, *extcols, *alleedges;
163   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164   PetscScalar           *vals, *work;
165   PetscReal             *rwork;
166   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
167   PetscInt               ne, nv, Lv, order, n, field;
168   PetscInt               i, j, extmem, cum, maxsize, nee;
169   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170   PetscInt              *sfvleaves, *sfvroots;
171   PetscInt              *corners, *cedges;
172   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173   PetscInt              *emarks;
174   PetscBool              print, eerr, done, lrc[2], conforming, global, setprimal;
175 
176   PetscFunctionBegin;
177   /* If the discrete gradient is defined for a subset of dofs and global is true,
178      it assumes G is given in global ordering for all the dofs.
179      Otherwise, the ordering is global for the Nedelec field */
180   order      = pcbddc->nedorder;
181   conforming = pcbddc->conforming;
182   field      = pcbddc->nedfield;
183   global     = pcbddc->nedglobal;
184   setprimal  = PETSC_FALSE;
185   print      = PETSC_FALSE;
186 
187   /* Command line customization */
188   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190   /* print debug info and adaptive order TODO: to be removed */
191   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193   PetscOptionsEnd();
194 
195   /* Return if there are no edges in the decomposition */
196   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199   PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200   lrc[0] = PETSC_FALSE;
201   for (i = 0; i < n; i++) {
202     if (PetscRealPart(vals[i]) > 2.) {
203       lrc[0] = PETSC_TRUE;
204       break;
205     }
206   }
207   PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208   PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
209   if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
210 
211   /* Get Nedelec field */
212   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213   if (pcbddc->n_ISForDofsLocal && field >= 0) {
214     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215     nedfieldlocal = pcbddc->ISForDofsLocal[field];
216     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218     ne            = n;
219     nedfieldlocal = NULL;
220     global        = PETSC_TRUE;
221   } else if (field == PETSC_DECIDE) {
222     PetscInt rst, ren, *idx;
223 
224     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227     for (i = rst; i < ren; i++) {
228       PetscInt nc;
229 
230       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233     }
234     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236     PetscCall(PetscMalloc1(n, &idx));
237     for (i = 0, ne = 0; i < n; i++)
238       if (matis->sf_leafdata[i]) idx[ne++] = i;
239     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240   } else {
241     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242   }
243 
244   /* Sanity checks */
245   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
248 
249   /* Just set primal dofs and return */
250   if (setprimal) {
251     IS        enedfieldlocal;
252     PetscInt *eidxs;
253 
254     PetscCall(PetscMalloc1(ne, &eidxs));
255     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256     if (nedfieldlocal) {
257       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258       for (i = 0, cum = 0; i < ne; i++) {
259         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260       }
261       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262     } else {
263       for (i = 0, cum = 0; i < ne; i++) {
264         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265       }
266     }
267     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270     PetscCall(PetscFree(eidxs));
271     PetscCall(ISDestroy(&nedfieldlocal));
272     PetscCall(ISDestroy(&enedfieldlocal));
273     PetscFunctionReturn(PETSC_SUCCESS);
274   }
275 
276   /* Compute some l2g maps */
277   if (nedfieldlocal) {
278     IS is;
279 
280     /* need to map from the local Nedelec field to local numbering */
281     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286     if (global) {
287       PetscCall(PetscObjectReference((PetscObject)al2g));
288       el2g = al2g;
289     } else {
290       IS gis;
291 
292       PetscCall(ISRenumber(is, NULL, NULL, &gis));
293       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294       PetscCall(ISDestroy(&gis));
295     }
296     PetscCall(ISDestroy(&is));
297   } else {
298     /* one ref for the destruction of al2g, one for el2g */
299     PetscCall(PetscObjectReference((PetscObject)al2g));
300     PetscCall(PetscObjectReference((PetscObject)al2g));
301     el2g = al2g;
302     fl2g = NULL;
303   }
304 
305   /* Start communication to drop connections for interior edges (for cc analysis only) */
306   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308   if (nedfieldlocal) {
309     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312   } else {
313     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314   }
315   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
317 
318   /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319      Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320   if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
321 
322   /* drop connections with interior edges to avoid unneeded communications and memory movements */
323   PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324   PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325   PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326   if (global) {
327     PetscInt rst;
328 
329     PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331       if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332     }
333     PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334     PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335   } else {
336     PetscInt *tbz;
337 
338     PetscCall(PetscMalloc1(ne, &tbz));
339     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342     for (i = 0, cum = 0; i < ne; i++)
343       if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345     PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346     PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347     PetscCall(PetscFree(tbz));
348   }
349 
350   /* Extract subdomain relevant rows of G  */
351   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353   PetscCall(MatAIJExtractRows(G, lned, &lGall));
354   /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356   PetscCall(ISDestroy(&lned));
357   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358   PetscCall(MatDestroy(&lGall));
359   PetscCall(MatISGetLocalMat(lGis, &lG));
360   if (matis->allow_repeated) { /* multi-element support */
361     Mat                   *lGn, B;
362     IS                    *is_rows, *tcols, tmap, nmap;
363     PetscInt               subnv;
364     const PetscInt        *subvidxs;
365     ISLocalToGlobalMapping mapn;
366 
367     PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371       if (fl2g) {
372         PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373       } else {
374         PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375         is_rows[i] = pcbddc->local_subs[i];
376       }
377       PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379       PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380       PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382       PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383       PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384     }
385 
386     /* Create new MATIS with repeated vertices */
387     PetscCall(MatCreate(comm, &B));
388     PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389     PetscCall(MatSetType(B, MATIS));
390     PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391     PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392     PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393     PetscCall(ISDestroy(&tmap));
394     PetscCall(ISGetLocalSize(nmap, &subnv));
395     PetscCall(ISGetIndices(nmap, &subvidxs));
396     PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397     PetscCall(ISRestoreIndices(nmap, &subvidxs));
398     PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399     PetscCall(ISDestroy(&tmap));
400     PetscCall(ISDestroy(&nmap));
401     PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402     PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403     PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405       PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406       PetscCall(ISDestroy(&is_rows[i]));
407       PetscCall(ISDestroy(&tcols[i]));
408     }
409     PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410     PetscCall(PetscFree(lGn));
411     PetscCall(PetscFree(is_rows));
412     PetscCall(PetscFree(tcols));
413     PetscCall(MatISSetLocalMat(B, lG));
414     PetscCall(MatDestroy(&lG));
415 
416     PetscCall(MatDestroy(&lGis));
417     lGis = B;
418 
419     lGis->assembled = PETSC_TRUE;
420   }
421   PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
422 
423   /* SF for nodal dofs communications */
424   PetscCall(MatGetLocalSize(G, NULL, &Lv));
425   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426   PetscCall(PetscObjectReference((PetscObject)vl2g));
427   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428   PetscCall(PetscSFCreate(comm, &sfv));
429   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
432 
433   if (elements_corners) {
434     IS      tmp;
435     Vec     global, local;
436     Mat_IS *tGis = (Mat_IS *)lGis->data;
437 
438     PetscCall(MatCreateVecs(lGis, &global, NULL));
439     PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440     PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441     PetscCall(VecDestroy(&global));
442     PetscCall(VecDestroy(&local));
443     elements_corners = tmp;
444   }
445 
446   /* Destroy temporary G */
447   PetscCall(MatISGetLocalMat(lGis, &lG));
448   PetscCall(PetscObjectReference((PetscObject)lG));
449   PetscCall(MatDestroy(&G));
450   PetscCall(MatDestroy(&lGis));
451 
452   if (print) {
453     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454     PetscCall(MatView(lG, NULL));
455   }
456 
457   /* Save lG for values insertion in change of basis */
458   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
459 
460   /* Analyze the edge-nodes connections (duplicate lG) */
461   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463   PetscCall(PetscBTCreate(nv, &btv));
464   PetscCall(PetscBTCreate(ne, &bte));
465   PetscCall(PetscBTCreate(ne, &btb));
466   PetscCall(PetscBTCreate(ne, &btbd));
467   /* need to import the boundary specification to ensure the
468      proper detection of coarse edges' endpoints */
469   if (pcbddc->DirichletBoundariesLocal) {
470     IS is;
471 
472     if (fl2g) {
473       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474     } else {
475       is = pcbddc->DirichletBoundariesLocal;
476     }
477     PetscCall(ISGetLocalSize(is, &cum));
478     PetscCall(ISGetIndices(is, &idxs));
479     for (i = 0; i < cum; i++) {
480       if (idxs[i] >= 0 && idxs[i] < ne) {
481         PetscCall(PetscBTSet(btb, idxs[i]));
482         PetscCall(PetscBTSet(btbd, idxs[i]));
483       }
484     }
485     PetscCall(ISRestoreIndices(is, &idxs));
486     if (fl2g) PetscCall(ISDestroy(&is));
487   }
488   if (pcbddc->NeumannBoundariesLocal) {
489     IS is;
490 
491     if (fl2g) {
492       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493     } else {
494       is = pcbddc->NeumannBoundariesLocal;
495     }
496     PetscCall(ISGetLocalSize(is, &cum));
497     PetscCall(ISGetIndices(is, &idxs));
498     for (i = 0; i < cum; i++) {
499       if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500     }
501     PetscCall(ISRestoreIndices(is, &idxs));
502     if (fl2g) PetscCall(ISDestroy(&is));
503   }
504 
505   /* Count neighs per dof */
506   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
508 
509   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510      for proper detection of coarse edges' endpoints */
511   PetscCall(PetscBTCreate(ne, &btee));
512   for (i = 0; i < ne; i++) {
513     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514   }
515   PetscCall(PetscMalloc1(ne, &marks));
516   if (!conforming) {
517     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519   }
520   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521   PetscCall(MatSeqAIJGetArray(lGe, &vals));
522   cum = 0;
523   for (i = 0; i < ne; i++) {
524     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525     if (!PetscBTLookup(btee, i)) {
526       marks[cum++] = i;
527       continue;
528     }
529     /* set badly connected edge dofs as primal */
530     if (!conforming) {
531       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532         marks[cum++] = i;
533         PetscCall(PetscBTSet(bte, i));
534         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535       } else {
536         /* every edge dofs should be connected through a certain number of nodal dofs
537            to other edge dofs belonging to coarse edges
538            - at most 2 endpoints
539            - order-1 interior nodal dofs
540            - no undefined nodal dofs (nconn < order)
541         */
542         PetscInt ends = 0, ints = 0, undef = 0;
543         for (j = ii[i]; j < ii[i + 1]; j++) {
544           PetscInt v     = jj[j], k;
545           PetscInt nconn = iit[v + 1] - iit[v];
546           for (k = iit[v]; k < iit[v + 1]; k++)
547             if (!PetscBTLookup(btee, jjt[k])) nconn--;
548           if (nconn > order) ends++;
549           else if (nconn == order) ints++;
550           else undef++;
551         }
552         if (undef || ends > 2 || ints != order - 1) {
553           marks[cum++] = i;
554           PetscCall(PetscBTSet(bte, i));
555           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556         }
557       }
558     }
559     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560     if (!order && ii[i + 1] != ii[i]) {
561       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563     }
564   }
565   PetscCall(PetscBTDestroy(&btee));
566   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568   if (!conforming) {
569     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570     PetscCall(MatDestroy(&lGt));
571   }
572   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
573 
574   /* identify splitpoints and corner candidates */
575   PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576   PetscCall(PetscBTCreate(nv, &btvcand));
577   if (elements_corners) {
578     PetscCall(ISGetLocalSize(elements_corners, &cum));
579     PetscCall(ISGetIndices(elements_corners, &idxs));
580     for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581     PetscCall(ISRestoreIndices(elements_corners, &idxs));
582   }
583 
584   if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585     PetscSF   emlsf, vmlsf;
586     PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587     PetscInt  cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
588 
589     PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591     PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
593 
594     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
596 
597     PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598     for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599     PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600     PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
601 
602     PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603     for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604     PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605     PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
606 
607     PetscCall(PetscMalloc1(ne, &eleaves));
608     PetscCall(PetscMalloc1(nv, &vleaves));
609     for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610     for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611     PetscCall(PetscMalloc1(emnl, &meleaves));
612     PetscCall(PetscMalloc1(vmnl, &mvleaves));
613 
614     PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615     PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616     for (i = 0; i < n_subs; i++) {
617       const PetscInt *idxs;
618       const PetscInt  subid = cum_subs + i;
619       PetscInt        ns;
620 
621       PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622       PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623       for (j = 0; j < ns; j++) {
624         const PetscInt e = idxs[j];
625 
626         eleaves[e] = subid;
627         for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628       }
629       PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630     }
631     PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632     PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633     PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634     PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635     PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636     PetscCall(PetscFree(eleaves));
637     PetscCall(PetscFree(vleaves));
638 
639     PetscCall(PetscMalloc1(ne + 1, &eneighs));
640     eneighs[0] = meleaves;
641     for (i = 0; i < ne; i++) {
642       PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643       eneighs[i + 1] = eneighs[i] + ecount[i];
644     }
645     PetscCall(PetscMalloc1(nv + 1, &vneighs));
646     vneighs[0] = mvleaves;
647     for (i = 0; i < nv; i++) {
648       PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649       vneighs[i + 1] = vneighs[i] + vcount[i];
650     }
651   } else {
652     PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653     PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654   }
655 
656   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657   if (print) {
658     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659     PetscCall(MatView(lGe, NULL));
660     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661     PetscCall(MatView(lGt, NULL));
662   }
663   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664   PetscCall(MatSeqAIJGetArray(lGt, &vals));
665   for (i = 0; i < nv; i++) {
666     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668     if (!order) { /* variable order */
669       PetscReal vorder = 0.;
670 
671       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674       ord = 1;
675     }
676     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677       const PetscInt e = jj[j];
678 
679       if (PetscBTLookup(btbd, e)) {
680         bdir = PETSC_TRUE;
681         break;
682       }
683       if (vc != ecount[e]) {
684         sneighs = PETSC_FALSE;
685       } else {
686         const PetscInt *vn = vneighs[i], *en = eneighs[e];
687 
688         for (PetscInt k = 0; k < vc; k++) {
689           if (vn[k] != en[k]) {
690             sneighs = PETSC_FALSE;
691             break;
692           }
693         }
694       }
695     }
696     if (elements_corners) test = 0;
697     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699       PetscCall(PetscBTSet(btv, i));
700     } else if (test == ord) {
701       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703         PetscCall(PetscBTSet(btv, i));
704       } else if (!elements_corners) {
705         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706         PetscCall(PetscBTSet(btvcand, i));
707       }
708     }
709   }
710   PetscCall(PetscBTDestroy(&btbd));
711 
712   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713   if (order != 1) {
714     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716     for (i = 0; i < nv; i++) {
717       if (PetscBTLookup(btvcand, i)) {
718         PetscBool found = PETSC_FALSE;
719         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720           PetscInt k, e = jj[j];
721           if (PetscBTLookup(bte, e)) continue;
722           for (k = iit[e]; k < iit[e + 1]; k++) {
723             PetscInt v = jjt[k];
724             if (v != i && PetscBTLookup(btvcand, v)) {
725               found = PETSC_TRUE;
726               break;
727             }
728           }
729         }
730         if (!found) {
731           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732           PetscCall(PetscBTClear(btvcand, i));
733         } else {
734           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735         }
736       }
737     }
738     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739   }
740   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742   PetscCall(MatDestroy(&lGe));
743 
744   /* Get the local G^T explicitly */
745   PetscCall(MatDestroy(&lGt));
746   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
748 
749   /* Mark shared nodal dofs */
750   PetscCall(PetscBTCreate(nv, &btvi));
751   for (i = 0; i < nv; i++) {
752     if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753   }
754 
755   if (matis->allow_repeated) {
756     PetscCall(PetscFree(eneighs[0]));
757     PetscCall(PetscFree(vneighs[0]));
758     PetscCall(PetscFree(eneighs));
759     PetscCall(PetscFree(vneighs));
760   }
761   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
763 
764   /* communicate corners and splitpoints */
765   PetscCall(PetscMalloc1(nv, &vmarks));
766   PetscCall(PetscArrayzero(sfvleaves, nv));
767   PetscCall(PetscArrayzero(sfvroots, Lv));
768   for (i = 0; i < nv; i++)
769     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
770 
771   if (print) {
772     IS tbz;
773 
774     cum = 0;
775     for (i = 0; i < nv; i++)
776       if (sfvleaves[i]) vmarks[cum++] = i;
777 
778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780     PetscCall(ISView(tbz, NULL));
781     PetscCall(ISDestroy(&tbz));
782   }
783 
784   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
788 
789   /* Zero rows of lGt corresponding to identified corners
790      and interior nodal dofs */
791   cum = 0;
792   for (i = 0; i < nv; i++) {
793     if (sfvleaves[i]) {
794       vmarks[cum++] = i;
795       PetscCall(PetscBTSet(btv, i));
796     } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797   }
798   PetscCall(PetscBTDestroy(&btvi));
799   if (print) {
800     IS tbz;
801 
802     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804     PetscCall(ISView(tbz, NULL));
805     PetscCall(ISDestroy(&tbz));
806   }
807   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808   PetscCall(PetscFree(vmarks));
809   PetscCall(PetscSFDestroy(&sfv));
810   PetscCall(PetscFree2(sfvleaves, sfvroots));
811 
812   /* Recompute G */
813   PetscCall(MatDestroy(&lG));
814   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815   if (print) {
816     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817     PetscCall(MatView(lG, NULL));
818     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819     PetscCall(MatView(lGt, NULL));
820   }
821 
822   /* Get primal dofs (if any) */
823   cum = 0;
824   for (i = 0; i < ne; i++) {
825     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826   }
827   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829   if (print) {
830     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831     PetscCall(ISView(primals, NULL));
832   }
833   PetscCall(PetscBTDestroy(&bte));
834   /* TODO: what if the user passed in some of them ?  */
835   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836   PetscCall(ISDestroy(&primals));
837 
838   /* Compute edge connectivity */
839   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
840 
841   /* Symbolic conn = lG*lGt */
842   if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843     PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844     PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845     PetscCall(MatProductSetAlgorithm(conn, "default"));
846     PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848     PetscCall(MatProductSetFromOptions(conn));
849     PetscCall(MatProductSymbolic(conn));
850     PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851     if (fl2g) {
852       PetscBT   btf;
853       PetscInt *iia, *jja, *iiu, *jju;
854       PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
855 
856       /* create CSR for all local dofs */
857       PetscCall(PetscMalloc1(n + 1, &iia));
858       if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859         PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860         iiu = pcbddc->mat_graph->xadj;
861         jju = pcbddc->mat_graph->adjncy;
862       } else if (pcbddc->use_local_adj) {
863         rest = PETSC_TRUE;
864         PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865       } else {
866         free = PETSC_TRUE;
867         PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868         iiu[0] = 0;
869         for (i = 0; i < n; i++) {
870           iiu[i + 1] = i + 1;
871           jju[i]     = -1;
872         }
873       }
874 
875       /* import sizes of CSR */
876       iia[0] = 0;
877       for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
878 
879       /* overwrite entries corresponding to the Nedelec field */
880       PetscCall(PetscBTCreate(n, &btf));
881       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882       for (i = 0; i < ne; i++) {
883         PetscCall(PetscBTSet(btf, idxs[i]));
884         iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885       }
886 
887       /* iia in CSR */
888       for (i = 0; i < n; i++) iia[i + 1] += iia[i];
889 
890       /* jja in CSR */
891       PetscCall(PetscMalloc1(iia[n], &jja));
892       for (i = 0; i < n; i++)
893         if (!PetscBTLookup(btf, i))
894           for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
895 
896       /* map edge dofs connectivity */
897       if (jj) {
898         PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899         for (i = 0; i < ne; i++) {
900           PetscInt e = idxs[i];
901           for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902         }
903       }
904       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906       if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907       if (free) PetscCall(PetscFree2(iiu, jju));
908       PetscCall(PetscBTDestroy(&btf));
909     } else {
910       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911     }
912     PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913     PetscCall(MatDestroy(&conn));
914   }
915 
916   /* Analyze interface for edge dofs */
917   PetscCall(PCBDDCAnalyzeInterface(pc));
918   pcbddc->mat_graph->twodim = PETSC_FALSE;
919 
920   /* Get coarse edges in the edge space */
921   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
922 
923   if (fl2g) {
924     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925     PetscCall(PetscMalloc1(nee, &eedges));
926     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927   } else {
928     eedges  = alleedges;
929     primals = allprimals;
930   }
931 
932   /* Mark fine edge dofs with their coarse edge id */
933   PetscCall(PetscArrayzero(marks, ne));
934   PetscCall(ISGetLocalSize(primals, &cum));
935   PetscCall(ISGetIndices(primals, &idxs));
936   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937   PetscCall(ISRestoreIndices(primals, &idxs));
938   if (print) {
939     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940     PetscCall(ISView(primals, NULL));
941   }
942 
943   maxsize = 0;
944   for (i = 0; i < nee; i++) {
945     PetscInt size, mark = i + 1;
946 
947     PetscCall(ISGetLocalSize(eedges[i], &size));
948     PetscCall(ISGetIndices(eedges[i], &idxs));
949     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950     PetscCall(ISRestoreIndices(eedges[i], &idxs));
951     maxsize = PetscMax(maxsize, size);
952   }
953 
954   /* Find coarse edge endpoints */
955   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957   for (i = 0; i < nee; i++) {
958     PetscInt mark = i + 1, size;
959 
960     PetscCall(ISGetLocalSize(eedges[i], &size));
961     if (!size && nedfieldlocal) continue;
962     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963     PetscCall(ISGetIndices(eedges[i], &idxs));
964     if (print) {
965       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966       PetscCall(ISView(eedges[i], NULL));
967     }
968     for (j = 0; j < size; j++) {
969       PetscInt k, ee = idxs[j];
970       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
971       for (k = ii[ee]; k < ii[ee + 1]; k++) {
972         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
973         if (PetscBTLookup(btv, jj[k])) {
974           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976           PetscInt  k2;
977           PetscBool corner = PETSC_FALSE;
978           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980             /* it's a corner if either is connected with an edge dof belonging to a different cc or
981                if the edge dof lie on the natural part of the boundary */
982             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983               corner = PETSC_TRUE;
984               break;
985             }
986           }
987           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
989             PetscCall(PetscBTSet(btv, jj[k]));
990           } else {
991             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
992           }
993         }
994       }
995     }
996     PetscCall(ISRestoreIndices(eedges[i], &idxs));
997   }
998   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000   PetscCall(PetscBTDestroy(&btb));
1001 
1002   /* Reset marked primal dofs */
1003   PetscCall(ISGetLocalSize(primals, &cum));
1004   PetscCall(ISGetIndices(primals, &idxs));
1005   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006   PetscCall(ISRestoreIndices(primals, &idxs));
1007 
1008   /* Now use the initial lG */
1009   PetscCall(MatDestroy(&lG));
1010   PetscCall(MatDestroy(&lGt));
1011   lG = lGinit;
1012   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1013 
1014   /* Compute extended cols indices */
1015   PetscCall(PetscBTCreate(nv, &btvc));
1016   PetscCall(PetscBTCreate(nee, &bter));
1017   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019   i *= maxsize;
1020   PetscCall(PetscCalloc1(nee, &extcols));
1021   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022   eerr = PETSC_FALSE;
1023   for (i = 0; i < nee; i++) {
1024     PetscInt size, found = 0;
1025 
1026     cum = 0;
1027     PetscCall(ISGetLocalSize(eedges[i], &size));
1028     if (!size && nedfieldlocal) continue;
1029     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030     PetscCall(ISGetIndices(eedges[i], &idxs));
1031     PetscCall(PetscBTMemzero(nv, btvc));
1032     for (j = 0; j < size; j++) {
1033       PetscInt k, ee = idxs[j];
1034       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035         PetscInt vv = jj[k];
1036         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037         else if (!PetscBTLookupSet(btvc, vv)) found++;
1038       }
1039     }
1040     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045     /* it may happen that endpoints are not defined at this point
1046        if it is the case, mark this edge for a second pass */
1047     if (cum != size - 1 || found != 2) {
1048       PetscCall(PetscBTSet(bter, i));
1049       if (print) {
1050         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051         PetscCall(ISView(eedges[i], NULL));
1052         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053         PetscCall(ISView(extcols[i], NULL));
1054       }
1055       eerr = PETSC_TRUE;
1056     }
1057   }
1058   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059   PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
1060   if (done) {
1061     PetscInt *newprimals;
1062 
1063     PetscCall(PetscMalloc1(ne, &newprimals));
1064     PetscCall(ISGetLocalSize(primals, &cum));
1065     PetscCall(ISGetIndices(primals, &idxs));
1066     PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067     PetscCall(ISRestoreIndices(primals, &idxs));
1068     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070     for (i = 0; i < nee; i++) {
1071       PetscBool has_candidates = PETSC_FALSE;
1072       if (PetscBTLookup(bter, i)) {
1073         PetscInt size, mark = i + 1;
1074 
1075         PetscCall(ISGetLocalSize(eedges[i], &size));
1076         PetscCall(ISGetIndices(eedges[i], &idxs));
1077         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078         for (j = 0; j < size; j++) {
1079           PetscInt k, ee = idxs[j];
1080           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082             /* set all candidates located on the edge as corners */
1083             if (PetscBTLookup(btvcand, jj[k])) {
1084               PetscInt k2, vv = jj[k];
1085               has_candidates = PETSC_TRUE;
1086               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087               PetscCall(PetscBTSet(btv, vv));
1088               /* set all edge dofs connected to candidate as primals */
1089               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090                 if (marks[jjt[k2]] == mark) {
1091                   PetscInt k3, ee2 = jjt[k2];
1092                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093                   newprimals[cum++] = ee2;
1094                   /* finally set the new corners */
1095                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097                     PetscCall(PetscBTSet(btv, jj[k3]));
1098                   }
1099                 }
1100               }
1101             } else {
1102               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103             }
1104           }
1105         }
1106         if (!has_candidates) { /* circular edge */
1107           PetscInt k, ee = idxs[0], *tmarks;
1108 
1109           PetscCall(PetscCalloc1(ne, &tmarks));
1110           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
1111           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112             PetscInt k2;
1113             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
1114             PetscCall(PetscBTSet(btv, jj[k]));
1115             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116           }
1117           for (j = 0; j < size; j++) {
1118             if (tmarks[idxs[j]] > 1) {
1119               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120               newprimals[cum++] = idxs[j];
1121             }
1122           }
1123           PetscCall(PetscFree(tmarks));
1124         }
1125         PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126       }
1127       PetscCall(ISDestroy(&extcols[i]));
1128     }
1129     PetscCall(PetscFree(extcols));
1130     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132     if (fl2g) {
1133       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134       PetscCall(ISDestroy(&primals));
1135       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136       PetscCall(PetscFree(eedges));
1137     }
1138     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140     PetscCall(PetscFree(newprimals));
1141     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142     PetscCall(ISDestroy(&primals));
1143     PetscCall(PCBDDCAnalyzeInterface(pc));
1144     pcbddc->mat_graph->twodim = PETSC_FALSE;
1145     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146     if (fl2g) {
1147       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148       PetscCall(PetscMalloc1(nee, &eedges));
1149       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150     } else {
1151       eedges  = alleedges;
1152       primals = allprimals;
1153     }
1154     PetscCall(PetscCalloc1(nee, &extcols));
1155 
1156     /* Mark again */
1157     PetscCall(PetscArrayzero(marks, ne));
1158     for (i = 0; i < nee; i++) {
1159       PetscInt size, mark = i + 1;
1160 
1161       PetscCall(ISGetLocalSize(eedges[i], &size));
1162       PetscCall(ISGetIndices(eedges[i], &idxs));
1163       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165     }
1166     if (print) {
1167       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168       PetscCall(ISView(primals, NULL));
1169     }
1170 
1171     /* Recompute extended cols */
1172     eerr = PETSC_FALSE;
1173     for (i = 0; i < nee; i++) {
1174       PetscInt size;
1175 
1176       cum = 0;
1177       PetscCall(ISGetLocalSize(eedges[i], &size));
1178       if (!size && nedfieldlocal) continue;
1179       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180       PetscCall(ISGetIndices(eedges[i], &idxs));
1181       for (j = 0; j < size; j++) {
1182         PetscInt k, ee = idxs[j];
1183         for (k = ii[ee]; k < ii[ee + 1]; k++)
1184           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185       }
1186       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191       if (cum != size - 1) {
1192         if (print) {
1193           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194           PetscCall(ISView(eedges[i], NULL));
1195           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196           PetscCall(ISView(extcols[i], NULL));
1197         }
1198         eerr = PETSC_TRUE;
1199       }
1200     }
1201   }
1202   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203   PetscCall(PetscFree2(extrow, gidxs));
1204   PetscCall(PetscBTDestroy(&bter));
1205   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206   /* an error should not occur at this point */
1207   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1208 
1209   /* Check the number of endpoints */
1210   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211   PetscCall(PetscMalloc1(2 * nee, &corners));
1212   PetscCall(PetscMalloc1(nee, &cedges));
1213   for (i = 0; i < nee; i++) {
1214     PetscInt size, found = 0, gc[2];
1215 
1216     /* init with defaults */
1217     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218     PetscCall(ISGetLocalSize(eedges[i], &size));
1219     if (!size && nedfieldlocal) continue;
1220     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221     PetscCall(ISGetIndices(eedges[i], &idxs));
1222     PetscCall(PetscBTMemzero(nv, btvc));
1223     for (j = 0; j < size; j++) {
1224       PetscInt k, ee = idxs[j];
1225       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226         PetscInt vv = jj[k];
1227         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229           corners[i * 2 + found++] = vv;
1230         }
1231       }
1232     }
1233     if (found != 2) {
1234       PetscInt e;
1235       if (fl2g) {
1236         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237       } else {
1238         e = idxs[0];
1239       }
1240       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241     }
1242 
1243     /* get primal dof index on this coarse edge */
1244     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245     if (gc[0] > gc[1]) {
1246       PetscInt swap      = corners[2 * i];
1247       corners[2 * i]     = corners[2 * i + 1];
1248       corners[2 * i + 1] = swap;
1249     }
1250     cedges[i] = idxs[size - 1];
1251     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253   }
1254   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255   PetscCall(PetscBTDestroy(&btvc));
1256 
1257   if (PetscDefined(USE_DEBUG)) {
1258     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259      not interfere with neighbouring coarse edges */
1260     PetscCall(PetscMalloc1(nee + 1, &emarks));
1261     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262     for (i = 0; i < nv; i++) {
1263       PetscInt emax = 0, eemax = 0;
1264 
1265       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266       PetscCall(PetscArrayzero(emarks, nee + 1));
1267       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268       for (j = 1; j < nee + 1; j++) {
1269         if (emax < emarks[j]) {
1270           emax  = emarks[j];
1271           eemax = j;
1272         }
1273       }
1274       /* not relevant for edges */
1275       if (!eemax) continue;
1276 
1277       for (j = ii[i]; j < ii[i + 1]; j++) {
1278         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279       }
1280     }
1281     PetscCall(PetscFree(emarks));
1282     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283   }
1284 
1285   /* Compute extended rows indices for edge blocks of the change of basis */
1286   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288   extmem *= maxsize;
1289   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290   PetscCall(PetscMalloc1(nee, &extrows));
1291   PetscCall(PetscCalloc1(nee, &extrowcum));
1292   for (i = 0; i < nv; i++) {
1293     PetscInt mark = 0, size, start;
1294 
1295     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296     for (j = ii[i]; j < ii[i + 1]; j++)
1297       if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1298 
1299     /* not relevant */
1300     if (!mark) continue;
1301 
1302     /* import extended row */
1303     mark--;
1304     start = mark * extmem + extrowcum[mark];
1305     size  = ii[i + 1] - ii[i];
1306     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308     extrowcum[mark] += size;
1309   }
1310   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311   PetscCall(MatDestroy(&lGt));
1312   PetscCall(PetscFree(marks));
1313 
1314   /* Compress extrows */
1315   cum = 0;
1316   for (i = 0; i < nee; i++) {
1317     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318     PetscCall(PetscSortRemoveDupsInt(&size, start));
1319     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320     cum = PetscMax(cum, size);
1321   }
1322   PetscCall(PetscFree(extrowcum));
1323   PetscCall(PetscBTDestroy(&btv));
1324   PetscCall(PetscBTDestroy(&btvcand));
1325 
1326   /* Workspace for lapack inner calls and VecSetValues */
1327   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1328 
1329   /* Create change of basis matrix (no preallocation) */
1330   PetscCall(MatCreate(comm, &T));
1331   PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332   PetscCall(MatSetType(T, MATAIJ));
1333   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1334   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1335   PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
1336   //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1337   //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1338   //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1339 
1340   /* Defaults to identity */
1341   {
1342     Vec                w;
1343     const PetscScalar *wa;
1344 
1345     PetscCall(MatCreateVecs(T, &w, NULL));
1346     PetscCall(VecSetLocalToGlobalMapping(w, al2g));
1347     PetscCall(VecSet(w, 1.0));
1348     for (i = 0; i < nee; i++) {
1349       const PetscInt *idxs;
1350       PetscInt        nl;
1351 
1352       PetscCall(ISGetLocalSize(eedges[i], &nl));
1353       PetscCall(ISGetIndices(eedges[i], &idxs));
1354       PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES));
1355       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1356     }
1357     PetscCall(VecAssemblyBegin(w));
1358     PetscCall(VecAssemblyEnd(w));
1359     PetscCall(VecGetArrayRead(w, &wa));
1360     for (i = T->rmap->rstart; i < T->rmap->rend; i++)
1361       if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1362     PetscCall(VecRestoreArrayRead(w, &wa));
1363     PetscCall(VecDestroy(&w));
1364   }
1365 
1366   /* Create discrete gradient for the coarser level if needed */
1367   PetscCall(MatDestroy(&pcbddc->nedcG));
1368   PetscCall(ISDestroy(&pcbddc->nedclocal));
1369   if (pcbddc->current_level < pcbddc->max_levels) {
1370     ISLocalToGlobalMapping cel2g, cvl2g;
1371     IS                     wis, gwis;
1372     PetscInt               cnv, cne;
1373 
1374     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1375     if (fl2g) {
1376       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1377     } else {
1378       PetscCall(PetscObjectReference((PetscObject)wis));
1379       pcbddc->nedclocal = wis;
1380     }
1381     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1382     PetscCall(ISDestroy(&wis));
1383     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1384     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1385     PetscCall(ISDestroy(&wis));
1386     PetscCall(ISDestroy(&gwis));
1387 
1388     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1389     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1390     PetscCall(ISDestroy(&wis));
1391     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1392     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1393     PetscCall(ISDestroy(&wis));
1394     PetscCall(ISDestroy(&gwis));
1395 
1396     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1397     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1398     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1399     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1400     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1401     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1402     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1403     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1404   }
1405   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1406 
1407 #if defined(PRINT_GDET)
1408   inc = 0;
1409   lev = pcbddc->current_level;
1410 #endif
1411 
1412   /* Insert values in the change of basis matrix */
1413   for (i = 0; i < nee; i++) {
1414     Mat         Gins = NULL, GKins = NULL;
1415     IS          cornersis = NULL;
1416     PetscScalar cvals[2];
1417 
1418     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1419     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1420     if (Gins && GKins) {
1421       const PetscScalar *data;
1422       const PetscInt    *rows, *cols;
1423       PetscInt           nrh, nch, nrc, ncc;
1424 
1425       PetscCall(ISGetIndices(eedges[i], &cols));
1426       /* H1 */
1427       PetscCall(ISGetIndices(extrows[i], &rows));
1428       PetscCall(MatGetSize(Gins, &nrh, &nch));
1429       PetscCall(MatDenseGetArrayRead(Gins, &data));
1430       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1431       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1432       PetscCall(ISRestoreIndices(extrows[i], &rows));
1433       /* complement */
1434       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1435       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1436       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1437       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1438       PetscCall(MatDenseGetArrayRead(GKins, &data));
1439       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1440       PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1441 
1442       /* coarse discrete gradient */
1443       if (pcbddc->nedcG) {
1444         PetscInt cols[2];
1445 
1446         cols[0] = 2 * i;
1447         cols[1] = 2 * i + 1;
1448         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1449       }
1450       PetscCall(ISRestoreIndices(eedges[i], &cols));
1451     }
1452     PetscCall(ISDestroy(&extrows[i]));
1453     PetscCall(ISDestroy(&extcols[i]));
1454     PetscCall(ISDestroy(&cornersis));
1455     PetscCall(MatDestroy(&Gins));
1456     PetscCall(MatDestroy(&GKins));
1457   }
1458 
1459   /* for FDM element-by-element: first dof on the edge only constraint. Why? */
1460   if (elements_corners && pcbddc->mat_graph->multi_element) {
1461     MatNullSpace nnsp;
1462     Vec          quad_vec;
1463 
1464     PetscCall(MatCreateVecs(pc->pmat, &quad_vec, NULL));
1465     PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, 1, &quad_vec, &nnsp));
1466     PetscCall(VecLockReadPop(quad_vec));
1467     PetscCall(VecSetLocalToGlobalMapping(quad_vec, al2g));
1468     for (i = 0; i < nee; i++) {
1469       const PetscInt *idxs;
1470       PetscScalar     one = 1.0;
1471 
1472       PetscCall(ISGetLocalSize(eedges[i], &cum));
1473       if (!cum) continue;
1474       PetscCall(ISGetIndices(eedges[i], &idxs));
1475       PetscCall(VecSetValuesLocal(quad_vec, 1, idxs, &one, INSERT_VALUES));
1476       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1477     }
1478     PetscCall(VecLockReadPush(quad_vec));
1479     PetscCall(VecDestroy(&quad_vec));
1480     PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1481     PetscCall(MatNullSpaceDestroy(&nnsp));
1482   }
1483   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1484   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1485 
1486   /* Start assembling */
1487   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1488   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1489 
1490   /* Free */
1491   if (fl2g) {
1492     PetscCall(ISDestroy(&primals));
1493     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1494     PetscCall(PetscFree(eedges));
1495   }
1496 
1497   /* hack mat_graph with primal dofs on the coarse edges */
1498   {
1499     PCBDDCGraph graph  = pcbddc->mat_graph;
1500     PetscInt   *oqueue = graph->queue;
1501     PetscInt   *ocptr  = graph->cptr;
1502     PetscInt    ncc, *idxs;
1503 
1504     /* find first primal edge */
1505     if (pcbddc->nedclocal) {
1506       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1507     } else {
1508       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1509       idxs = cedges;
1510     }
1511     cum = 0;
1512     while (cum < nee && cedges[cum] < 0) cum++;
1513 
1514     /* adapt connected components */
1515     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1516     graph->cptr[0] = 0;
1517     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1518       PetscInt lc = ocptr[i + 1] - ocptr[i];
1519       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1520         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1521         graph->queue[graph->cptr[ncc]] = cedges[cum];
1522         ncc++;
1523         lc--;
1524         cum++;
1525         while (cum < nee && cedges[cum] < 0) cum++;
1526       }
1527       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1528       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1529       ncc++;
1530     }
1531     graph->ncc = ncc;
1532     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1533     PetscCall(PetscFree2(ocptr, oqueue));
1534   }
1535   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1536   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1537   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1538 
1539   PetscCall(ISDestroy(&nedfieldlocal));
1540   PetscCall(PetscFree(extrow));
1541   PetscCall(PetscFree2(work, rwork));
1542   PetscCall(PetscFree(corners));
1543   PetscCall(PetscFree(cedges));
1544   PetscCall(PetscFree(extrows));
1545   PetscCall(PetscFree(extcols));
1546   PetscCall(MatDestroy(&lG));
1547 
1548   /* Complete assembling */
1549   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1550   PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1551   if (pcbddc->nedcG) {
1552     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1553     PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1554   }
1555 
1556   PetscCall(ISDestroy(&elements_corners));
1557 
1558   /* set change of basis */
1559   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1560   PetscCall(MatDestroy(&T));
1561   PetscFunctionReturn(PETSC_SUCCESS);
1562 }
1563 
1564 /* the near-null space of BDDC carries information on quadrature weights,
1565    and these can be collinear -> so cheat with MatNullSpaceCreate
1566    and create a suitable set of basis vectors first */
1567 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1568 {
1569   PetscInt i;
1570 
1571   PetscFunctionBegin;
1572   for (i = 0; i < nvecs; i++) {
1573     PetscInt first, last;
1574 
1575     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1576     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1577     if (i >= first && i < last) {
1578       PetscScalar *data;
1579       PetscCall(VecGetArray(quad_vecs[i], &data));
1580       if (!has_const) {
1581         data[i - first] = 1.;
1582       } else {
1583         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1584         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1585       }
1586       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1587     }
1588     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1589   }
1590   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1591   for (i = 0; i < nvecs; i++) { /* reset vectors */
1592     PetscInt first, last;
1593     PetscCall(VecLockReadPop(quad_vecs[i]));
1594     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1595     if (i >= first && i < last) {
1596       PetscScalar *data;
1597       PetscCall(VecGetArray(quad_vecs[i], &data));
1598       if (!has_const) {
1599         data[i - first] = 0.;
1600       } else {
1601         data[2 * i - first]     = 0.;
1602         data[2 * i - first + 1] = 0.;
1603       }
1604       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1605     }
1606     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1607     PetscCall(VecLockReadPush(quad_vecs[i]));
1608   }
1609   PetscFunctionReturn(PETSC_SUCCESS);
1610 }
1611 
1612 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1613 {
1614   Mat                    loc_divudotp;
1615   Vec                    p, v, quad_vec;
1616   ISLocalToGlobalMapping map;
1617   PetscScalar           *array;
1618 
1619   PetscFunctionBegin;
1620   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1621   if (!transpose) {
1622     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1623   } else {
1624     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1625   }
1626   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1627   PetscCall(VecLockReadPop(quad_vec));
1628   PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1629 
1630   /* compute local quad vec */
1631   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1632   if (!transpose) {
1633     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1634   } else {
1635     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1636   }
1637   /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1638   PetscCall(VecSet(p, 1.));
1639   if (!transpose) {
1640     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1641   } else {
1642     PetscCall(MatMult(loc_divudotp, p, v));
1643   }
1644   PetscCall(VecDestroy(&p));
1645   if (vl2l) {
1646     Mat        lA;
1647     VecScatter sc;
1648     Vec        vins;
1649 
1650     PetscCall(MatISGetLocalMat(A, &lA));
1651     PetscCall(MatCreateVecs(lA, &vins, NULL));
1652     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1653     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1654     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1655     PetscCall(VecScatterDestroy(&sc));
1656     PetscCall(VecDestroy(&v));
1657     v = vins;
1658   }
1659 
1660   /* mask summation of interface values */
1661   PetscInt        n, *mmask, *mask, *idxs, nmr, nr;
1662   const PetscInt *degree;
1663   PetscSF         msf;
1664 
1665   PetscCall(VecGetLocalSize(v, &n));
1666   PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1667   PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1668   PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1669   PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1670   PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, &degree));
1671   PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, &degree));
1672   for (PetscInt i = 0, c = 0; i < nr; i++) {
1673     mmask[c] = 1;
1674     c += degree[i];
1675   }
1676   PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1677   PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1678   PetscCall(VecGetArray(v, &array));
1679   for (PetscInt i = 0; i < n; i++) {
1680     array[i] *= mask[i];
1681     idxs[i] = i;
1682   }
1683   PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1684   PetscCall(VecRestoreArray(v, &array));
1685   PetscCall(PetscFree3(mmask, mask, idxs));
1686   PetscCall(VecDestroy(&v));
1687   PetscCall(VecAssemblyBegin(quad_vec));
1688   PetscCall(VecAssemblyEnd(quad_vec));
1689   PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1690   PetscCall(VecLockReadPush(quad_vec));
1691   PetscCall(VecDestroy(&quad_vec));
1692   PetscFunctionReturn(PETSC_SUCCESS);
1693 }
1694 
1695 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1696 {
1697   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1698 
1699   PetscFunctionBegin;
1700   if (primalv) {
1701     if (pcbddc->user_primal_vertices_local) {
1702       IS list[2], newp;
1703 
1704       list[0] = primalv;
1705       list[1] = pcbddc->user_primal_vertices_local;
1706       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1707       PetscCall(ISSortRemoveDups(newp));
1708       PetscCall(ISDestroy(&list[1]));
1709       pcbddc->user_primal_vertices_local = newp;
1710     } else {
1711       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1712     }
1713   }
1714   PetscFunctionReturn(PETSC_SUCCESS);
1715 }
1716 
1717 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1718 {
1719   PetscInt f, *comp = (PetscInt *)ctx;
1720 
1721   PetscFunctionBegin;
1722   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1723   PetscFunctionReturn(PETSC_SUCCESS);
1724 }
1725 
1726 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1727 {
1728   Vec       local, global;
1729   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1730   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1731   PetscBool monolithic = PETSC_FALSE;
1732 
1733   PetscFunctionBegin;
1734   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1735   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1736   PetscOptionsEnd();
1737   /* need to convert from global to local topology information and remove references to information in global ordering */
1738   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1739   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1740   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1741   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1742   if (monolithic) { /* just get block size to properly compute vertices */
1743     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1744     goto boundary;
1745   }
1746 
1747   if (pcbddc->user_provided_isfordofs) {
1748     if (pcbddc->n_ISForDofs) {
1749       PetscInt i;
1750 
1751       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1752       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1753         PetscInt bs;
1754 
1755         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1756         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1757         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1758         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1759       }
1760       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1761       pcbddc->n_ISForDofs      = 0;
1762       PetscCall(PetscFree(pcbddc->ISForDofs));
1763     }
1764   } else {
1765     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1766       DM dm;
1767 
1768       PetscCall(MatGetDM(pc->pmat, &dm));
1769       if (!dm) PetscCall(PCGetDM(pc, &dm));
1770       if (dm) {
1771         IS      *fields;
1772         PetscInt nf, i;
1773 
1774         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1775         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1776         for (i = 0; i < nf; i++) {
1777           PetscInt bs;
1778 
1779           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1780           PetscCall(ISGetBlockSize(fields[i], &bs));
1781           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1782           PetscCall(ISDestroy(&fields[i]));
1783         }
1784         PetscCall(PetscFree(fields));
1785         pcbddc->n_ISForDofsLocal = nf;
1786       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1787         PetscContainer c;
1788 
1789         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1790         if (c) {
1791           MatISLocalFields lf;
1792           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1793           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1794         } else { /* fallback, create the default fields if bs > 1 */
1795           PetscInt i, n = matis->A->rmap->n;
1796           PetscCall(MatGetBlockSize(pc->pmat, &i));
1797           if (i > 1) {
1798             pcbddc->n_ISForDofsLocal = i;
1799             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1800             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1801           }
1802         }
1803       }
1804     } else {
1805       PetscInt i;
1806       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1807     }
1808   }
1809 
1810 boundary:
1811   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1812     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1813   } else if (pcbddc->DirichletBoundariesLocal) {
1814     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1815   }
1816   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1817     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1818   } else if (pcbddc->NeumannBoundariesLocal) {
1819     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1820   }
1821   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1822   PetscCall(VecDestroy(&global));
1823   PetscCall(VecDestroy(&local));
1824   /* detect local disconnected subdomains if requested or needed */
1825   if (pcbddc->detect_disconnected || matis->allow_repeated) {
1826     IS        primalv = NULL;
1827     PetscInt  nel;
1828     PetscBool filter = pcbddc->detect_disconnected_filter;
1829 
1830     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1831     PetscCall(PetscFree(pcbddc->local_subs));
1832     PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1833     if (matis->allow_repeated && nel) {
1834       const PetscInt *elsizes;
1835 
1836       pcbddc->n_local_subs = nel;
1837       PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1838       PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1839       for (PetscInt i = 0, c = 0; i < nel; i++) {
1840         PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1841         c += elsizes[i];
1842       }
1843     } else {
1844       PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1845     }
1846     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1847     PetscCall(ISDestroy(&primalv));
1848   }
1849   /* early stage corner detection */
1850   {
1851     DM dm;
1852 
1853     PetscCall(MatGetDM(pc->pmat, &dm));
1854     if (!dm) PetscCall(PCGetDM(pc, &dm));
1855     if (dm) {
1856       PetscBool isda;
1857 
1858       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1859       if (isda) {
1860         ISLocalToGlobalMapping l2l;
1861         IS                     corners;
1862         Mat                    lA;
1863         PetscBool              gl, lo;
1864 
1865         {
1866           Vec                cvec;
1867           const PetscScalar *coords;
1868           PetscInt           dof, n, cdim;
1869           PetscBool          memc = PETSC_TRUE;
1870 
1871           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1872           PetscCall(DMGetCoordinates(dm, &cvec));
1873           PetscCall(VecGetLocalSize(cvec, &n));
1874           PetscCall(VecGetBlockSize(cvec, &cdim));
1875           n /= cdim;
1876           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1877           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1878           PetscCall(VecGetArrayRead(cvec, &coords));
1879 #if defined(PETSC_USE_COMPLEX)
1880           memc = PETSC_FALSE;
1881 #endif
1882           if (dof != 1) memc = PETSC_FALSE;
1883           if (memc) {
1884             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1885           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1886             PetscReal *bcoords = pcbddc->mat_graph->coords;
1887             PetscInt   i, b, d;
1888 
1889             for (i = 0; i < n; i++) {
1890               for (b = 0; b < dof; b++) {
1891                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1892               }
1893             }
1894           }
1895           PetscCall(VecRestoreArrayRead(cvec, &coords));
1896           pcbddc->mat_graph->cdim  = cdim;
1897           pcbddc->mat_graph->cnloc = dof * n;
1898           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1899         }
1900         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1901         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1902         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1903         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1904         lo = (PetscBool)(l2l && corners);
1905         PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1906         if (gl) { /* From PETSc's DMDA */
1907           const PetscInt *idx;
1908           PetscInt        dof, bs, *idxout, n;
1909 
1910           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1911           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1912           PetscCall(ISGetLocalSize(corners, &n));
1913           PetscCall(ISGetIndices(corners, &idx));
1914           if (bs == dof) {
1915             PetscCall(PetscMalloc1(n, &idxout));
1916             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1917           } else { /* the original DMDA local-to-local map have been modified */
1918             PetscInt i, d;
1919 
1920             PetscCall(PetscMalloc1(dof * n, &idxout));
1921             for (i = 0; i < n; i++)
1922               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1923             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
1924 
1925             bs = 1;
1926             n *= dof;
1927           }
1928           PetscCall(ISRestoreIndices(corners, &idx));
1929           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1930           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1931           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1932           PetscCall(ISDestroy(&corners));
1933           pcbddc->corner_selected  = PETSC_TRUE;
1934           pcbddc->corner_selection = PETSC_TRUE;
1935         }
1936         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1937       }
1938     }
1939   }
1940   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1941     DM dm;
1942 
1943     PetscCall(MatGetDM(pc->pmat, &dm));
1944     if (!dm) PetscCall(PCGetDM(pc, &dm));
1945     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1946       Vec          vcoords;
1947       PetscSection section;
1948       PetscReal   *coords;
1949       PetscInt     d, cdim, nl, nf, **ctxs;
1950       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1951       /* debug coordinates */
1952       PetscViewer       viewer;
1953       PetscBool         flg;
1954       PetscViewerFormat format;
1955       const char       *prefix;
1956 
1957       PetscCall(DMGetCoordinateDim(dm, &cdim));
1958       PetscCall(DMGetLocalSection(dm, &section));
1959       PetscCall(PetscSectionGetNumFields(section, &nf));
1960       PetscCall(DMCreateGlobalVector(dm, &vcoords));
1961       PetscCall(VecGetLocalSize(vcoords, &nl));
1962       PetscCall(PetscMalloc1(nl * cdim, &coords));
1963       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1964       PetscCall(PetscMalloc1(nf, &ctxs[0]));
1965       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1966       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1967 
1968       /* debug coordinates */
1969       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1970       PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1971       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1972       for (d = 0; d < cdim; d++) {
1973         PetscInt           i;
1974         const PetscScalar *v;
1975         char               name[16];
1976 
1977         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1978         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d));
1979         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1980         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1981         if (flg) PetscCall(VecView(vcoords, viewer));
1982         PetscCall(VecGetArrayRead(vcoords, &v));
1983         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1984         PetscCall(VecRestoreArrayRead(vcoords, &v));
1985       }
1986       PetscCall(VecDestroy(&vcoords));
1987       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1988       PetscCall(PetscFree(coords));
1989       PetscCall(PetscFree(ctxs[0]));
1990       PetscCall(PetscFree2(funcs, ctxs));
1991       if (flg) {
1992         PetscCall(PetscViewerPopFormat(viewer));
1993         PetscCall(PetscViewerDestroy(&viewer));
1994       }
1995     }
1996   }
1997   PetscFunctionReturn(PETSC_SUCCESS);
1998 }
1999 
2000 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
2001 {
2002   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
2003   IS              nis;
2004   const PetscInt *idxs;
2005   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;
2006 
2007   PetscFunctionBegin;
2008   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
2009   if (mop == MPI_LAND) {
2010     /* init rootdata with true */
2011     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
2012   } else {
2013     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
2014   }
2015   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
2016   PetscCall(ISGetLocalSize(*is, &nd));
2017   PetscCall(ISGetIndices(*is, &idxs));
2018   for (i = 0; i < nd; i++)
2019     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
2020   PetscCall(ISRestoreIndices(*is, &idxs));
2021   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2022   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2023   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2024   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2025   if (mop == MPI_LAND) {
2026     PetscCall(PetscMalloc1(nd, &nidxs));
2027   } else {
2028     PetscCall(PetscMalloc1(n, &nidxs));
2029   }
2030   for (i = 0, nnd = 0; i < n; i++)
2031     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2032   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2033   PetscCall(ISDestroy(is));
2034   *is = nis;
2035   PetscFunctionReturn(PETSC_SUCCESS);
2036 }
2037 
2038 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2039 {
2040   PC_IS   *pcis   = (PC_IS *)pc->data;
2041   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2042 
2043   PetscFunctionBegin;
2044   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2045   if (pcbddc->ChangeOfBasisMatrix) {
2046     Vec swap;
2047 
2048     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2049     swap                = pcbddc->work_change;
2050     pcbddc->work_change = r;
2051     r                   = swap;
2052   }
2053   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2054   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2055   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2056   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2057   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2058   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2059   PetscCall(VecSet(z, 0.));
2060   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2061   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2062   if (pcbddc->ChangeOfBasisMatrix) {
2063     pcbddc->work_change = r;
2064     PetscCall(VecCopy(z, pcbddc->work_change));
2065     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2066   }
2067   PetscFunctionReturn(PETSC_SUCCESS);
2068 }
2069 
2070 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2071 {
2072   PCBDDCBenignMatMult_ctx ctx;
2073   PetscBool               apply_right, apply_left, reset_x;
2074 
2075   PetscFunctionBegin;
2076   PetscCall(MatShellGetContext(A, &ctx));
2077   if (transpose) {
2078     apply_right = ctx->apply_left;
2079     apply_left  = ctx->apply_right;
2080   } else {
2081     apply_right = ctx->apply_right;
2082     apply_left  = ctx->apply_left;
2083   }
2084   reset_x = PETSC_FALSE;
2085   if (apply_right) {
2086     const PetscScalar *ax;
2087     PetscInt           nl, i;
2088 
2089     PetscCall(VecGetLocalSize(x, &nl));
2090     PetscCall(VecGetArrayRead(x, &ax));
2091     PetscCall(PetscArraycpy(ctx->work, ax, nl));
2092     PetscCall(VecRestoreArrayRead(x, &ax));
2093     for (i = 0; i < ctx->benign_n; i++) {
2094       PetscScalar     sum, val;
2095       const PetscInt *idxs;
2096       PetscInt        nz, j;
2097       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2098       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2099       sum = 0.;
2100       if (ctx->apply_p0) {
2101         val = ctx->work[idxs[nz - 1]];
2102         for (j = 0; j < nz - 1; j++) {
2103           sum += ctx->work[idxs[j]];
2104           ctx->work[idxs[j]] += val;
2105         }
2106       } else {
2107         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2108       }
2109       ctx->work[idxs[nz - 1]] -= sum;
2110       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2111     }
2112     PetscCall(VecPlaceArray(x, ctx->work));
2113     reset_x = PETSC_TRUE;
2114   }
2115   if (transpose) {
2116     PetscCall(MatMultTranspose(ctx->A, x, y));
2117   } else {
2118     PetscCall(MatMult(ctx->A, x, y));
2119   }
2120   if (reset_x) PetscCall(VecResetArray(x));
2121   if (apply_left) {
2122     PetscScalar *ay;
2123     PetscInt     i;
2124 
2125     PetscCall(VecGetArray(y, &ay));
2126     for (i = 0; i < ctx->benign_n; i++) {
2127       PetscScalar     sum, val;
2128       const PetscInt *idxs;
2129       PetscInt        nz, j;
2130       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2131       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2132       val = -ay[idxs[nz - 1]];
2133       if (ctx->apply_p0) {
2134         sum = 0.;
2135         for (j = 0; j < nz - 1; j++) {
2136           sum += ay[idxs[j]];
2137           ay[idxs[j]] += val;
2138         }
2139         ay[idxs[nz - 1]] += sum;
2140       } else {
2141         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2142         ay[idxs[nz - 1]] = 0.;
2143       }
2144       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2145     }
2146     PetscCall(VecRestoreArray(y, &ay));
2147   }
2148   PetscFunctionReturn(PETSC_SUCCESS);
2149 }
2150 
2151 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2152 {
2153   PetscFunctionBegin;
2154   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2155   PetscFunctionReturn(PETSC_SUCCESS);
2156 }
2157 
2158 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2159 {
2160   PetscFunctionBegin;
2161   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2162   PetscFunctionReturn(PETSC_SUCCESS);
2163 }
2164 
2165 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2166 {
2167   PC_IS                  *pcis   = (PC_IS *)pc->data;
2168   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
2169   PCBDDCBenignMatMult_ctx ctx;
2170 
2171   PetscFunctionBegin;
2172   if (!restore) {
2173     Mat                A_IB, A_BI;
2174     PetscScalar       *work;
2175     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2176 
2177     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2178     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2179     PetscCall(PetscMalloc1(pcis->n, &work));
2180     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2181     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2182     PetscCall(MatSetType(A_IB, MATSHELL));
2183     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
2184     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
2185     PetscCall(PetscNew(&ctx));
2186     PetscCall(MatShellSetContext(A_IB, ctx));
2187     ctx->apply_left  = PETSC_TRUE;
2188     ctx->apply_right = PETSC_FALSE;
2189     ctx->apply_p0    = PETSC_FALSE;
2190     ctx->benign_n    = pcbddc->benign_n;
2191     if (reuse) {
2192       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2193       ctx->free                 = PETSC_FALSE;
2194     } else { /* TODO: could be optimized for successive solves */
2195       ISLocalToGlobalMapping N_to_D;
2196       PetscInt               i;
2197 
2198       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2199       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2200       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2201       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2202       ctx->free = PETSC_TRUE;
2203     }
2204     ctx->A    = pcis->A_IB;
2205     ctx->work = work;
2206     PetscCall(MatSetUp(A_IB));
2207     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2208     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2209     pcis->A_IB = A_IB;
2210 
2211     /* A_BI as A_IB^T */
2212     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2213     pcbddc->benign_original_mat = pcis->A_BI;
2214     pcis->A_BI                  = A_BI;
2215   } else {
2216     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2217     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2218     PetscCall(MatDestroy(&pcis->A_IB));
2219     pcis->A_IB = ctx->A;
2220     ctx->A     = NULL;
2221     PetscCall(MatDestroy(&pcis->A_BI));
2222     pcis->A_BI                  = pcbddc->benign_original_mat;
2223     pcbddc->benign_original_mat = NULL;
2224     if (ctx->free) {
2225       PetscInt i;
2226       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2227       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2228     }
2229     PetscCall(PetscFree(ctx->work));
2230     PetscCall(PetscFree(ctx));
2231   }
2232   PetscFunctionReturn(PETSC_SUCCESS);
2233 }
2234 
2235 /* used just in bddc debug mode */
2236 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2237 {
2238   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2239   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2240   Mat      An;
2241 
2242   PetscFunctionBegin;
2243   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2244   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2245   if (is1) {
2246     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2247     PetscCall(MatDestroy(&An));
2248   } else {
2249     *B = An;
2250   }
2251   PetscFunctionReturn(PETSC_SUCCESS);
2252 }
2253 
2254 /* TODO: add reuse flag */
2255 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2256 {
2257   Mat             Bt;
2258   PetscScalar    *a, *bdata;
2259   const PetscInt *ii, *ij;
2260   PetscInt        m, n, i, nnz, *bii, *bij;
2261   PetscBool       flg_row;
2262 
2263   PetscFunctionBegin;
2264   PetscCall(MatGetSize(A, &n, &m));
2265   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2266   PetscCall(MatSeqAIJGetArray(A, &a));
2267   nnz = n;
2268   for (i = 0; i < ii[n]; i++) {
2269     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2270   }
2271   PetscCall(PetscMalloc1(n + 1, &bii));
2272   PetscCall(PetscMalloc1(nnz, &bij));
2273   PetscCall(PetscMalloc1(nnz, &bdata));
2274   nnz    = 0;
2275   bii[0] = 0;
2276   for (i = 0; i < n; i++) {
2277     PetscInt j;
2278     for (j = ii[i]; j < ii[i + 1]; j++) {
2279       PetscScalar entry = a[j];
2280       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2281         bij[nnz]   = ij[j];
2282         bdata[nnz] = entry;
2283         nnz++;
2284       }
2285     }
2286     bii[i + 1] = nnz;
2287   }
2288   PetscCall(MatSeqAIJRestoreArray(A, &a));
2289   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2290   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2291   {
2292     Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2293     b->free_a     = PETSC_TRUE;
2294     b->free_ij    = PETSC_TRUE;
2295   }
2296   if (*B == A) PetscCall(MatDestroy(&A));
2297   *B = Bt;
2298   PetscFunctionReturn(PETSC_SUCCESS);
2299 }
2300 
2301 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2302 {
2303   Mat                    B = NULL;
2304   DM                     dm;
2305   IS                     is_dummy, *cc_n;
2306   ISLocalToGlobalMapping l2gmap_dummy;
2307   PCBDDCGraph            graph;
2308   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2309   PetscInt               i, n;
2310   PetscInt              *xadj, *adjncy;
2311   PetscBool              isplex = PETSC_FALSE;
2312 
2313   PetscFunctionBegin;
2314   if (ncc) *ncc = 0;
2315   if (cc) *cc = NULL;
2316   if (primalv) *primalv = NULL;
2317   PetscCall(PCBDDCGraphCreate(&graph));
2318   PetscCall(MatGetDM(pc->pmat, &dm));
2319   if (!dm) PetscCall(PCGetDM(pc, &dm));
2320   if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2321   if (filter) isplex = PETSC_FALSE;
2322 
2323   if (isplex) { /* this code has been modified from plexpartition.c */
2324     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2325     PetscInt       *adj = NULL;
2326     IS              cellNumbering;
2327     const PetscInt *cellNum;
2328     PetscBool       useCone, useClosure;
2329     PetscSection    section;
2330     PetscSegBuffer  adjBuffer;
2331     PetscSF         sfPoint;
2332 
2333     PetscCall(DMConvert(dm, DMPLEX, &dm));
2334     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2335     PetscCall(DMGetPointSF(dm, &sfPoint));
2336     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2337     /* Build adjacency graph via a section/segbuffer */
2338     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2339     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2340     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2341     /* Always use FVM adjacency to create partitioner graph */
2342     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2343     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2344     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2345     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2346     for (n = 0, p = pStart; p < pEnd; p++) {
2347       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2348       if (nroots > 0) {
2349         if (cellNum[p] < 0) continue;
2350       }
2351       adjSize = PETSC_DETERMINE;
2352       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2353       for (a = 0; a < adjSize; ++a) {
2354         const PetscInt point = adj[a];
2355         if (pStart <= point && point < pEnd) {
2356           PetscInt *PETSC_RESTRICT pBuf;
2357           PetscCall(PetscSectionAddDof(section, p, 1));
2358           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2359           *pBuf = point;
2360         }
2361       }
2362       n++;
2363     }
2364     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2365     /* Derive CSR graph from section/segbuffer */
2366     PetscCall(PetscSectionSetUp(section));
2367     PetscCall(PetscSectionGetStorageSize(section, &size));
2368     PetscCall(PetscMalloc1(n + 1, &xadj));
2369     for (idx = 0, p = pStart; p < pEnd; p++) {
2370       if (nroots > 0) {
2371         if (cellNum[p] < 0) continue;
2372       }
2373       PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2374     }
2375     xadj[n] = size;
2376     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2377     /* Clean up */
2378     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2379     PetscCall(PetscSectionDestroy(&section));
2380     PetscCall(PetscFree(adj));
2381     graph->xadj   = xadj;
2382     graph->adjncy = adjncy;
2383   } else {
2384     Mat       A;
2385     PetscBool isseqaij, flg_row;
2386 
2387     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2388     if (!A->rmap->N || !A->cmap->N) {
2389       PetscCall(PCBDDCGraphDestroy(&graph));
2390       PetscFunctionReturn(PETSC_SUCCESS);
2391     }
2392     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2393     if (!isseqaij && filter) {
2394       PetscBool isseqdense;
2395 
2396       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2397       if (!isseqdense) {
2398         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2399       } else { /* TODO: rectangular case and LDA */
2400         PetscScalar *array;
2401         PetscReal    chop = 1.e-6;
2402 
2403         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2404         PetscCall(MatDenseGetArray(B, &array));
2405         PetscCall(MatGetSize(B, &n, NULL));
2406         for (i = 0; i < n; i++) {
2407           PetscInt j;
2408           for (j = i + 1; j < n; j++) {
2409             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2410             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2411             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2412           }
2413         }
2414         PetscCall(MatDenseRestoreArray(B, &array));
2415         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2416       }
2417     } else {
2418       PetscCall(PetscObjectReference((PetscObject)A));
2419       B = A;
2420     }
2421     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2422 
2423     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2424     if (filter) {
2425       PetscScalar *data;
2426       PetscInt     j, cum;
2427 
2428       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2429       PetscCall(MatSeqAIJGetArray(B, &data));
2430       cum = 0;
2431       for (i = 0; i < n; i++) {
2432         PetscInt t;
2433 
2434         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2435           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2436           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2437         }
2438         t                = xadj_filtered[i];
2439         xadj_filtered[i] = cum;
2440         cum += t;
2441       }
2442       PetscCall(MatSeqAIJRestoreArray(B, &data));
2443       graph->xadj   = xadj_filtered;
2444       graph->adjncy = adjncy_filtered;
2445     } else {
2446       graph->xadj   = xadj;
2447       graph->adjncy = adjncy;
2448     }
2449   }
2450   /* compute local connected components using PCBDDCGraph */
2451   graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2452   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2453   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2454   PetscCall(ISDestroy(&is_dummy));
2455   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2456   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2457   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2458   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2459 
2460   /* partial clean up */
2461   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2462   if (B) {
2463     PetscBool flg_row;
2464     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2465     PetscCall(MatDestroy(&B));
2466   }
2467   if (isplex) {
2468     PetscCall(PetscFree(xadj));
2469     PetscCall(PetscFree(adjncy));
2470   }
2471 
2472   /* get back data */
2473   if (isplex) {
2474     if (ncc) *ncc = graph->ncc;
2475     if (cc || primalv) {
2476       Mat          A;
2477       PetscBT      btv, btvt, btvc;
2478       PetscSection subSection;
2479       PetscInt    *ids, cum, cump, *cids, *pids;
2480       PetscInt     dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2481 
2482       PetscCall(DMGetDimension(dm, &dim));
2483       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2484       PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2485       PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2486       PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2487       PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2488       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2489       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2490       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2491       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2492       PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2493 
2494       /* First see if we find corners for the subdomains, i.e. a vertex
2495          shared by at least dim subdomain boundary faces. This does not
2496          cover all the possible cases with simplices but it is enough
2497          for tensor cells */
2498       if (vStart != fStart && dim <= 3) {
2499         for (PetscInt c = cStart; c < cEnd; c++) {
2500           PetscInt        nf, cnt = 0, mcnt = dim, *cfaces;
2501           const PetscInt *faces;
2502 
2503           PetscCall(DMPlexGetConeSize(dm, c, &nf));
2504           PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2505           PetscCall(DMPlexGetCone(dm, c, &faces));
2506           for (PetscInt f = 0; f < nf; f++) {
2507             PetscInt nc, ff;
2508 
2509             PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2510             PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2511             if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2512           }
2513           if (cnt >= mcnt) {
2514             PetscInt size, *closure = NULL;
2515 
2516             PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2517             for (PetscInt k = 0; k < 2 * size; k += 2) {
2518               PetscInt v = closure[k];
2519               if (v >= vStart && v < vEnd) {
2520                 PetscInt vsize, *vclosure = NULL;
2521 
2522                 cnt = 0;
2523                 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2524                 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2525                   PetscInt f = vclosure[vk];
2526                   if (f >= fStart && f < fEnd) {
2527                     PetscInt  nc, ff;
2528                     PetscBool valid = PETSC_FALSE;
2529 
2530                     for (PetscInt fk = 0; fk < nf; fk++)
2531                       if (f == cfaces[fk]) valid = PETSC_TRUE;
2532                     if (!valid) continue;
2533                     PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2534                     PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2535                     if (nc == 1 && f == ff) cnt++;
2536                   }
2537                 }
2538                 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2539                 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2540               }
2541             }
2542             PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2543           }
2544           PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2545         }
2546       }
2547 
2548       cids[0] = 0;
2549       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2550         PetscInt j;
2551 
2552         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2553         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2554           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2555 
2556           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2557           for (k = 0; k < 2 * size; k += 2) {
2558             PetscInt s, pp, p = closure[k], off, dof, cdof;
2559 
2560             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2561             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2562             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2563             for (s = 0; s < dof - cdof; s++) {
2564               if (PetscBTLookupSet(btvt, off + s)) continue;
2565               if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2566               else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2567               else pids[cump++] = off + s; /* cross-vertex */
2568             }
2569             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2570             if (pp != p) {
2571               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2572               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2573               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2574               for (s = 0; s < dof - cdof; s++) {
2575                 if (PetscBTLookupSet(btvt, off + s)) continue;
2576                 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2577                 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2578                 else pids[cump++] = off + s; /* cross-vertex */
2579               }
2580             }
2581           }
2582           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2583         }
2584         cids[i + 1] = cum;
2585         /* mark dofs as already assigned */
2586         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2587       }
2588       if (cc) {
2589         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2590         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2591         *cc = cc_n;
2592       }
2593       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2594       PetscCall(PetscFree3(ids, cids, pids));
2595       PetscCall(PetscBTDestroy(&btv));
2596       PetscCall(PetscBTDestroy(&btvt));
2597       PetscCall(PetscBTDestroy(&btvc));
2598       PetscCall(DMDestroy(&dm));
2599     }
2600   } else {
2601     if (ncc) *ncc = graph->ncc;
2602     if (cc) {
2603       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2604       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2605       *cc = cc_n;
2606     }
2607   }
2608   /* clean up graph */
2609   graph->xadj   = NULL;
2610   graph->adjncy = NULL;
2611   PetscCall(PCBDDCGraphDestroy(&graph));
2612   PetscFunctionReturn(PETSC_SUCCESS);
2613 }
2614 
2615 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2616 {
2617   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2618   PC_IS   *pcis   = (PC_IS *)pc->data;
2619   IS       dirIS  = NULL;
2620   PetscInt i;
2621 
2622   PetscFunctionBegin;
2623   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2624   if (zerodiag) {
2625     Mat             A;
2626     Vec             vec3_N;
2627     PetscScalar    *vals;
2628     const PetscInt *idxs;
2629     PetscInt        nz, *count;
2630 
2631     /* p0 */
2632     PetscCall(VecSet(pcis->vec1_N, 0.));
2633     PetscCall(PetscMalloc1(pcis->n, &vals));
2634     PetscCall(ISGetLocalSize(zerodiag, &nz));
2635     PetscCall(ISGetIndices(zerodiag, &idxs));
2636     for (i = 0; i < nz; i++) vals[i] = 1.;
2637     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2638     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2639     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2640     /* v_I */
2641     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2642     for (i = 0; i < nz; i++) vals[i] = 0.;
2643     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2644     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2645     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2646     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2647     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2648     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2649     if (dirIS) {
2650       PetscInt n;
2651 
2652       PetscCall(ISGetLocalSize(dirIS, &n));
2653       PetscCall(ISGetIndices(dirIS, &idxs));
2654       for (i = 0; i < n; i++) vals[i] = 0.;
2655       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2656       PetscCall(ISRestoreIndices(dirIS, &idxs));
2657     }
2658     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2659     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2660     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2661     PetscCall(VecSet(vec3_N, 0.));
2662     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2663     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2664     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2665     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2666     PetscCall(PetscFree(vals));
2667     PetscCall(VecDestroy(&vec3_N));
2668 
2669     /* there should not be any pressure dofs lying on the interface */
2670     PetscCall(PetscCalloc1(pcis->n, &count));
2671     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2672     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2673     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2674     PetscCall(ISGetIndices(zerodiag, &idxs));
2675     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2676     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2677     PetscCall(PetscFree(count));
2678   }
2679   PetscCall(ISDestroy(&dirIS));
2680 
2681   /* check PCBDDCBenignGetOrSetP0 */
2682   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2683   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2684   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2685   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2686   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2687   for (i = 0; i < pcbddc->benign_n; i++) {
2688     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2689     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2690   }
2691   PetscFunctionReturn(PETSC_SUCCESS);
2692 }
2693 
2694 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2695 {
2696   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2697   Mat_IS   *matis     = (Mat_IS *)pc->pmat->data;
2698   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2699   PetscInt  nz, n, benign_n, bsp = 1;
2700   PetscInt *interior_dofs, n_interior_dofs, nneu;
2701   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2702 
2703   PetscFunctionBegin;
2704   if (reuse) goto project_b0;
2705   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2706   PetscCall(MatDestroy(&pcbddc->benign_B0));
2707   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2708   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2709   has_null_pressures = PETSC_TRUE;
2710   have_null          = PETSC_TRUE;
2711   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2712      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2713      Checks if all the pressure dofs in each subdomain have a zero diagonal
2714      If not, a change of basis on pressures is not needed
2715      since the local Schur complements are already SPD
2716   */
2717   if (pcbddc->n_ISForDofsLocal) {
2718     IS        iP = NULL;
2719     PetscInt  p, *pp;
2720     PetscBool flg, blocked = PETSC_FALSE;
2721 
2722     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2723     n = pcbddc->n_ISForDofsLocal;
2724     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2725     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2726     PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL));
2727     PetscOptionsEnd();
2728     if (!flg) {
2729       n     = 1;
2730       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2731     }
2732 
2733     bsp = 0;
2734     for (p = 0; p < n; p++) {
2735       PetscInt bs = 1;
2736 
2737       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2738       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2739       bsp += bs;
2740     }
2741     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2742     bsp = 0;
2743     for (p = 0; p < n; p++) {
2744       const PetscInt *idxs;
2745       PetscInt        b, bs = 1, npl, *bidxs;
2746 
2747       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2748       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2749       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2750       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2751       for (b = 0; b < bs; b++) {
2752         PetscInt i;
2753 
2754         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2755         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2756         bsp++;
2757       }
2758       PetscCall(PetscFree(bidxs));
2759       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2760     }
2761     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2762 
2763     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2764     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2765     if (iP) {
2766       IS newpressures;
2767 
2768       PetscCall(ISDifference(pressures, iP, &newpressures));
2769       PetscCall(ISDestroy(&pressures));
2770       pressures = newpressures;
2771     }
2772     PetscCall(ISSorted(pressures, &sorted));
2773     if (!sorted) PetscCall(ISSort(pressures));
2774     PetscCall(PetscFree(pp));
2775   }
2776 
2777   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2778   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2779   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2780   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2781   PetscCall(ISSorted(zerodiag, &sorted));
2782   if (!sorted) PetscCall(ISSort(zerodiag));
2783   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2784   zerodiag_save = zerodiag;
2785   PetscCall(ISGetLocalSize(zerodiag, &nz));
2786   if (!nz) {
2787     if (n) have_null = PETSC_FALSE;
2788     has_null_pressures = PETSC_FALSE;
2789     PetscCall(ISDestroy(&zerodiag));
2790   }
2791   recompute_zerodiag = PETSC_FALSE;
2792 
2793   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2794   zerodiag_subs   = NULL;
2795   benign_n        = 0;
2796   n_interior_dofs = 0;
2797   interior_dofs   = NULL;
2798   nneu            = 0;
2799   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2800   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2801   if (checkb) { /* need to compute interior nodes */
2802     PetscInt               n, i;
2803     PetscInt              *count;
2804     ISLocalToGlobalMapping mapping;
2805 
2806     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2807     PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2808     PetscCall(PetscMalloc1(n, &interior_dofs));
2809     for (i = 0; i < n; i++)
2810       if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2811     PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2812   }
2813   if (has_null_pressures) {
2814     IS             *subs;
2815     PetscInt        nsubs, i, j, nl;
2816     const PetscInt *idxs;
2817     PetscScalar    *array;
2818     Vec            *work;
2819 
2820     subs  = pcbddc->local_subs;
2821     nsubs = pcbddc->n_local_subs;
2822     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2823     if (checkb) {
2824       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2825       PetscCall(ISGetLocalSize(zerodiag, &nl));
2826       PetscCall(ISGetIndices(zerodiag, &idxs));
2827       /* work[0] = 1_p */
2828       PetscCall(VecSet(work[0], 0.));
2829       PetscCall(VecGetArray(work[0], &array));
2830       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2831       PetscCall(VecRestoreArray(work[0], &array));
2832       /* work[0] = 1_v */
2833       PetscCall(VecSet(work[1], 1.));
2834       PetscCall(VecGetArray(work[1], &array));
2835       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2836       PetscCall(VecRestoreArray(work[1], &array));
2837       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2838     }
2839 
2840     if (nsubs > 1 || bsp > 1) {
2841       IS      *is;
2842       PetscInt b, totb;
2843 
2844       totb  = bsp;
2845       is    = bsp > 1 ? bzerodiag : &zerodiag;
2846       nsubs = PetscMax(nsubs, 1);
2847       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2848       for (b = 0; b < totb; b++) {
2849         for (i = 0; i < nsubs; i++) {
2850           ISLocalToGlobalMapping l2g;
2851           IS                     t_zerodiag_subs;
2852           PetscInt               nl;
2853 
2854           if (subs) {
2855             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2856           } else {
2857             IS tis;
2858 
2859             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2860             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2861             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2862             PetscCall(ISDestroy(&tis));
2863           }
2864           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2865           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2866           if (nl) {
2867             PetscBool valid = PETSC_TRUE;
2868 
2869             if (checkb) {
2870               PetscCall(VecSet(matis->x, 0));
2871               PetscCall(ISGetLocalSize(subs[i], &nl));
2872               PetscCall(ISGetIndices(subs[i], &idxs));
2873               PetscCall(VecGetArray(matis->x, &array));
2874               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2875               PetscCall(VecRestoreArray(matis->x, &array));
2876               PetscCall(ISRestoreIndices(subs[i], &idxs));
2877               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2878               PetscCall(MatMult(matis->A, matis->x, matis->y));
2879               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2880               PetscCall(VecGetArray(matis->y, &array));
2881               for (j = 0; j < n_interior_dofs; j++) {
2882                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2883                   valid = PETSC_FALSE;
2884                   break;
2885                 }
2886               }
2887               PetscCall(VecRestoreArray(matis->y, &array));
2888             }
2889             if (valid && nneu) {
2890               const PetscInt *idxs;
2891               PetscInt        nzb;
2892 
2893               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2894               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2895               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2896               if (nzb) valid = PETSC_FALSE;
2897             }
2898             if (valid && pressures) {
2899               IS       t_pressure_subs, tmp;
2900               PetscInt i1, i2;
2901 
2902               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2903               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2904               PetscCall(ISGetLocalSize(tmp, &i1));
2905               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2906               if (i2 != i1) valid = PETSC_FALSE;
2907               PetscCall(ISDestroy(&t_pressure_subs));
2908               PetscCall(ISDestroy(&tmp));
2909             }
2910             if (valid) {
2911               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2912               benign_n++;
2913             } else recompute_zerodiag = PETSC_TRUE;
2914           }
2915           PetscCall(ISDestroy(&t_zerodiag_subs));
2916           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2917         }
2918       }
2919     } else { /* there's just one subdomain (or zero if they have not been detected */
2920       PetscBool valid = PETSC_TRUE;
2921 
2922       if (nneu) valid = PETSC_FALSE;
2923       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2924       if (valid && checkb) {
2925         PetscCall(MatMult(matis->A, work[0], matis->x));
2926         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2927         PetscCall(VecGetArray(matis->x, &array));
2928         for (j = 0; j < n_interior_dofs; j++) {
2929           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2930             valid = PETSC_FALSE;
2931             break;
2932           }
2933         }
2934         PetscCall(VecRestoreArray(matis->x, &array));
2935       }
2936       if (valid) {
2937         benign_n = 1;
2938         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2939         PetscCall(PetscObjectReference((PetscObject)zerodiag));
2940         zerodiag_subs[0] = zerodiag;
2941       }
2942     }
2943     if (checkb) PetscCall(VecDestroyVecs(2, &work));
2944   }
2945   PetscCall(PetscFree(interior_dofs));
2946 
2947   if (!benign_n) {
2948     PetscInt n;
2949 
2950     PetscCall(ISDestroy(&zerodiag));
2951     recompute_zerodiag = PETSC_FALSE;
2952     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2953     if (n) have_null = PETSC_FALSE;
2954   }
2955 
2956   /* final check for null pressures */
2957   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
2958 
2959   if (recompute_zerodiag) {
2960     PetscCall(ISDestroy(&zerodiag));
2961     if (benign_n == 1) {
2962       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2963       zerodiag = zerodiag_subs[0];
2964     } else {
2965       PetscInt i, nzn, *new_idxs;
2966 
2967       nzn = 0;
2968       for (i = 0; i < benign_n; i++) {
2969         PetscInt ns;
2970         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2971         nzn += ns;
2972       }
2973       PetscCall(PetscMalloc1(nzn, &new_idxs));
2974       nzn = 0;
2975       for (i = 0; i < benign_n; i++) {
2976         PetscInt ns, *idxs;
2977         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2978         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2979         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2980         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2981         nzn += ns;
2982       }
2983       PetscCall(PetscSortInt(nzn, new_idxs));
2984       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2985     }
2986     have_null = PETSC_FALSE;
2987   }
2988 
2989   /* determines if the coarse solver will be singular or not */
2990   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2991 
2992   /* Prepare matrix to compute no-net-flux */
2993   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2994     Mat                    A, loc_divudotp;
2995     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2996     IS                     row, col, isused = NULL;
2997     PetscInt               M, N, n, st, n_isused;
2998 
2999     if (pressures) {
3000       isused = pressures;
3001     } else {
3002       isused = zerodiag_save;
3003     }
3004     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
3005     PetscCall(MatISGetLocalMat(pc->pmat, &A));
3006     PetscCall(MatGetLocalSize(A, &n, NULL));
3007     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
3008     n_isused = 0;
3009     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
3010     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
3011     st = st - n_isused;
3012     if (n) {
3013       const PetscInt *gidxs;
3014 
3015       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
3016       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
3017       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
3018       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3019       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
3020       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
3021     } else {
3022       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3023       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3024       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3025     }
3026     PetscCall(MatGetSize(pc->pmat, NULL, &N));
3027     PetscCall(ISGetSize(row, &M));
3028     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3029     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3030     PetscCall(ISDestroy(&row));
3031     PetscCall(ISDestroy(&col));
3032     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3033     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3034     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3035     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3036     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3037     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3038     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3039     PetscCall(MatDestroy(&loc_divudotp));
3040     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3041     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3042   }
3043   PetscCall(ISDestroy(&zerodiag_save));
3044   PetscCall(ISDestroy(&pressures));
3045   if (bzerodiag) {
3046     PetscInt i;
3047 
3048     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3049     PetscCall(PetscFree(bzerodiag));
3050   }
3051   pcbddc->benign_n             = benign_n;
3052   pcbddc->benign_zerodiag_subs = zerodiag_subs;
3053 
3054   /* determines if the problem has subdomains with 0 pressure block */
3055   have_null = (PetscBool)(!!pcbddc->benign_n);
3056   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3057 
3058 project_b0:
3059   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3060   /* change of basis and p0 dofs */
3061   if (pcbddc->benign_n) {
3062     PetscInt i, s, *nnz;
3063 
3064     /* local change of basis for pressures */
3065     PetscCall(MatDestroy(&pcbddc->benign_change));
3066     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3067     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3068     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3069     PetscCall(PetscMalloc1(n, &nnz));
3070     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3071     for (i = 0; i < pcbddc->benign_n; i++) {
3072       const PetscInt *idxs;
3073       PetscInt        nzs, j;
3074 
3075       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3076       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3077       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3078       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
3079       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3080     }
3081     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3082     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3083     PetscCall(PetscFree(nnz));
3084     /* set identity by default */
3085     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3086     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3087     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3088     /* set change on pressures */
3089     for (s = 0; s < pcbddc->benign_n; s++) {
3090       PetscScalar    *array;
3091       const PetscInt *idxs;
3092       PetscInt        nzs;
3093 
3094       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3095       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3096       for (i = 0; i < nzs - 1; i++) {
3097         PetscScalar vals[2];
3098         PetscInt    cols[2];
3099 
3100         cols[0] = idxs[i];
3101         cols[1] = idxs[nzs - 1];
3102         vals[0] = 1.;
3103         vals[1] = 1.;
3104         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3105       }
3106       PetscCall(PetscMalloc1(nzs, &array));
3107       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3108       array[nzs - 1] = 1.;
3109       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3110       /* store local idxs for p0 */
3111       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3112       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3113       PetscCall(PetscFree(array));
3114     }
3115     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3116     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3117 
3118     /* project if needed */
3119     if (pcbddc->benign_change_explicit) {
3120       Mat M;
3121 
3122       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3123       PetscCall(MatDestroy(&pcbddc->local_mat));
3124       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3125       PetscCall(MatDestroy(&M));
3126     }
3127     /* store global idxs for p0 */
3128     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3129   }
3130   *zerodiaglocal = zerodiag;
3131   PetscFunctionReturn(PETSC_SUCCESS);
3132 }
3133 
3134 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3135 {
3136   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
3137   PetscScalar *array;
3138 
3139   PetscFunctionBegin;
3140   if (!pcbddc->benign_sf) {
3141     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3142     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3143   }
3144   if (get) {
3145     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3146     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3147     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3148     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3149   } else {
3150     PetscCall(VecGetArray(v, &array));
3151     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3152     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3153     PetscCall(VecRestoreArray(v, &array));
3154   }
3155   PetscFunctionReturn(PETSC_SUCCESS);
3156 }
3157 
3158 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3159 {
3160   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3161 
3162   PetscFunctionBegin;
3163   /* TODO: add error checking
3164     - avoid nested pop (or push) calls.
3165     - cannot push before pop.
3166     - cannot call this if pcbddc->local_mat is NULL
3167   */
3168   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3169   if (pop) {
3170     if (pcbddc->benign_change_explicit) {
3171       IS       is_p0;
3172       MatReuse reuse;
3173 
3174       /* extract B_0 */
3175       reuse = MAT_INITIAL_MATRIX;
3176       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3177       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3178       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3179       /* remove rows and cols from local problem */
3180       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3181       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3182       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3183       PetscCall(ISDestroy(&is_p0));
3184     } else {
3185       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
3186       PetscScalar *vals;
3187       PetscInt     i, n, *idxs_ins;
3188 
3189       PetscCall(VecGetLocalSize(matis->y, &n));
3190       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3191       if (!pcbddc->benign_B0) {
3192         PetscInt *nnz;
3193         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3194         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3195         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3196         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3197         for (i = 0; i < pcbddc->benign_n; i++) {
3198           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3199           nnz[i] = n - nnz[i];
3200         }
3201         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3202         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3203         PetscCall(PetscFree(nnz));
3204       }
3205 
3206       for (i = 0; i < pcbddc->benign_n; i++) {
3207         PetscScalar *array;
3208         PetscInt    *idxs, j, nz, cum;
3209 
3210         PetscCall(VecSet(matis->x, 0.));
3211         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3212         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3213         for (j = 0; j < nz; j++) vals[j] = 1.;
3214         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3215         PetscCall(VecAssemblyBegin(matis->x));
3216         PetscCall(VecAssemblyEnd(matis->x));
3217         PetscCall(VecSet(matis->y, 0.));
3218         PetscCall(MatMult(matis->A, matis->x, matis->y));
3219         PetscCall(VecGetArray(matis->y, &array));
3220         cum = 0;
3221         for (j = 0; j < n; j++) {
3222           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3223             vals[cum]     = array[j];
3224             idxs_ins[cum] = j;
3225             cum++;
3226           }
3227         }
3228         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3229         PetscCall(VecRestoreArray(matis->y, &array));
3230         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3231       }
3232       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3233       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3234       PetscCall(PetscFree2(idxs_ins, vals));
3235     }
3236   } else { /* push */
3237 
3238     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3239     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3240       PetscScalar *B0_vals;
3241       PetscInt    *B0_cols, B0_ncol;
3242 
3243       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3244       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3245       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3246       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3247       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3248     }
3249     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3250     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3251   }
3252   PetscFunctionReturn(PETSC_SUCCESS);
3253 }
3254 
3255 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3256 {
3257   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3258   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3259   PetscBLASInt    B_neigs, B_ierr, B_lwork;
3260   PetscBLASInt   *B_iwork, *B_ifail;
3261   PetscScalar    *work, lwork;
3262   PetscScalar    *St, *S, *eigv;
3263   PetscScalar    *Sarray, *Starray;
3264   PetscReal      *eigs, thresh, lthresh, uthresh;
3265   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3266   PetscBool       allocated_S_St, upart;
3267 #if defined(PETSC_USE_COMPLEX)
3268   PetscReal *rwork;
3269 #endif
3270 
3271   PetscFunctionBegin;
3272   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3273   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3274   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3275   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3276              sub_schurs->is_posdef);
3277   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3278 
3279   if (pcbddc->dbg_flag) {
3280     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3281     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3282     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3283     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3284     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3285   }
3286 
3287   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3288 
3289   /* max size of subsets */
3290   mss = 0;
3291   for (i = 0; i < sub_schurs->n_subs; i++) {
3292     PetscInt subset_size;
3293 
3294     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3295     mss = PetscMax(mss, subset_size);
3296   }
3297 
3298   /* min/max and threshold */
3299   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3300   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3301   nmax           = PetscMax(nmin, nmax);
3302   allocated_S_St = PETSC_FALSE;
3303   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3304     allocated_S_St = PETSC_TRUE;
3305   }
3306 
3307   /* allocate lapack workspace */
3308   cum = cum2 = 0;
3309   maxneigs   = 0;
3310   for (i = 0; i < sub_schurs->n_subs; i++) {
3311     PetscInt n, subset_size;
3312 
3313     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3314     n = PetscMin(subset_size, nmax);
3315     cum += subset_size;
3316     cum2 += subset_size * n;
3317     maxneigs = PetscMax(maxneigs, n);
3318   }
3319   lwork = 0;
3320   if (mss) {
3321     PetscScalar  sdummy  = 0.;
3322     PetscBLASInt B_itype = 1;
3323     PetscBLASInt B_N, idummy = 0;
3324     PetscReal    rdummy = 0., zero = 0.0;
3325     PetscReal    eps = 0.0; /* dlamch? */
3326 
3327     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3328     PetscCall(PetscBLASIntCast(mss, &B_N));
3329     B_lwork = -1;
3330     /* some implementations may complain about NULL pointers, even if we are querying */
3331     S       = &sdummy;
3332     St      = &sdummy;
3333     eigs    = &rdummy;
3334     eigv    = &sdummy;
3335     B_iwork = &idummy;
3336     B_ifail = &idummy;
3337 #if defined(PETSC_USE_COMPLEX)
3338     rwork = &rdummy;
3339 #endif
3340     thresh = 1.0;
3341     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3342 #if defined(PETSC_USE_COMPLEX)
3343     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3344 #else
3345     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3346 #endif
3347     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr);
3348     PetscCall(PetscFPTrapPop());
3349   }
3350 
3351   nv = 0;
3352   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3353     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3354   }
3355   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3356   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3357   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3358 #if defined(PETSC_USE_COMPLEX)
3359   PetscCall(PetscMalloc1(7 * mss, &rwork));
3360 #endif
3361   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3362                          &pcbddc->adaptive_constraints_data));
3363   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3364 
3365   maxneigs = 0;
3366   cum = cumarray                           = 0;
3367   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3368   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3369   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3370     const PetscInt *idxs;
3371 
3372     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3373     for (cum = 0; cum < nv; cum++) {
3374       pcbddc->adaptive_constraints_n[cum]            = 1;
3375       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3376       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3377       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3378       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3379     }
3380     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3381   }
3382 
3383   if (mss) { /* multilevel */
3384     if (sub_schurs->gdsw) {
3385       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3386       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3387     } else {
3388       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3389       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3390     }
3391   }
3392 
3393   lthresh = pcbddc->adaptive_threshold[0];
3394   uthresh = pcbddc->adaptive_threshold[1];
3395   upart   = pcbddc->use_deluxe_scaling;
3396   for (i = 0; i < sub_schurs->n_subs; i++) {
3397     const PetscInt *idxs;
3398     PetscReal       upper, lower;
3399     PetscInt        j, subset_size, eigs_start = 0;
3400     PetscBLASInt    B_N;
3401     PetscBool       same_data = PETSC_FALSE;
3402     PetscBool       scal      = PETSC_FALSE;
3403 
3404     if (upart) {
3405       upper = PETSC_MAX_REAL;
3406       lower = uthresh;
3407     } else {
3408       if (sub_schurs->gdsw) {
3409         upper = uthresh;
3410         lower = PETSC_MIN_REAL;
3411       } else {
3412         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3413         upper = 1. / uthresh;
3414         lower = 0.;
3415       }
3416     }
3417     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3418     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3419     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3420     /* this is experimental: we assume the dofs have been properly grouped to have
3421        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3422     if (!sub_schurs->is_posdef) {
3423       Mat T;
3424 
3425       for (j = 0; j < subset_size; j++) {
3426         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3427           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3428           PetscCall(MatScale(T, -1.0));
3429           PetscCall(MatDestroy(&T));
3430           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3431           PetscCall(MatScale(T, -1.0));
3432           PetscCall(MatDestroy(&T));
3433           if (sub_schurs->change_primal_sub) {
3434             PetscInt        nz, k;
3435             const PetscInt *idxs;
3436 
3437             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3438             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3439             for (k = 0; k < nz; k++) {
3440               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3441               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3442             }
3443             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3444           }
3445           scal = PETSC_TRUE;
3446           break;
3447         }
3448       }
3449     }
3450 
3451     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3452       if (sub_schurs->is_symmetric) {
3453         PetscInt j, k;
3454         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3455           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3456           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3457         }
3458         for (j = 0; j < subset_size; j++) {
3459           for (k = j; k < subset_size; k++) {
3460             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3461             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3462           }
3463         }
3464       } else {
3465         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3466         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3467       }
3468     } else {
3469       S  = Sarray + cumarray;
3470       St = Starray + cumarray;
3471     }
3472     /* see if we can save some work */
3473     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3474 
3475     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3476       B_neigs = 0;
3477     } else {
3478       PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0;
3479       PetscReal    eps = -1.0; /* dlamch? */
3480       PetscInt     nmin_s;
3481       PetscBool    compute_range;
3482 
3483       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3484       B_neigs       = 0;
3485       compute_range = (PetscBool)!same_data;
3486       if (nmin >= subset_size) compute_range = PETSC_FALSE;
3487 
3488       if (pcbddc->dbg_flag) {
3489         PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3490 
3491         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3492         PetscCall(
3493           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3494       }
3495 
3496       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3497       if (compute_range) {
3498         /* ask for eigenvalues larger than thresh */
3499         if (sub_schurs->is_posdef) {
3500 #if defined(PETSC_USE_COMPLEX)
3501           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3502 #else
3503           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3504 #endif
3505           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3506         } else { /* no theory so far, but it works nicely */
3507           PetscInt  recipe = 0, recipe_m = 1;
3508           PetscReal bb[2];
3509 
3510           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3511           switch (recipe) {
3512           case 0:
3513             if (scal) {
3514               bb[0] = PETSC_MIN_REAL;
3515               bb[1] = lthresh;
3516             } else {
3517               bb[0] = uthresh;
3518               bb[1] = PETSC_MAX_REAL;
3519             }
3520 #if defined(PETSC_USE_COMPLEX)
3521             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3522 #else
3523             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3524 #endif
3525             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3526             break;
3527           case 1:
3528             bb[0] = PETSC_MIN_REAL;
3529             bb[1] = lthresh * lthresh;
3530 #if defined(PETSC_USE_COMPLEX)
3531             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3532 #else
3533             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3534 #endif
3535             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3536             if (!scal) {
3537               PetscBLASInt B_neigs2 = 0;
3538 
3539               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3540               bb[1] = PETSC_MAX_REAL;
3541               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3542               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3543 #if defined(PETSC_USE_COMPLEX)
3544               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3545 #else
3546               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3547 #endif
3548               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3549               B_neigs += B_neigs2;
3550             }
3551             break;
3552           case 2:
3553             if (scal) {
3554               bb[0] = PETSC_MIN_REAL;
3555               bb[1] = 0;
3556 #if defined(PETSC_USE_COMPLEX)
3557               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3558 #else
3559               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3560 #endif
3561               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3562             } else {
3563               PetscBLASInt B_neigs2 = 0;
3564               PetscBool    do_copy  = PETSC_FALSE;
3565 
3566               lthresh = PetscMax(lthresh, 0.0);
3567               if (lthresh > 0.0) {
3568                 bb[0] = PETSC_MIN_REAL;
3569                 bb[1] = lthresh * lthresh;
3570 
3571                 do_copy = PETSC_TRUE;
3572 #if defined(PETSC_USE_COMPLEX)
3573                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3574 #else
3575                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3576 #endif
3577                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3578               }
3579               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3580               bb[1] = PETSC_MAX_REAL;
3581               if (do_copy) {
3582                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3583                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3584               }
3585 #if defined(PETSC_USE_COMPLEX)
3586               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3587 #else
3588               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3589 #endif
3590               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3591               B_neigs += B_neigs2;
3592             }
3593             break;
3594           case 3:
3595             if (scal) {
3596               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3597             } else {
3598               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3599             }
3600             if (!scal) {
3601               bb[0] = uthresh;
3602               bb[1] = PETSC_MAX_REAL;
3603 #if defined(PETSC_USE_COMPLEX)
3604               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3605 #else
3606               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3607 #endif
3608               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3609             }
3610             if (recipe_m > 0 && B_N - B_neigs > 0) {
3611               PetscBLASInt B_neigs2 = 0;
3612 
3613               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3614               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3615               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3616 #if defined(PETSC_USE_COMPLEX)
3617               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3618 #else
3619               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3620 #endif
3621               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3622               B_neigs += B_neigs2;
3623             }
3624             break;
3625           case 4:
3626             bb[0] = PETSC_MIN_REAL;
3627             bb[1] = lthresh;
3628 #if defined(PETSC_USE_COMPLEX)
3629             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3630 #else
3631             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3632 #endif
3633             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3634             {
3635               PetscBLASInt B_neigs2 = 0;
3636 
3637               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3638               bb[1] = PETSC_MAX_REAL;
3639               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3640               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3641 #if defined(PETSC_USE_COMPLEX)
3642               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3643 #else
3644               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3645 #endif
3646               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3647               B_neigs += B_neigs2;
3648             }
3649             break;
3650           case 5: /* same as before: first compute all eigenvalues, then filter */
3651 #if defined(PETSC_USE_COMPLEX)
3652             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3653 #else
3654             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3655 #endif
3656             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3657             {
3658               PetscInt e, k, ne;
3659               for (e = 0, ne = 0; e < B_neigs; e++) {
3660                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3661                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3662                   eigs[ne] = eigs[e];
3663                   ne++;
3664                 }
3665               }
3666               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3667               PetscCall(PetscBLASIntCast(ne, &B_neigs));
3668             }
3669             break;
3670           default:
3671             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3672           }
3673         }
3674       } else if (!same_data) { /* this is just to see all the eigenvalues */
3675         PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3676 #if defined(PETSC_USE_COMPLEX)
3677         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3678 #else
3679         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3680 #endif
3681         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3682       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3683         PetscInt k;
3684         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3685         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3686         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3687         nmin = nmax;
3688         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3689         for (k = 0; k < nmax; k++) {
3690           eigs[k]                     = 1. / PETSC_SMALL;
3691           eigv[k * (subset_size + 1)] = 1.0;
3692         }
3693       }
3694       PetscCall(PetscFPTrapPop());
3695       if (B_ierr) {
3696         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3697         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3698         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3699       }
3700 
3701       if (B_neigs > nmax) {
3702         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3703         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3704         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3705       }
3706 
3707       nmin_s = PetscMin(nmin, B_N);
3708       if (B_neigs < nmin_s) {
3709         PetscBLASInt B_neigs2 = 0;
3710 
3711         if (upart) {
3712           if (scal) {
3713             PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3714             B_IL = B_neigs + 1;
3715           } else {
3716             PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3717             B_IU = B_N - B_neigs;
3718           }
3719         } else {
3720           B_IL = B_neigs + 1;
3721           PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3722         }
3723         if (pcbddc->dbg_flag) {
3724           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3725         }
3726         if (sub_schurs->is_symmetric) {
3727           PetscInt j, k;
3728           for (j = 0; j < subset_size; j++) {
3729             for (k = j; k < subset_size; k++) {
3730               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3731               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3732             }
3733           }
3734         } else {
3735           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3736           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3737         }
3738         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3739 #if defined(PETSC_USE_COMPLEX)
3740         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3741 #else
3742         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3743 #endif
3744         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3745         PetscCall(PetscFPTrapPop());
3746         B_neigs += B_neigs2;
3747       }
3748       if (B_ierr) {
3749         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3750         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3751         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3752       }
3753       if (pcbddc->dbg_flag) {
3754         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3755         for (j = 0; j < B_neigs; j++) {
3756           if (!sub_schurs->gdsw) {
3757             if (eigs[j] == 0.0) {
3758               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3759             } else {
3760               if (upart) {
3761                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3762               } else {
3763                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1 / eigs[j + eigs_start])));
3764               }
3765             }
3766           } else {
3767             double pg = (double)eigs[j + eigs_start];
3768             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3769             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3770           }
3771         }
3772       }
3773     }
3774     /* change the basis back to the original one */
3775     if (sub_schurs->change) {
3776       Mat change, phi, phit;
3777 
3778       if (pcbddc->dbg_flag > 2) {
3779         PetscInt ii;
3780         for (ii = 0; ii < B_neigs; ii++) {
3781           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3782           for (j = 0; j < B_N; j++) {
3783 #if defined(PETSC_USE_COMPLEX)
3784             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3785             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3786             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3787 #else
3788             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3789 #endif
3790           }
3791         }
3792       }
3793       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3794       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3795       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3796       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3797       PetscCall(MatDestroy(&phit));
3798       PetscCall(MatDestroy(&phi));
3799     }
3800     maxneigs                               = PetscMax(B_neigs, maxneigs);
3801     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3802     if (B_neigs) {
3803       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3804 
3805       if (pcbddc->dbg_flag > 1) {
3806         PetscInt ii;
3807         for (ii = 0; ii < B_neigs; ii++) {
3808           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3809           for (j = 0; j < B_N; j++) {
3810 #if defined(PETSC_USE_COMPLEX)
3811             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3812             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3813             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3814 #else
3815             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3816 #endif
3817           }
3818         }
3819       }
3820       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3821       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3822       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3823       cum++;
3824     }
3825     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3826     /* shift for next computation */
3827     cumarray += subset_size * subset_size;
3828   }
3829   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3830 
3831   if (mss) {
3832     if (sub_schurs->gdsw) {
3833       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3834       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3835     } else {
3836       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3837       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3838       /* destroy matrices (junk) */
3839       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3840       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3841     }
3842   }
3843   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3844   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3845 #if defined(PETSC_USE_COMPLEX)
3846   PetscCall(PetscFree(rwork));
3847 #endif
3848   if (pcbddc->dbg_flag) {
3849     PetscInt maxneigs_r;
3850     PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3851     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3852   }
3853   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3854   PetscFunctionReturn(PETSC_SUCCESS);
3855 }
3856 
3857 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3858 {
3859   Mat coarse_submat;
3860 
3861   PetscFunctionBegin;
3862   /* Setup local scatters R_to_B and (optionally) R_to_D */
3863   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3864   PetscCall(PCBDDCSetUpLocalScatters(pc));
3865 
3866   /* Setup local neumann solver ksp_R */
3867   /* PCBDDCSetUpLocalScatters should be called first! */
3868   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3869 
3870   /*
3871      Setup local correction and local part of coarse basis.
3872      Gives back the dense local part of the coarse matrix in column major ordering
3873   */
3874   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3875 
3876   /* Compute total number of coarse nodes and setup coarse solver */
3877   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3878   PetscCall(MatDestroy(&coarse_submat));
3879   PetscFunctionReturn(PETSC_SUCCESS);
3880 }
3881 
3882 PetscErrorCode PCBDDCResetCustomization(PC pc)
3883 {
3884   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3885 
3886   PetscFunctionBegin;
3887   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3888   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3889   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3890   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3891   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3892   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3893   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3894   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3895   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3896   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3897   PetscFunctionReturn(PETSC_SUCCESS);
3898 }
3899 
3900 PetscErrorCode PCBDDCResetTopography(PC pc)
3901 {
3902   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3903   PetscInt i;
3904 
3905   PetscFunctionBegin;
3906   PetscCall(MatDestroy(&pcbddc->nedcG));
3907   PetscCall(ISDestroy(&pcbddc->nedclocal));
3908   PetscCall(MatDestroy(&pcbddc->discretegradient));
3909   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3910   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3911   PetscCall(MatDestroy(&pcbddc->switch_static_change));
3912   PetscCall(VecDestroy(&pcbddc->work_change));
3913   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3914   PetscCall(MatDestroy(&pcbddc->divudotp));
3915   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3916   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3917   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3918   pcbddc->n_local_subs = 0;
3919   PetscCall(PetscFree(pcbddc->local_subs));
3920   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3921   pcbddc->graphanalyzed        = PETSC_FALSE;
3922   pcbddc->recompute_topography = PETSC_TRUE;
3923   pcbddc->corner_selected      = PETSC_FALSE;
3924   PetscFunctionReturn(PETSC_SUCCESS);
3925 }
3926 
3927 PetscErrorCode PCBDDCResetSolvers(PC pc)
3928 {
3929   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3930 
3931   PetscFunctionBegin;
3932   PetscCall(VecDestroy(&pcbddc->coarse_vec));
3933   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3934   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3935   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3936   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3937   PetscCall(VecDestroy(&pcbddc->vec1_P));
3938   PetscCall(VecDestroy(&pcbddc->vec1_C));
3939   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3940   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3941   PetscCall(VecDestroy(&pcbddc->vec1_R));
3942   PetscCall(VecDestroy(&pcbddc->vec2_R));
3943   PetscCall(ISDestroy(&pcbddc->is_R_local));
3944   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3945   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3946   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3947   PetscCall(KSPReset(pcbddc->ksp_D));
3948   PetscCall(KSPReset(pcbddc->ksp_R));
3949   PetscCall(KSPReset(pcbddc->coarse_ksp));
3950   PetscCall(MatDestroy(&pcbddc->local_mat));
3951   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3952   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3953   PetscCall(PetscFree(pcbddc->global_primal_indices));
3954   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3955   PetscCall(MatDestroy(&pcbddc->benign_change));
3956   PetscCall(VecDestroy(&pcbddc->benign_vec));
3957   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3958   PetscCall(MatDestroy(&pcbddc->benign_B0));
3959   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3960   if (pcbddc->benign_zerodiag_subs) {
3961     PetscInt i;
3962     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3963     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3964   }
3965   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3966   PetscFunctionReturn(PETSC_SUCCESS);
3967 }
3968 
3969 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3970 {
3971   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3972   PC_IS   *pcis   = (PC_IS *)pc->data;
3973   VecType  impVecType;
3974   PetscInt n_constraints, n_R, old_size;
3975 
3976   PetscFunctionBegin;
3977   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3978   n_R           = pcis->n - pcbddc->n_vertices;
3979   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3980   /* local work vectors (try to avoid unneeded work)*/
3981   /* R nodes */
3982   old_size = -1;
3983   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3984   if (n_R != old_size) {
3985     PetscCall(VecDestroy(&pcbddc->vec1_R));
3986     PetscCall(VecDestroy(&pcbddc->vec2_R));
3987     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3988     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3989     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3990     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3991   }
3992   /* local primal dofs */
3993   old_size = -1;
3994   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3995   if (pcbddc->local_primal_size != old_size) {
3996     PetscCall(VecDestroy(&pcbddc->vec1_P));
3997     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3998     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3999     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
4000   }
4001   /* local explicit constraints */
4002   old_size = -1;
4003   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
4004   if (n_constraints && n_constraints != old_size) {
4005     PetscCall(VecDestroy(&pcbddc->vec1_C));
4006     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
4007     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
4008     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
4009   }
4010   PetscFunctionReturn(PETSC_SUCCESS);
4011 }
4012 
4013 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
4014 {
4015   PetscBool          flg;
4016   const PetscScalar *a;
4017 
4018   PetscFunctionBegin;
4019   PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
4020   if (flg) {
4021     PetscCall(MatDenseGetArrayRead(S, &a));
4022     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4023     PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4024     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4025     PetscCall(MatDenseRestoreArrayRead(S, &a));
4026   } else {
4027     const PetscInt *ii, *jj;
4028     PetscInt        n;
4029     PetscInt        buf[8192], *bufc = NULL;
4030     PetscBool       freeb = PETSC_FALSE;
4031     Mat             Sm    = S;
4032 
4033     PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4034     if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4035     else PetscCall(PetscObjectReference((PetscObject)S));
4036     PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4037     PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4038     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4039     if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4040       bufc = buf;
4041     } else {
4042       PetscCall(PetscMalloc1(nc, &bufc));
4043       freeb = PETSC_TRUE;
4044     }
4045 
4046     for (PetscInt i = 0; i < n; i++) {
4047       const PetscInt nci = ii[i + 1] - ii[i];
4048 
4049       for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4050       PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4051     }
4052     PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4053     PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4054     PetscCall(MatDestroy(&Sm));
4055     if (freeb) PetscCall(PetscFree(bufc));
4056   }
4057   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4058   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4059   PetscFunctionReturn(PETSC_SUCCESS);
4060 }
4061 
4062 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4063 {
4064   Mat_SeqAIJ        *aij;
4065   PetscInt          *ii, *jj;
4066   PetscScalar       *aa;
4067   PetscInt           nnz = 0, m, nc;
4068   const PetscScalar *a;
4069   const PetscScalar  zero = 0.0;
4070 
4071   PetscFunctionBegin;
4072   PetscCall(MatGetLocalSize(D, &m, &nc));
4073   PetscCall(MatDenseGetArrayRead(D, &a));
4074   PetscCall(PetscMalloc1(m + 1, &ii));
4075   PetscCall(PetscMalloc1(m * nc, &jj));
4076   PetscCall(PetscMalloc1(m * nc, &aa));
4077   ii[0] = 0;
4078   for (PetscInt k = 0; k < m; k++) {
4079     for (PetscInt s = 0; s < nc; s++) {
4080       const PetscInt    c = s + k * nc;
4081       const PetscScalar v = a[k + s * m];
4082 
4083       if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4084       jj[nnz] = j[c];
4085       aa[nnz] = a[k + s * m];
4086       nnz++;
4087     }
4088     ii[k + 1] = nnz;
4089   }
4090 
4091   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4092   PetscCall(MatDenseRestoreArrayRead(D, &a));
4093 
4094   aij          = (Mat_SeqAIJ *)(*mat)->data;
4095   aij->free_a  = PETSC_TRUE;
4096   aij->free_ij = PETSC_TRUE;
4097   PetscFunctionReturn(PETSC_SUCCESS);
4098 }
4099 
4100 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
4101 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4102 {
4103   PetscInt        n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4104   const PetscBool allowzeropivot    = PETSC_FALSE;
4105   PetscBool       zeropivotdetected = PETSC_FALSE;
4106   const PetscReal shift             = 0.0;
4107   PetscInt        ipvt[5], *ii, *jj, *indi, *indj;
4108   PetscScalar     work[25], *v_work = NULL, *aa, *diag;
4109   PetscLogDouble  flops = 0.0;
4110 
4111   PetscFunctionBegin;
4112   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4113   for (PetscInt i = 0; i < nblocks; i++) {
4114     ncnt += bsizes[i];
4115     ncnt2 += PetscSqr(bsizes[i]);
4116   }
4117   PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4118   for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4119   if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4120 
4121   PetscCall(PetscMalloc1(n + 1, &ii));
4122   PetscCall(PetscMalloc1(ncnt2, &jj));
4123   PetscCall(PetscCalloc1(ncnt2, &aa));
4124 
4125   ncnt  = 0;
4126   ii[0] = 0;
4127   indi  = ii;
4128   indj  = jj;
4129   diag  = aa;
4130   for (PetscInt i = 0; i < nblocks; i++) {
4131     const PetscInt bs = bsizes[i];
4132 
4133     for (PetscInt k = 0; k < bs; k++) {
4134       indi[k + 1] = indi[k] + bs;
4135       for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4136     }
4137     PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4138     switch (bs) {
4139     case 1:
4140       *diag = 1.0 / (*diag);
4141       break;
4142     case 2:
4143       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4144       break;
4145     case 3:
4146       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4147       break;
4148     case 4:
4149       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4150       break;
4151     case 5:
4152       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4153       break;
4154     case 6:
4155       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4156       break;
4157     case 7:
4158       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4159       break;
4160     default:
4161       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4162     }
4163     ncnt += bs;
4164     flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4165     diag += bs * bs;
4166     indj += bs * bs;
4167     indi += bs;
4168   }
4169   PetscCall(PetscLogFlops(flops));
4170   PetscCall(PetscFree2(v_work, v_pivots));
4171   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4172   {
4173     Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4174     aij->free_a     = PETSC_TRUE;
4175     aij->free_ij    = PETSC_TRUE;
4176   }
4177   PetscFunctionReturn(PETSC_SUCCESS);
4178 }
4179 
4180 static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B)
4181 {
4182   const PetscScalar *rarr;
4183   PetscScalar       *larr;
4184   PetscSF            vsf;
4185   PetscInt           n, rld, lld;
4186 
4187   PetscFunctionBegin;
4188   PetscCall(MatGetSize(A, NULL, &n));
4189   PetscCall(MatDenseGetLDA(A, &rld));
4190   PetscCall(MatDenseGetLDA(B, &lld));
4191   PetscCall(MatDenseGetArrayRead(A, &rarr));
4192   PetscCall(MatDenseGetArrayWrite(B, &larr));
4193   PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf));
4194   PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4195   PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4196   PetscCall(MatDenseRestoreArrayRead(A, &rarr));
4197   PetscCall(MatDenseRestoreArrayWrite(B, &larr));
4198   PetscCall(PetscSFDestroy(&vsf));
4199   PetscFunctionReturn(PETSC_SUCCESS);
4200 }
4201 
4202 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4203 {
4204   PC_IS          *pcis       = (PC_IS *)pc->data;
4205   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
4206   PCBDDCGraph     graph      = pcbddc->mat_graph;
4207   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4208   /* submatrices of local problem */
4209   Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4210   /* submatrices of local coarse problem */
4211   Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4212   /* working matrices */
4213   Mat C_CR;
4214 
4215   /* additional working stuff */
4216   PC              pc_R;
4217   IS              is_R, is_V, is_C;
4218   const PetscInt *idx_V, *idx_C;
4219   Mat             F, Brhs = NULL;
4220   Vec             dummy_vec;
4221   PetscBool       isLU, isCHOL, need_benign_correction, sparserhs;
4222   PetscInt       *idx_V_B;
4223   PetscInt        lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4224   PetscInt        n_eff_vertices, n_eff_constraints;
4225   PetscInt        i, n_R, n_D, n_B;
4226   PetscScalar     one = 1.0, m_one = -1.0;
4227 
4228   /* Multi-element support */
4229   PetscBool multi_element = graph->multi_element;
4230   PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4231   PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4232   IS        is_C_perm = NULL;
4233   PetscInt  n_C_bss = 0, *C_bss = NULL;
4234   Mat       coarse_phi_multi;
4235 
4236   PetscFunctionBegin;
4237   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4238   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4239 
4240   /* Set Non-overlapping dimensions */
4241   n_vertices    = pcbddc->n_vertices;
4242   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4243   n_B           = pcis->n_B;
4244   n_D           = pcis->n - n_B;
4245   n_R           = pcis->n - n_vertices;
4246 
4247   /* vertices in boundary numbering */
4248   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4249   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4250   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4251 
4252   /* these two cases still need to be optimized */
4253   if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4254 
4255   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
4256   if (multi_element) {
4257     PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4258 
4259     PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4260     PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4261     PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4262     PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4263     PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4264 
4265     /* group vertices and constraints by subdomain id */
4266     const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4267     const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4268     PetscInt       *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4269     PetscInt        n_el = PetscMax(graph->n_local_subs, 1);
4270 
4271     PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4272     PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4273     PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4274     for (PetscInt i = 0; i < n_vertices; i++) {
4275       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4276 
4277       V_to_eff_V[i] = count_eff[s];
4278       count_eff[s] += 1;
4279     }
4280     for (PetscInt i = 0; i < n_constraints; i++) {
4281       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4282 
4283       C_to_eff_C[i] = count_eff[s];
4284       count_eff[s] += 1;
4285     }
4286 
4287     /* preallocation */
4288     PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4289     for (PetscInt i = 0; i < n_vertices; i++) {
4290       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4291 
4292       nnz[i] = count_eff[s] + count_eff[s + 1];
4293     }
4294     for (PetscInt i = 0; i < n_constraints; i++) {
4295       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4296 
4297       nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4298     }
4299     PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4300     PetscCall(PetscFree(nnz));
4301 
4302     n_eff_vertices    = 0;
4303     n_eff_constraints = 0;
4304     for (PetscInt i = 0; i < n_el; i++) {
4305       n_eff_vertices       = PetscMax(n_eff_vertices, count_eff[2 * i]);
4306       n_eff_constraints    = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4307       count_eff[2 * i]     = 0;
4308       count_eff[2 * i + 1] = 0;
4309     }
4310 
4311     const PetscInt *idx;
4312     PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4313 
4314     for (PetscInt i = 0; i < n_vertices; i++) {
4315       const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4316       const PetscInt s = 2 * e;
4317 
4318       V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4319       count_eff[s] += 1;
4320     }
4321     for (PetscInt i = 0; i < n_constraints; i++) {
4322       const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4323       const PetscInt s = 2 * e + 1;
4324 
4325       C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4326       count_eff[s] += 1;
4327     }
4328 
4329     PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4330     PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4331     PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4332     PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4333     for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4334     for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4335     for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4336     for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4337 
4338     PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4339     for (PetscInt i = 0; i < n_R; i++) {
4340       const PetscInt e = graph->nodes[idx[i]].local_sub;
4341       const PetscInt s = 2 * e;
4342       PetscInt       j;
4343 
4344       for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4345       for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4346     }
4347     PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4348     PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4349     for (PetscInt i = 0; i < n_B; i++) {
4350       const PetscInt e = graph->nodes[idx[i]].local_sub;
4351       const PetscInt s = 2 * e;
4352       PetscInt       j;
4353 
4354       for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4355       for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4356     }
4357     PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4358 
4359     /* permutation and blocksizes for block invert of S_CC */
4360     PetscInt *idxp;
4361 
4362     PetscCall(PetscMalloc1(n_constraints, &idxp));
4363     PetscCall(PetscMalloc1(n_el, &C_bss));
4364     n_C_bss = 0;
4365     for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4366       const PetscInt nc = count_eff[2 * e + 1];
4367 
4368       if (nc) C_bss[n_C_bss++] = nc;
4369       for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c];
4370       cnt += nc;
4371     }
4372 
4373     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4374 
4375     PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4376     PetscCall(PetscFree(count_eff));
4377   } else {
4378     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4379     n_eff_constraints = n_constraints;
4380     n_eff_vertices    = n_vertices;
4381   }
4382 
4383   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4384   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4385   PetscCall(PCSetUp(pc_R));
4386   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4387   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4388   lda_rhs                = n_R;
4389   need_benign_correction = PETSC_FALSE;
4390   if (isLU || isCHOL) {
4391     PetscCall(PCFactorGetMatrix(pc_R, &F));
4392   } else if (sub_schurs && sub_schurs->reuse_solver) {
4393     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4394     MatFactorType      type;
4395 
4396     F = reuse_solver->F;
4397     PetscCall(MatGetFactorType(F, &type));
4398     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4399     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4400     PetscCall(MatGetSize(F, &lda_rhs, NULL));
4401     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4402   } else F = NULL;
4403 
4404   /* determine if we can use a sparse right-hand side */
4405   sparserhs = PETSC_FALSE;
4406   if (F && !multi_element) {
4407     MatSolverType solver;
4408 
4409     PetscCall(MatFactorGetSolverType(F, &solver));
4410     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4411   }
4412 
4413   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4414   dummy_vec = NULL;
4415   if (need_benign_correction && lda_rhs != n_R && F) {
4416     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4417     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4418     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4419   }
4420 
4421   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4422   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4423 
4424   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4425   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4426   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4427   PetscCall(ISGetIndices(is_V, &idx_V));
4428   PetscCall(ISGetIndices(is_C, &idx_C));
4429 
4430   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4431   if (n_constraints) {
4432     Mat C_B;
4433 
4434     /* Extract constraints on R nodes: C_{CR}  */
4435     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4436     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4437 
4438     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4439     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4440     if (!sparserhs) {
4441       PetscScalar *marr;
4442 
4443       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4444       PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4445       for (i = 0; i < n_constraints; i++) {
4446         const PetscScalar *row_cmat_values;
4447         const PetscInt    *row_cmat_indices;
4448         PetscInt           size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4449 
4450         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4451         for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4452         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4453       }
4454       PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4455     } else {
4456       Mat tC_CR;
4457 
4458       PetscCall(MatScale(C_CR, -1.0));
4459       if (lda_rhs != n_R) {
4460         PetscScalar *aa;
4461         PetscInt     r, *ii, *jj;
4462         PetscBool    done;
4463 
4464         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4465         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4466         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4467         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4468         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4469         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4470       } else {
4471         PetscCall(PetscObjectReference((PetscObject)C_CR));
4472         tC_CR = C_CR;
4473       }
4474       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4475       PetscCall(MatDestroy(&tC_CR));
4476     }
4477     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4478     if (F) {
4479       if (need_benign_correction) {
4480         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4481 
4482         /* rhs is already zero on interior dofs, no need to change the rhs */
4483         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4484       }
4485       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4486       if (need_benign_correction) {
4487         PetscScalar       *marr;
4488         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4489 
4490         /* XXX multi_element? */
4491         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4492         if (lda_rhs != n_R) {
4493           for (i = 0; i < n_eff_constraints; i++) {
4494             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4495             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4496             PetscCall(VecResetArray(dummy_vec));
4497           }
4498         } else {
4499           for (i = 0; i < n_eff_constraints; i++) {
4500             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4501             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4502             PetscCall(VecResetArray(pcbddc->vec1_R));
4503           }
4504         }
4505         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4506       }
4507     } else {
4508       const PetscScalar *barr;
4509       PetscScalar       *marr;
4510 
4511       PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4512       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4513       for (i = 0; i < n_eff_constraints; i++) {
4514         PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4515         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4516         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4517         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4518         PetscCall(VecResetArray(pcbddc->vec1_R));
4519         PetscCall(VecResetArray(pcbddc->vec2_R));
4520       }
4521       PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4522       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4523     }
4524     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4525     PetscCall(MatDestroy(&Brhs));
4526     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4527     if (!pcbddc->switch_static) {
4528       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4529       for (i = 0; i < n_eff_constraints; i++) {
4530         Vec r, b;
4531         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
4532         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
4533         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4534         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4535         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
4536         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
4537       }
4538       if (multi_element) {
4539         Mat T;
4540 
4541         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4542         PetscCall(MatDestroy(&local_auxmat2_R));
4543         local_auxmat2_R = T;
4544         PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4545         PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4546         pcbddc->local_auxmat2 = T;
4547       }
4548       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4549     } else {
4550       if (multi_element) {
4551         Mat T;
4552 
4553         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4554         PetscCall(MatDestroy(&local_auxmat2_R));
4555         local_auxmat2_R = T;
4556       }
4557       if (lda_rhs != n_R) {
4558         PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4559       } else {
4560         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4561         pcbddc->local_auxmat2 = local_auxmat2_R;
4562       }
4563       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4564     }
4565     PetscCall(MatScale(S_CC, m_one));
4566     if (multi_element) {
4567       Mat T, T2;
4568       IS  isp, ispi;
4569 
4570       isp = is_C_perm;
4571 
4572       PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4573       PetscCall(MatPermute(S_CC, isp, isp, &T));
4574       PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4575       PetscCall(MatDestroy(&T));
4576       PetscCall(MatDestroy(&S_CC));
4577       PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4578       PetscCall(MatDestroy(&T2));
4579       PetscCall(ISDestroy(&ispi));
4580     } else {
4581       if (isCHOL) {
4582         PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4583       } else {
4584         PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4585       }
4586       PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4587     }
4588     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4589     PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4590     PetscCall(MatDestroy(&C_B));
4591     PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4592   }
4593 
4594   /* Get submatrices from subdomain matrix */
4595   if (n_vertices) {
4596 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4597     PetscBool oldpin;
4598 #endif
4599     IS is_aux;
4600 
4601     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4602       IS tis;
4603 
4604       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4605       PetscCall(ISSort(tis));
4606       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4607       PetscCall(ISDestroy(&tis));
4608     } else {
4609       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4610     }
4611 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4612     oldpin = pcbddc->local_mat->boundtocpu;
4613 #endif
4614     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4615     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4616     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4617     /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4618     PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4619     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4620 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4621     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4622 #endif
4623     PetscCall(ISDestroy(&is_aux));
4624   }
4625   PetscCall(ISDestroy(&is_C_perm));
4626   PetscCall(PetscFree(C_bss));
4627 
4628   p0_lidx_I = NULL;
4629   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4630     const PetscInt *idxs;
4631 
4632     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4633     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4634     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4635     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4636   }
4637 
4638   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4639 
4640   /* Matrices of coarse basis functions (local) */
4641   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4642   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4643   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4644   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4645   if (!multi_element) {
4646     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4647     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4648     coarse_phi_multi = NULL;
4649   } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4650     IS is_rows[2] = {pcbddc->is_R_local, NULL};
4651     IS is_cols[2] = {is_V, is_C};
4652 
4653     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4654     PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4655     PetscCall(ISDestroy(&is_rows[1]));
4656   }
4657 
4658   /* vertices */
4659   if (n_vertices) {
4660     PetscBool restoreavr = PETSC_FALSE;
4661     Mat       A_RRmA_RV  = NULL;
4662 
4663     PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4664     PetscCall(MatDestroy(&A_VV));
4665 
4666     if (n_R) {
4667       Mat A_RV_bcorr = NULL, S_VV;
4668 
4669       PetscCall(MatScale(A_RV, m_one));
4670       if (need_benign_correction) {
4671         ISLocalToGlobalMapping RtoN;
4672         IS                     is_p0;
4673         PetscInt              *idxs_p0, n;
4674 
4675         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4676         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4677         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4678         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4679         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4680         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4681         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4682         PetscCall(ISDestroy(&is_p0));
4683       }
4684 
4685       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4686       if (!sparserhs || need_benign_correction) {
4687         if (lda_rhs == n_R && !multi_element) {
4688           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4689         } else {
4690           Mat             T;
4691           PetscScalar    *av, *array;
4692           const PetscInt *xadj, *adjncy;
4693           PetscInt        n;
4694           PetscBool       flg_row;
4695 
4696           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4697           PetscCall(MatDenseGetArrayWrite(T, &array));
4698           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4699           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4700           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4701           for (i = 0; i < n; i++) {
4702             PetscInt j;
4703             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4704           }
4705           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4706           PetscCall(MatDenseRestoreArrayWrite(T, &array));
4707           PetscCall(MatDestroy(&A_RV));
4708           A_RV = T;
4709         }
4710         if (need_benign_correction) {
4711           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4712           PetscScalar       *marr;
4713 
4714           /* XXX multi_element */
4715           PetscCall(MatDenseGetArray(A_RV, &marr));
4716           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4717 
4718                  | 0 0  0 | (V)
4719              L = | 0 0 -1 | (P-p0)
4720                  | 0 0 -1 | (p0)
4721 
4722           */
4723           for (i = 0; i < reuse_solver->benign_n; i++) {
4724             const PetscScalar *vals;
4725             const PetscInt    *idxs, *idxs_zero;
4726             PetscInt           n, j, nz;
4727 
4728             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4729             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4730             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4731             for (j = 0; j < n; j++) {
4732               PetscScalar val = vals[j];
4733               PetscInt    k, col = idxs[j];
4734               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4735             }
4736             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4737             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4738           }
4739           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4740         }
4741         PetscCall(PetscObjectReference((PetscObject)A_RV));
4742         Brhs = A_RV;
4743       } else {
4744         Mat tA_RVT, A_RVT;
4745 
4746         if (!pcbddc->symmetric_primal) {
4747           /* A_RV already scaled by -1 */
4748           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4749         } else {
4750           restoreavr = PETSC_TRUE;
4751           PetscCall(MatScale(A_VR, -1.0));
4752           PetscCall(PetscObjectReference((PetscObject)A_VR));
4753           A_RVT = A_VR;
4754         }
4755         if (lda_rhs != n_R) {
4756           PetscScalar *aa;
4757           PetscInt     r, *ii, *jj;
4758           PetscBool    done;
4759 
4760           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4761           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4762           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4763           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4764           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4765           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4766         } else {
4767           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4768           tA_RVT = A_RVT;
4769         }
4770         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4771         PetscCall(MatDestroy(&tA_RVT));
4772         PetscCall(MatDestroy(&A_RVT));
4773       }
4774       if (F) {
4775         /* need to correct the rhs */
4776         if (need_benign_correction) {
4777           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4778           PetscScalar       *marr;
4779 
4780           PetscCall(MatDenseGetArray(Brhs, &marr));
4781           if (lda_rhs != n_R) {
4782             for (i = 0; i < n_eff_vertices; i++) {
4783               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4784               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4785               PetscCall(VecResetArray(dummy_vec));
4786             }
4787           } else {
4788             for (i = 0; i < n_eff_vertices; i++) {
4789               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4790               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4791               PetscCall(VecResetArray(pcbddc->vec1_R));
4792             }
4793           }
4794           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4795         }
4796         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4797         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4798         /* need to correct the solution */
4799         if (need_benign_correction) {
4800           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4801           PetscScalar       *marr;
4802 
4803           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4804           if (lda_rhs != n_R) {
4805             for (i = 0; i < n_eff_vertices; i++) {
4806               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4807               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4808               PetscCall(VecResetArray(dummy_vec));
4809             }
4810           } else {
4811             for (i = 0; i < n_eff_vertices; i++) {
4812               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4813               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4814               PetscCall(VecResetArray(pcbddc->vec1_R));
4815             }
4816           }
4817           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4818         }
4819       } else {
4820         const PetscScalar *barr;
4821         PetscScalar       *marr;
4822 
4823         PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4824         PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4825         for (i = 0; i < n_eff_vertices; i++) {
4826           PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4827           PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4828           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4829           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4830           PetscCall(VecResetArray(pcbddc->vec1_R));
4831           PetscCall(VecResetArray(pcbddc->vec2_R));
4832         }
4833         PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4834         PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4835       }
4836       PetscCall(MatDestroy(&A_RV));
4837       PetscCall(MatDestroy(&Brhs));
4838       /* S_VV and S_CV */
4839       if (n_constraints) {
4840         Mat B;
4841 
4842         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4843         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B));
4844 
4845         /* S_CV = pcbddc->local_auxmat1 * B */
4846         if (multi_element) {
4847           Mat T;
4848 
4849           PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4850           PetscCall(MatDestroy(&B));
4851           B = T;
4852         }
4853         PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4854         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4855         PetscCall(MatProductSetFromOptions(S_CV));
4856         PetscCall(MatProductSymbolic(S_CV));
4857         PetscCall(MatProductNumeric(S_CV));
4858         PetscCall(MatProductClear(S_CV));
4859         PetscCall(MatDestroy(&B));
4860 
4861         /* B = local_auxmat2_R * S_CV */
4862         PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4863         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4864         PetscCall(MatProductSetFromOptions(B));
4865         PetscCall(MatProductSymbolic(B));
4866         PetscCall(MatProductNumeric(B));
4867 
4868         PetscCall(MatScale(S_CV, m_one));
4869         PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4870 
4871         if (multi_element) {
4872           Mat T;
4873 
4874           PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4875           PetscCall(MatDestroy(&A_RRmA_RV));
4876           A_RRmA_RV = T;
4877         }
4878         PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4879         PetscCall(MatDestroy(&B));
4880       } else if (multi_element) {
4881         Mat T;
4882 
4883         PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4884         PetscCall(MatDestroy(&A_RRmA_RV));
4885         A_RRmA_RV = T;
4886       }
4887 
4888       if (lda_rhs != n_R) {
4889         Mat T;
4890 
4891         PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4892         PetscCall(MatDestroy(&A_RRmA_RV));
4893         A_RRmA_RV = T;
4894       }
4895 
4896       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4897       if (need_benign_correction) { /* XXX SPARSE */
4898         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4899         PetscScalar       *sums;
4900         const PetscScalar *marr;
4901 
4902         PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
4903         PetscCall(PetscMalloc1(n_vertices, &sums));
4904         for (i = 0; i < reuse_solver->benign_n; i++) {
4905           const PetscScalar *vals;
4906           const PetscInt    *idxs, *idxs_zero;
4907           PetscInt           n, j, nz;
4908 
4909           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4910           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4911           for (j = 0; j < n_vertices; j++) {
4912             sums[j] = 0.;
4913             for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
4914           }
4915           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4916           for (j = 0; j < n; j++) {
4917             PetscScalar val = vals[j];
4918             for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
4919           }
4920           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4921           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4922         }
4923         PetscCall(PetscFree(sums));
4924         PetscCall(MatDestroy(&A_RV_bcorr));
4925         PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
4926       }
4927 
4928       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
4929       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4930       PetscCall(MatDestroy(&S_VV));
4931     }
4932 
4933     /* coarse basis functions */
4934     if (coarse_phi_multi) {
4935       Mat Vid;
4936 
4937       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
4938       PetscCall(MatShift_Basic(Vid, 1.0));
4939       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
4940       PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
4941       PetscCall(MatDestroy(&Vid));
4942     } else {
4943       if (A_RRmA_RV) {
4944         PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B));
4945         if (pcbddc->switch_static || pcbddc->dbg_flag) {
4946           PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D));
4947           if (pcbddc->benign_n) {
4948             for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
4949             PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
4950             PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
4951           }
4952         }
4953       }
4954       for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
4955       PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4956       PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
4957     }
4958     PetscCall(MatDestroy(&A_RRmA_RV));
4959   }
4960   PetscCall(MatDestroy(&A_RV));
4961   PetscCall(VecDestroy(&dummy_vec));
4962 
4963   if (n_constraints) {
4964     Mat B, B2;
4965 
4966     PetscCall(MatScale(S_CC, m_one));
4967     PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
4968     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4969     PetscCall(MatProductSetFromOptions(B));
4970     PetscCall(MatProductSymbolic(B));
4971     PetscCall(MatProductNumeric(B));
4972 
4973     if (n_vertices) {
4974       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4975         PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
4976       } else {
4977         if (lda_rhs != n_R) {
4978           Mat tB;
4979 
4980           PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
4981           PetscCall(MatDestroy(&B));
4982           B = tB;
4983         }
4984         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
4985       }
4986       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
4987     }
4988 
4989     /* coarse basis functions */
4990     if (coarse_phi_multi) {
4991       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
4992     } else {
4993       PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4994       PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2));
4995       PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
4996       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4997         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
4998         PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2));
4999         if (pcbddc->benign_n) {
5000           for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5001         }
5002         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
5003       }
5004     }
5005     PetscCall(MatDestroy(&B));
5006   }
5007 
5008   /* assemble sparse coarse basis functions */
5009   if (coarse_phi_multi) {
5010     Mat T;
5011 
5012     PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
5013     PetscCall(MatDestroy(&coarse_phi_multi));
5014     PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
5015     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D));
5016     PetscCall(MatDestroy(&T));
5017   }
5018   PetscCall(MatDestroy(&local_auxmat2_R));
5019   PetscCall(PetscFree(p0_lidx_I));
5020 
5021   /* coarse matrix entries relative to B_0 */
5022   if (pcbddc->benign_n) {
5023     Mat                B0_B, B0_BPHI;
5024     IS                 is_dummy;
5025     const PetscScalar *data;
5026     PetscInt           j;
5027 
5028     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5029     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5030     PetscCall(ISDestroy(&is_dummy));
5031     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5032     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5033     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5034     for (j = 0; j < pcbddc->benign_n; j++) {
5035       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5036       for (i = 0; i < pcbddc->local_primal_size; i++) {
5037         PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5038         PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5039       }
5040     }
5041     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5042     PetscCall(MatDestroy(&B0_B));
5043     PetscCall(MatDestroy(&B0_BPHI));
5044   }
5045 
5046   /* compute other basis functions for non-symmetric problems */
5047   if (!pcbddc->symmetric_primal) {
5048     Mat          B_V = NULL, B_C = NULL;
5049     PetscScalar *marray, *work;
5050 
5051     /* TODO multi_element MatDenseScatter */
5052     if (n_constraints) {
5053       Mat S_CCT, C_CRT;
5054 
5055       PetscCall(MatScale(S_CC, m_one));
5056       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5057       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5058       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5059       PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5060       PetscCall(MatDestroy(&S_CCT));
5061       if (n_vertices) {
5062         Mat S_VCT;
5063 
5064         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5065         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5066         PetscCall(MatDestroy(&S_VCT));
5067         PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5068       }
5069       PetscCall(MatDestroy(&C_CRT));
5070     } else {
5071       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5072     }
5073     if (n_vertices && n_R) {
5074       PetscScalar    *av, *marray;
5075       const PetscInt *xadj, *adjncy;
5076       PetscInt        n;
5077       PetscBool       flg_row;
5078 
5079       /* B_V = B_V - A_VR^T */
5080       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5081       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5082       PetscCall(MatSeqAIJGetArray(A_VR, &av));
5083       PetscCall(MatDenseGetArray(B_V, &marray));
5084       for (i = 0; i < n; i++) {
5085         PetscInt j;
5086         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5087       }
5088       PetscCall(MatDenseRestoreArray(B_V, &marray));
5089       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5090       PetscCall(MatDestroy(&A_VR));
5091     }
5092 
5093     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5094     PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5095     if (n_vertices) {
5096       PetscCall(MatDenseGetArray(B_V, &marray));
5097       for (i = 0; i < n_vertices; i++) {
5098         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5099         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5100         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5101         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5102         PetscCall(VecResetArray(pcbddc->vec1_R));
5103         PetscCall(VecResetArray(pcbddc->vec2_R));
5104       }
5105       PetscCall(MatDenseRestoreArray(B_V, &marray));
5106     }
5107     if (B_C) {
5108       PetscCall(MatDenseGetArray(B_C, &marray));
5109       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5110         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5111         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5112         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5113         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5114         PetscCall(VecResetArray(pcbddc->vec1_R));
5115         PetscCall(VecResetArray(pcbddc->vec2_R));
5116       }
5117       PetscCall(MatDenseRestoreArray(B_C, &marray));
5118     }
5119     /* coarse basis functions */
5120     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5121     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5122     for (i = 0; i < pcbddc->local_primal_size; i++) {
5123       Vec v;
5124 
5125       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5126       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5127       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5128       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5129       if (i < n_vertices) {
5130         PetscScalar one = 1.0;
5131         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5132         PetscCall(VecAssemblyBegin(v));
5133         PetscCall(VecAssemblyEnd(v));
5134       }
5135       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5136 
5137       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5138         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5139         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5140         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5141         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5142       }
5143       PetscCall(VecResetArray(pcbddc->vec1_R));
5144     }
5145     PetscCall(MatDestroy(&B_V));
5146     PetscCall(MatDestroy(&B_C));
5147     PetscCall(PetscFree(work));
5148   } else {
5149     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5150     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5151     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5152     pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5153   }
5154   PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5155   PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5156 
5157   /* free memory */
5158   PetscCall(PetscFree(V_to_eff_V));
5159   PetscCall(PetscFree(C_to_eff_C));
5160   PetscCall(PetscFree(R_eff_V_J));
5161   PetscCall(PetscFree(R_eff_C_J));
5162   PetscCall(PetscFree(B_eff_V_J));
5163   PetscCall(PetscFree(B_eff_C_J));
5164   PetscCall(ISDestroy(&is_R));
5165   PetscCall(ISRestoreIndices(is_V, &idx_V));
5166   PetscCall(ISRestoreIndices(is_C, &idx_C));
5167   PetscCall(ISDestroy(&is_V));
5168   PetscCall(ISDestroy(&is_C));
5169   PetscCall(PetscFree(idx_V_B));
5170   PetscCall(MatDestroy(&S_CV));
5171   PetscCall(MatDestroy(&S_VC));
5172   PetscCall(MatDestroy(&S_CC));
5173   if (n_vertices) PetscCall(MatDestroy(&A_VR));
5174   if (n_constraints) PetscCall(MatDestroy(&C_CR));
5175   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5176 
5177   /* Checking coarse_sub_mat and coarse basis functions */
5178   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5179   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5180   if (pcbddc->dbg_flag) {
5181     Mat       AUXMAT, TM1, TM2, TM3, TM4;
5182     Mat       coarse_phi_D, coarse_phi_B;
5183     Mat       coarse_psi_D, coarse_psi_B;
5184     Mat       A_II, A_BB, A_IB, A_BI;
5185     Mat       C_B, CPHI;
5186     IS        is_dummy;
5187     Vec       mones;
5188     MatType   checkmattype = MATSEQAIJ;
5189     PetscReal real_value;
5190 
5191     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5192       Mat A;
5193       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5194       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5195       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5196       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5197       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5198       PetscCall(MatDestroy(&A));
5199     } else {
5200       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5201       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5202       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5203       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5204     }
5205     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5206     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5207     if (!pcbddc->symmetric_primal) {
5208       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5209       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5210     }
5211     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5212     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5213     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5214     if (!pcbddc->symmetric_primal) {
5215       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5216       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5217       PetscCall(MatDestroy(&AUXMAT));
5218       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5219       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5220       PetscCall(MatDestroy(&AUXMAT));
5221       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5222       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5223       PetscCall(MatDestroy(&AUXMAT));
5224       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5225       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5226       PetscCall(MatDestroy(&AUXMAT));
5227     } else {
5228       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5229       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5230       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5231       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5232       PetscCall(MatDestroy(&AUXMAT));
5233       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5234       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5235       PetscCall(MatDestroy(&AUXMAT));
5236     }
5237     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5238     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5239     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5240     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5241     if (pcbddc->benign_n) {
5242       Mat                B0_B, B0_BPHI;
5243       const PetscScalar *data2;
5244       PetscScalar       *data;
5245       PetscInt           j;
5246 
5247       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5248       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5249       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5250       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5251       PetscCall(MatDenseGetArray(TM1, &data));
5252       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5253       for (j = 0; j < pcbddc->benign_n; j++) {
5254         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5255         for (i = 0; i < pcbddc->local_primal_size; i++) {
5256           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5257           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5258         }
5259       }
5260       PetscCall(MatDenseRestoreArray(TM1, &data));
5261       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5262       PetscCall(MatDestroy(&B0_B));
5263       PetscCall(ISDestroy(&is_dummy));
5264       PetscCall(MatDestroy(&B0_BPHI));
5265     }
5266     PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5267     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5268     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5269     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5270 
5271     /* check constraints */
5272     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5273     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5274     if (!pcbddc->benign_n) { /* TODO: add benign case */
5275       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5276     } else {
5277       PetscScalar *data;
5278       Mat          tmat;
5279       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5280       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5281       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5282       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5283       PetscCall(MatDestroy(&tmat));
5284     }
5285     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5286     PetscCall(VecSet(mones, -1.0));
5287     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5288     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5289     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5290     if (!pcbddc->symmetric_primal) {
5291       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5292       PetscCall(VecSet(mones, -1.0));
5293       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5294       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5295       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5296     }
5297     PetscCall(MatDestroy(&C_B));
5298     PetscCall(MatDestroy(&CPHI));
5299     PetscCall(ISDestroy(&is_dummy));
5300     PetscCall(VecDestroy(&mones));
5301     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5302     PetscCall(MatDestroy(&A_II));
5303     PetscCall(MatDestroy(&A_BB));
5304     PetscCall(MatDestroy(&A_IB));
5305     PetscCall(MatDestroy(&A_BI));
5306     PetscCall(MatDestroy(&TM1));
5307     PetscCall(MatDestroy(&TM2));
5308     PetscCall(MatDestroy(&TM3));
5309     PetscCall(MatDestroy(&TM4));
5310     PetscCall(MatDestroy(&coarse_phi_D));
5311     PetscCall(MatDestroy(&coarse_phi_B));
5312     if (!pcbddc->symmetric_primal) {
5313       PetscCall(MatDestroy(&coarse_psi_D));
5314       PetscCall(MatDestroy(&coarse_psi_B));
5315     }
5316   }
5317 
5318 #if 0
5319   {
5320     PetscViewer viewer;
5321     char filename[256];
5322 
5323     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5324     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5325     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5326     PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5327     PetscCall(MatView(*coarse_submat,viewer));
5328     if (pcbddc->coarse_phi_B) {
5329       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5330       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5331     }
5332     if (pcbddc->coarse_phi_D) {
5333       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5334       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5335     }
5336     if (pcbddc->coarse_psi_B) {
5337       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5338       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5339     }
5340     if (pcbddc->coarse_psi_D) {
5341       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5342       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5343     }
5344     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5345     PetscCall(MatView(pcbddc->local_mat,viewer));
5346     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5347     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5348     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5349     PetscCall(ISView(pcis->is_I_local,viewer));
5350     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5351     PetscCall(ISView(pcis->is_B_local,viewer));
5352     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5353     PetscCall(ISView(pcbddc->is_R_local,viewer));
5354     PetscCall(PetscViewerDestroy(&viewer));
5355   }
5356 #endif
5357 
5358   /* device support */
5359   {
5360     PetscBool iscuda, iship, iskokkos;
5361     MatType   mtype = NULL;
5362 
5363     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5364     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5365     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5366     if (iskokkos) {
5367       if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5368       else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5369     }
5370     if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5371     else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5372     else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5373     if (mtype) {
5374       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5375       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5376       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5377       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5378       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5379       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5380     }
5381   }
5382   PetscFunctionReturn(PETSC_SUCCESS);
5383 }
5384 
5385 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5386 {
5387   Mat      *work_mat;
5388   IS        isrow_s, iscol_s;
5389   PetscBool rsorted, csorted;
5390   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5391 
5392   PetscFunctionBegin;
5393   PetscCall(ISSorted(isrow, &rsorted));
5394   PetscCall(ISSorted(iscol, &csorted));
5395   PetscCall(ISGetLocalSize(isrow, &rsize));
5396   PetscCall(ISGetLocalSize(iscol, &csize));
5397 
5398   if (!rsorted) {
5399     const PetscInt *idxs;
5400     PetscInt       *idxs_sorted, i;
5401 
5402     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5403     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5404     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5405     PetscCall(ISGetIndices(isrow, &idxs));
5406     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5407     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5408     PetscCall(ISRestoreIndices(isrow, &idxs));
5409     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5410   } else {
5411     PetscCall(PetscObjectReference((PetscObject)isrow));
5412     isrow_s = isrow;
5413   }
5414 
5415   if (!csorted) {
5416     if (isrow == iscol) {
5417       PetscCall(PetscObjectReference((PetscObject)isrow_s));
5418       iscol_s = isrow_s;
5419     } else {
5420       const PetscInt *idxs;
5421       PetscInt       *idxs_sorted, i;
5422 
5423       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5424       PetscCall(PetscMalloc1(csize, &idxs_sorted));
5425       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5426       PetscCall(ISGetIndices(iscol, &idxs));
5427       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5428       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5429       PetscCall(ISRestoreIndices(iscol, &idxs));
5430       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5431     }
5432   } else {
5433     PetscCall(PetscObjectReference((PetscObject)iscol));
5434     iscol_s = iscol;
5435   }
5436 
5437   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5438 
5439   if (!rsorted || !csorted) {
5440     Mat new_mat;
5441     IS  is_perm_r, is_perm_c;
5442 
5443     if (!rsorted) {
5444       PetscInt *idxs_r, i;
5445       PetscCall(PetscMalloc1(rsize, &idxs_r));
5446       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5447       PetscCall(PetscFree(idxs_perm_r));
5448       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5449     } else {
5450       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5451     }
5452     PetscCall(ISSetPermutation(is_perm_r));
5453 
5454     if (!csorted) {
5455       if (isrow_s == iscol_s) {
5456         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5457         is_perm_c = is_perm_r;
5458       } else {
5459         PetscInt *idxs_c, i;
5460         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5461         PetscCall(PetscMalloc1(csize, &idxs_c));
5462         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5463         PetscCall(PetscFree(idxs_perm_c));
5464         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5465       }
5466     } else {
5467       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5468     }
5469     PetscCall(ISSetPermutation(is_perm_c));
5470 
5471     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5472     PetscCall(MatDestroy(&work_mat[0]));
5473     work_mat[0] = new_mat;
5474     PetscCall(ISDestroy(&is_perm_r));
5475     PetscCall(ISDestroy(&is_perm_c));
5476   }
5477 
5478   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5479   *B = work_mat[0];
5480   PetscCall(MatDestroyMatrices(1, &work_mat));
5481   PetscCall(ISDestroy(&isrow_s));
5482   PetscCall(ISDestroy(&iscol_s));
5483   PetscFunctionReturn(PETSC_SUCCESS);
5484 }
5485 
5486 static PetscErrorCode MatPtAPWithPrefix_Private(Mat A, Mat P, PetscReal fill, const char *prefix, Mat *C)
5487 {
5488   PetscFunctionBegin;
5489   PetscCall(MatProductCreate(A, P, NULL, C));
5490   PetscCall(MatProductSetType(*C, MATPRODUCT_PtAP));
5491   PetscCall(MatProductSetAlgorithm(*C, "default"));
5492   PetscCall(MatProductSetFill(*C, fill));
5493   PetscCall(MatSetOptionsPrefix(*C, prefix));
5494   PetscCall(MatProductSetFromOptions(*C));
5495   PetscCall(MatProductSymbolic(*C));
5496   PetscCall(MatProductNumeric(*C));
5497   (*C)->symmetric = A->symmetric;
5498   (*C)->spd       = A->spd;
5499   PetscFunctionReturn(PETSC_SUCCESS);
5500 }
5501 
5502 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5503 {
5504   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
5505   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
5506   Mat       new_mat, lA;
5507   IS        is_local, is_global;
5508   PetscInt  local_size;
5509   PetscBool isseqaij, issym, isset;
5510   char      ptapprefix[256];
5511 
5512   PetscFunctionBegin;
5513   PetscCall(MatDestroy(&pcbddc->local_mat));
5514   PetscCall(MatGetSize(matis->A, &local_size, NULL));
5515   if (pcbddc->mat_graph->multi_element) {
5516     Mat     *mats, *bdiags;
5517     IS      *gsubs;
5518     PetscInt nsubs = pcbddc->n_local_subs;
5519 
5520     PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5521 #if 1
5522     PetscCall(PetscMalloc1(nsubs, &gsubs));
5523     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5524     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5525     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5526     PetscCall(PetscFree(gsubs));
5527 #else /* this does not work since MatCreateSubMatrices does not support repeated indices */
5528     Mat *tmats;
5529     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5530     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5531     PetscCall(ISDestroy(&is_local));
5532     PetscCall(MatSetOption(ChangeOfBasisMatrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
5533     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, 1, &is_global, &is_global, MAT_INITIAL_MATRIX, &tmats));
5534     PetscCall(ISDestroy(&is_global));
5535     PetscCall(MatCreateSubMatrices(tmats[0], nsubs, pcbddc->local_subs, pcbddc->local_subs, MAT_INITIAL_MATRIX, &bdiags));
5536     PetscCall(MatDestroySubMatrices(1, &tmats));
5537 #endif
5538     for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5539     PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5540     PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5541     PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5542     PetscCall(PetscFree(mats));
5543   } else {
5544     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5545     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5546     PetscCall(ISDestroy(&is_local));
5547     PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5548     PetscCall(ISDestroy(&is_global));
5549   }
5550   if (pcbddc->dbg_flag) {
5551     Vec       x, x_change;
5552     PetscReal error;
5553 
5554     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5555     PetscCall(VecSetRandom(x, NULL));
5556     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5557     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5558     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5559     PetscCall(MatMult(new_mat, matis->x, matis->y));
5560     if (!pcbddc->change_interior) {
5561       const PetscScalar *x, *y, *v;
5562       PetscReal          lerror = 0.;
5563       PetscInt           i;
5564 
5565       PetscCall(VecGetArrayRead(matis->x, &x));
5566       PetscCall(VecGetArrayRead(matis->y, &y));
5567       PetscCall(VecGetArrayRead(matis->counter, &v));
5568       for (i = 0; i < local_size; i++)
5569         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5570       PetscCall(VecRestoreArrayRead(matis->x, &x));
5571       PetscCall(VecRestoreArrayRead(matis->y, &y));
5572       PetscCall(VecRestoreArrayRead(matis->counter, &v));
5573       PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5574       if (error > PETSC_SMALL) {
5575         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5576           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5577         } else {
5578           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5579         }
5580       }
5581     }
5582     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5583     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5584     PetscCall(VecAXPY(x, -1.0, x_change));
5585     PetscCall(VecNorm(x, NORM_INFINITY, &error));
5586     if (error > PETSC_SMALL) {
5587       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5588         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5589       } else {
5590         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5591       }
5592     }
5593     PetscCall(VecDestroy(&x));
5594     PetscCall(VecDestroy(&x_change));
5595   }
5596 
5597   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5598   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5599 
5600   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5601   if (((PetscObject)pc)->prefix) PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "%spc_bddc_change_", ((PetscObject)pc)->prefix));
5602   else PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "pc_bddc_change_"));
5603   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5604   if (isseqaij) {
5605     PetscCall(MatDestroy(&pcbddc->local_mat));
5606     PetscCall(MatPtAPWithPrefix_Private(matis->A, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5607     if (lA) {
5608       Mat work;
5609       PetscCall(MatPtAPWithPrefix_Private(lA, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5610       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5611       PetscCall(MatDestroy(&work));
5612     }
5613   } else {
5614     Mat work_mat;
5615 
5616     PetscCall(MatDestroy(&pcbddc->local_mat));
5617     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5618     PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5619     PetscCall(MatDestroy(&work_mat));
5620     if (lA) {
5621       Mat work;
5622       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5623       PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5624       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5625       PetscCall(MatDestroy(&work));
5626     }
5627   }
5628   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5629   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5630   PetscCall(MatDestroy(&new_mat));
5631   PetscFunctionReturn(PETSC_SUCCESS);
5632 }
5633 
5634 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5635 {
5636   PC_IS          *pcis        = (PC_IS *)pc->data;
5637   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
5638   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
5639   PetscInt       *idx_R_local = NULL;
5640   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
5641   PetscInt        vbs, bs;
5642   PetscBT         bitmask = NULL;
5643 
5644   PetscFunctionBegin;
5645   /*
5646     No need to setup local scatters if
5647       - primal space is unchanged
5648         AND
5649       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5650         AND
5651       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5652   */
5653   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5654   /* destroy old objects */
5655   PetscCall(ISDestroy(&pcbddc->is_R_local));
5656   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5657   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5658   /* Set Non-overlapping dimensions */
5659   n_B        = pcis->n_B;
5660   n_D        = pcis->n - n_B;
5661   n_vertices = pcbddc->n_vertices;
5662 
5663   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5664 
5665   /* create auxiliary bitmask and allocate workspace */
5666   if (!sub_schurs || !sub_schurs->reuse_solver) {
5667     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5668     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5669     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5670 
5671     for (i = 0, n_R = 0; i < pcis->n; i++) {
5672       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5673     }
5674   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5675     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5676 
5677     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5678     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5679   }
5680 
5681   /* Block code */
5682   vbs = 1;
5683   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5684   if (bs > 1 && !(n_vertices % bs)) {
5685     PetscBool is_blocked = PETSC_TRUE;
5686     PetscInt *vary;
5687     if (!sub_schurs || !sub_schurs->reuse_solver) {
5688       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5689       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5690       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5691       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5692       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5693       for (i = 0; i < pcis->n / bs; i++) {
5694         if (vary[i] != 0 && vary[i] != bs) {
5695           is_blocked = PETSC_FALSE;
5696           break;
5697         }
5698       }
5699       PetscCall(PetscFree(vary));
5700     } else {
5701       /* Verify directly the R set */
5702       for (i = 0; i < n_R / bs; i++) {
5703         PetscInt j, node = idx_R_local[bs * i];
5704         for (j = 1; j < bs; j++) {
5705           if (node != idx_R_local[bs * i + j] - j) {
5706             is_blocked = PETSC_FALSE;
5707             break;
5708           }
5709         }
5710       }
5711     }
5712     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5713       vbs = bs;
5714       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5715     }
5716   }
5717   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5718   if (sub_schurs && sub_schurs->reuse_solver) {
5719     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5720 
5721     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5722     PetscCall(ISDestroy(&reuse_solver->is_R));
5723     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5724     reuse_solver->is_R = pcbddc->is_R_local;
5725   } else {
5726     PetscCall(PetscFree(idx_R_local));
5727   }
5728 
5729   /* print some info if requested */
5730   if (pcbddc->dbg_flag) {
5731     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5732     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5733     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5734     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5735     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5736     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5737                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5738     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5739   }
5740 
5741   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5742   if (!sub_schurs || !sub_schurs->reuse_solver) {
5743     IS        is_aux1, is_aux2;
5744     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5745 
5746     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5747     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5748     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5749     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5750     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5751     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5752     for (i = 0, j = 0; i < n_R; i++) {
5753       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5754     }
5755     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5756     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5757     for (i = 0, j = 0; i < n_B; i++) {
5758       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5759     }
5760     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5761     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5762     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5763     PetscCall(ISDestroy(&is_aux1));
5764     PetscCall(ISDestroy(&is_aux2));
5765 
5766     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5767       PetscCall(PetscMalloc1(n_D, &aux_array1));
5768       for (i = 0, j = 0; i < n_R; i++) {
5769         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5770       }
5771       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5772       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5773       PetscCall(ISDestroy(&is_aux1));
5774     }
5775     PetscCall(PetscBTDestroy(&bitmask));
5776     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5777   } else {
5778     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5779     IS                 tis;
5780     PetscInt           schur_size;
5781 
5782     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5783     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5784     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5785     PetscCall(ISDestroy(&tis));
5786     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5787       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5788       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5789       PetscCall(ISDestroy(&tis));
5790     }
5791   }
5792   PetscFunctionReturn(PETSC_SUCCESS);
5793 }
5794 
5795 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5796 {
5797   MatNullSpace NullSpace;
5798   Mat          dmat;
5799   const Vec   *nullvecs;
5800   Vec          v, v2, *nullvecs2;
5801   VecScatter   sct = NULL;
5802   PetscScalar *ddata;
5803   PetscInt     k, nnsp_size, bsiz, bsiz2, n, N, bs;
5804   PetscBool    nnsp_has_cnst;
5805 
5806   PetscFunctionBegin;
5807   if (!is && !B) { /* MATIS */
5808     Mat_IS *matis = (Mat_IS *)A->data;
5809 
5810     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5811     sct = matis->cctx;
5812     PetscCall(PetscObjectReference((PetscObject)sct));
5813   } else {
5814     PetscCall(MatGetNullSpace(B, &NullSpace));
5815     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5816     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5817   }
5818   PetscCall(MatGetNullSpace(A, &NullSpace));
5819   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5820   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5821 
5822   PetscCall(MatCreateVecs(A, &v, NULL));
5823   PetscCall(MatCreateVecs(B, &v2, NULL));
5824   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5825   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs));
5826   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5827   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5828   PetscCall(VecGetBlockSize(v2, &bs));
5829   PetscCall(VecGetSize(v2, &N));
5830   PetscCall(VecGetLocalSize(v2, &n));
5831   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5832   for (k = 0; k < nnsp_size; k++) {
5833     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5834     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5835     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5836   }
5837   if (nnsp_has_cnst) {
5838     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5839     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5840   }
5841   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5842   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5843 
5844   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5845   PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault));
5846   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5847   PetscCall(MatDestroy(&dmat));
5848 
5849   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5850   PetscCall(PetscFree(nullvecs2));
5851   PetscCall(MatSetNearNullSpace(B, NullSpace));
5852   PetscCall(MatNullSpaceDestroy(&NullSpace));
5853   PetscCall(VecDestroy(&v));
5854   PetscCall(VecDestroy(&v2));
5855   PetscCall(VecScatterDestroy(&sct));
5856   PetscFunctionReturn(PETSC_SUCCESS);
5857 }
5858 
5859 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5860 {
5861   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5862   PC_IS       *pcis   = (PC_IS *)pc->data;
5863   PC           pc_temp;
5864   Mat          A_RR;
5865   MatNullSpace nnsp;
5866   MatReuse     reuse;
5867   PetscScalar  m_one = -1.0;
5868   PetscReal    value;
5869   PetscInt     n_D, n_R;
5870   PetscBool    issbaij, opts, isset, issym;
5871   PetscBool    f = PETSC_FALSE;
5872   char         dir_prefix[256], neu_prefix[256], str_level[16];
5873   size_t       len;
5874 
5875   PetscFunctionBegin;
5876   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5877   /* approximate solver, propagate NearNullSpace if needed */
5878   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5879     MatNullSpace gnnsp1, gnnsp2;
5880     PetscBool    lhas, ghas;
5881 
5882     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5883     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5884     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5885     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5886     PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5887     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5888   }
5889 
5890   /* compute prefixes */
5891   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5892   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5893   if (!pcbddc->current_level) {
5894     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5895     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5896     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5897     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5898   } else {
5899     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
5900     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5901     len -= 15;                                /* remove "pc_bddc_coarse_" */
5902     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5903     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5904     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5905     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5906     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5907     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5908     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5909     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5910     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5911   }
5912 
5913   /* DIRICHLET PROBLEM */
5914   if (dirichlet) {
5915     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5916     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5917       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5918       if (pcbddc->dbg_flag) {
5919         Mat A_IIn;
5920 
5921         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5922         PetscCall(MatDestroy(&pcis->A_II));
5923         pcis->A_II = A_IIn;
5924       }
5925     }
5926     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5927     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
5928 
5929     /* Matrix for Dirichlet problem is pcis->A_II */
5930     n_D  = pcis->n - pcis->n_B;
5931     opts = PETSC_FALSE;
5932     if (!pcbddc->ksp_D) { /* create object if not yet build */
5933       opts = PETSC_TRUE;
5934       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5935       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
5936       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5937       /* default */
5938       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5939       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5940       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5941       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5942       if (issbaij) {
5943         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5944       } else {
5945         PetscCall(PCSetType(pc_temp, PCLU));
5946       }
5947       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5948     }
5949     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5950     PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
5951     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5952     /* Allow user's customization */
5953     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5954     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5955     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5956       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5957     }
5958     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5959     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5960     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5961     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5962       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5963       const PetscInt *idxs;
5964       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
5965 
5966       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5967       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5968       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5969       for (i = 0; i < nl; i++) {
5970         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5971       }
5972       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5973       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5974       PetscCall(PetscFree(scoords));
5975     }
5976     if (sub_schurs && sub_schurs->reuse_solver) {
5977       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5978 
5979       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5980     }
5981 
5982     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5983     if (!n_D) {
5984       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5985       PetscCall(PCSetType(pc_temp, PCNONE));
5986     }
5987     PetscCall(KSPSetUp(pcbddc->ksp_D));
5988     /* set ksp_D into pcis data */
5989     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5990     PetscCall(KSPDestroy(&pcis->ksp_D));
5991     pcis->ksp_D = pcbddc->ksp_D;
5992   }
5993 
5994   /* NEUMANN PROBLEM */
5995   A_RR = NULL;
5996   if (neumann) {
5997     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5998     PetscInt        ibs, mbs;
5999     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
6000     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
6001 
6002     reuse_neumann_solver = PETSC_FALSE;
6003     if (sub_schurs && sub_schurs->reuse_solver) {
6004       IS iP;
6005 
6006       reuse_neumann_solver = PETSC_TRUE;
6007       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
6008       if (iP) reuse_neumann_solver = PETSC_FALSE;
6009     }
6010     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
6011     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
6012     if (pcbddc->ksp_R) { /* already created ksp */
6013       PetscInt nn_R;
6014       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
6015       PetscCall(PetscObjectReference((PetscObject)A_RR));
6016       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
6017       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
6018         PetscCall(KSPReset(pcbddc->ksp_R));
6019         PetscCall(MatDestroy(&A_RR));
6020         reuse = MAT_INITIAL_MATRIX;
6021       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
6022         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
6023           PetscCall(MatDestroy(&A_RR));
6024           reuse = MAT_INITIAL_MATRIX;
6025         } else { /* safe to reuse the matrix */
6026           reuse = MAT_REUSE_MATRIX;
6027         }
6028       }
6029       /* last check */
6030       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
6031         PetscCall(MatDestroy(&A_RR));
6032         reuse = MAT_INITIAL_MATRIX;
6033       }
6034     } else { /* first time, so we need to create the matrix */
6035       reuse = MAT_INITIAL_MATRIX;
6036     }
6037     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
6038        TODO: Get Rid of these conversions */
6039     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
6040     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
6041     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
6042     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
6043       if (matis->A == pcbddc->local_mat) {
6044         PetscCall(MatDestroy(&pcbddc->local_mat));
6045         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6046       } else {
6047         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6048       }
6049     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
6050       if (matis->A == pcbddc->local_mat) {
6051         PetscCall(MatDestroy(&pcbddc->local_mat));
6052         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6053       } else {
6054         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6055       }
6056     }
6057     /* extract A_RR */
6058     if (reuse_neumann_solver) {
6059       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6060 
6061       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6062         PetscCall(MatDestroy(&A_RR));
6063         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6064           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6065         } else {
6066           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6067         }
6068       } else {
6069         PetscCall(MatDestroy(&A_RR));
6070         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6071         PetscCall(PetscObjectReference((PetscObject)A_RR));
6072       }
6073     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6074       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6075     }
6076     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6077     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6078     opts = PETSC_FALSE;
6079     if (!pcbddc->ksp_R) { /* create object if not present */
6080       opts = PETSC_TRUE;
6081       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6082       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6083       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6084       /* default */
6085       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6086       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6087       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6088       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6089       if (issbaij) {
6090         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6091       } else {
6092         PetscCall(PCSetType(pc_temp, PCLU));
6093       }
6094       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6095     }
6096     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6097     PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6098     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6099     if (opts) { /* Allow user's customization once */
6100       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6101     }
6102     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6103     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6104       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6105     }
6106     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6107     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6108     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6109     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6110       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6111       const PetscInt *idxs;
6112       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
6113 
6114       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6115       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6116       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6117       for (i = 0; i < nl; i++) {
6118         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6119       }
6120       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6121       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6122       PetscCall(PetscFree(scoords));
6123     }
6124 
6125     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6126     if (!n_R) {
6127       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6128       PetscCall(PCSetType(pc_temp, PCNONE));
6129     }
6130     /* Reuse solver if it is present */
6131     if (reuse_neumann_solver) {
6132       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6133 
6134       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6135     }
6136     PetscCall(KSPSetUp(pcbddc->ksp_R));
6137   }
6138 
6139   if (pcbddc->dbg_flag) {
6140     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6141     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6142     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6143   }
6144   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6145 
6146   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6147   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6148   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6149   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6150   /* check Dirichlet and Neumann solvers */
6151   if (pcbddc->dbg_flag) {
6152     if (dirichlet) { /* Dirichlet */
6153       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6154       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6155       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6156       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6157       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6158       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6159       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6160       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6161     }
6162     if (neumann) { /* Neumann */
6163       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6164       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6165       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6166       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6167       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6168       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6169       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6170       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6171     }
6172   }
6173   /* free Neumann problem's matrix */
6174   PetscCall(MatDestroy(&A_RR));
6175   PetscFunctionReturn(PETSC_SUCCESS);
6176 }
6177 
6178 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6179 {
6180   PC_BDDC        *pcbddc       = (PC_BDDC *)pc->data;
6181   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
6182   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6183 
6184   PetscFunctionBegin;
6185   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6186   if (!pcbddc->switch_static) {
6187     if (applytranspose && pcbddc->local_auxmat1) {
6188       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6189       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6190     }
6191     if (!reuse_solver) {
6192       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6193       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6194     } else {
6195       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6196 
6197       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6198       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6199     }
6200   } else {
6201     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6202     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6203     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6204     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6205     if (applytranspose && pcbddc->local_auxmat1) {
6206       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6207       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6208       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6209       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6210     }
6211   }
6212   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6213   if (!reuse_solver || pcbddc->switch_static) {
6214     if (applytranspose) {
6215       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6216     } else {
6217       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6218     }
6219     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6220   } else {
6221     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6222 
6223     if (applytranspose) {
6224       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6225     } else {
6226       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6227     }
6228   }
6229   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6230   PetscCall(VecSet(inout_B, 0.));
6231   if (!pcbddc->switch_static) {
6232     if (!reuse_solver) {
6233       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6234       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6235     } else {
6236       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6237 
6238       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6239       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6240     }
6241     if (!applytranspose && pcbddc->local_auxmat1) {
6242       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6243       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6244     }
6245   } else {
6246     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6247     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6248     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6249     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6250     if (!applytranspose && pcbddc->local_auxmat1) {
6251       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6252       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6253     }
6254     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6255     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6256     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6257     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6258   }
6259   PetscFunctionReturn(PETSC_SUCCESS);
6260 }
6261 
6262 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
6263 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6264 {
6265   PC_BDDC          *pcbddc = (PC_BDDC *)pc->data;
6266   PC_IS            *pcis   = (PC_IS *)pc->data;
6267   const PetscScalar zero   = 0.0;
6268 
6269   PetscFunctionBegin;
6270   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6271   if (!pcbddc->benign_apply_coarse_only) {
6272     if (applytranspose) {
6273       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6274       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6275     } else {
6276       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6277       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6278     }
6279   } else {
6280     PetscCall(VecSet(pcbddc->vec1_P, zero));
6281   }
6282 
6283   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6284   if (pcbddc->benign_n) {
6285     PetscScalar *array;
6286     PetscInt     j;
6287 
6288     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6289     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6290     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6291   }
6292 
6293   /* start communications from local primal nodes to rhs of coarse solver */
6294   PetscCall(VecSet(pcbddc->coarse_vec, zero));
6295   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6296   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6297 
6298   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6299   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6300   if (pcbddc->coarse_ksp) {
6301     Mat          coarse_mat;
6302     Vec          rhs, sol;
6303     MatNullSpace nullsp;
6304     PetscBool    isbddc = PETSC_FALSE;
6305 
6306     if (pcbddc->benign_have_null) {
6307       PC coarse_pc;
6308 
6309       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6310       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6311       /* we need to propagate to coarser levels the need for a possible benign correction */
6312       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6313         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)coarse_pc->data;
6314         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
6315         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6316       }
6317     }
6318     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6319     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6320     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6321     if (applytranspose) {
6322       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6323       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6324       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6325       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6326       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6327     } else {
6328       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6329       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6330         PC coarse_pc;
6331 
6332         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6333         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6334         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6335         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6336         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6337       } else {
6338         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6339         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6340         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6341       }
6342     }
6343     /* we don't need the benign correction at coarser levels anymore */
6344     if (pcbddc->benign_have_null && isbddc) {
6345       PC       coarse_pc;
6346       PC_BDDC *coarsepcbddc;
6347 
6348       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6349       coarsepcbddc                           = (PC_BDDC *)coarse_pc->data;
6350       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
6351       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6352     }
6353   }
6354   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6355 
6356   /* Local solution on R nodes */
6357   if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6358   /* communications from coarse sol to local primal nodes */
6359   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6360   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6361 
6362   /* Sum contributions from the two levels */
6363   if (!pcbddc->benign_apply_coarse_only) {
6364     if (applytranspose) {
6365       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6366       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6367     } else {
6368       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6369       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6370     }
6371     /* store p0 */
6372     if (pcbddc->benign_n) {
6373       PetscScalar *array;
6374       PetscInt     j;
6375 
6376       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6377       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6378       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6379     }
6380   } else { /* expand the coarse solution */
6381     if (applytranspose) {
6382       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6383     } else {
6384       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6385     }
6386   }
6387   PetscFunctionReturn(PETSC_SUCCESS);
6388 }
6389 
6390 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6391 {
6392   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6393   Vec                from, to;
6394   const PetscScalar *array;
6395 
6396   PetscFunctionBegin;
6397   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6398     from = pcbddc->coarse_vec;
6399     to   = pcbddc->vec1_P;
6400     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6401       Vec tvec;
6402 
6403       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6404       PetscCall(VecResetArray(tvec));
6405       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6406       PetscCall(VecGetArrayRead(tvec, &array));
6407       PetscCall(VecPlaceArray(from, array));
6408       PetscCall(VecRestoreArrayRead(tvec, &array));
6409     }
6410   } else { /* from local to global -> put data in coarse right-hand side */
6411     from = pcbddc->vec1_P;
6412     to   = pcbddc->coarse_vec;
6413   }
6414   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6415   PetscFunctionReturn(PETSC_SUCCESS);
6416 }
6417 
6418 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6419 {
6420   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6421   Vec                from, to;
6422   const PetscScalar *array;
6423 
6424   PetscFunctionBegin;
6425   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6426     from = pcbddc->coarse_vec;
6427     to   = pcbddc->vec1_P;
6428   } else { /* from local to global -> put data in coarse right-hand side */
6429     from = pcbddc->vec1_P;
6430     to   = pcbddc->coarse_vec;
6431   }
6432   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6433   if (smode == SCATTER_FORWARD) {
6434     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6435       Vec tvec;
6436 
6437       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6438       PetscCall(VecGetArrayRead(to, &array));
6439       PetscCall(VecPlaceArray(tvec, array));
6440       PetscCall(VecRestoreArrayRead(to, &array));
6441     }
6442   } else {
6443     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6444       PetscCall(VecResetArray(from));
6445     }
6446   }
6447   PetscFunctionReturn(PETSC_SUCCESS);
6448 }
6449 
6450 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6451 {
6452   PC_IS   *pcis   = (PC_IS *)pc->data;
6453   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6454   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
6455   /* one and zero */
6456   PetscScalar one = 1.0, zero = 0.0;
6457   /* space to store constraints and their local indices */
6458   PetscScalar *constraints_data;
6459   PetscInt    *constraints_idxs, *constraints_idxs_B;
6460   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
6461   PetscInt    *constraints_n;
6462   /* iterators */
6463   PetscInt i, j, k, total_counts, total_counts_cc, cum;
6464   /* BLAS integers */
6465   PetscBLASInt lwork, lierr;
6466   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6467   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6468   /* reuse */
6469   PetscInt  olocal_primal_size, olocal_primal_size_cc;
6470   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6471   /* change of basis */
6472   PetscBool qr_needed;
6473   PetscBT   change_basis, qr_needed_idx;
6474   /* auxiliary stuff */
6475   PetscInt *nnz, *is_indices;
6476   PetscInt  ncc;
6477   /* some quantities */
6478   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
6479   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6480   PetscReal tol; /* tolerance for retaining eigenmodes */
6481 
6482   PetscFunctionBegin;
6483   tol = PetscSqrtReal(PETSC_SMALL);
6484   /* Destroy Mat objects computed previously */
6485   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6486   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6487   PetscCall(MatDestroy(&pcbddc->switch_static_change));
6488   /* save info on constraints from previous setup (if any) */
6489   olocal_primal_size    = pcbddc->local_primal_size;
6490   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6491   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6492   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6493   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6494   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6495   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6496 
6497   if (!pcbddc->adaptive_selection) {
6498     IS           ISForVertices, *ISForFaces, *ISForEdges;
6499     MatNullSpace nearnullsp;
6500     const Vec   *nearnullvecs;
6501     Vec         *localnearnullsp;
6502     PetscScalar *array;
6503     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6504     PetscBool    nnsp_has_cnst;
6505     /* LAPACK working arrays for SVD or POD */
6506     PetscBool    skip_lapack, boolforchange;
6507     PetscScalar *work;
6508     PetscReal   *singular_vals;
6509 #if defined(PETSC_USE_COMPLEX)
6510     PetscReal *rwork;
6511 #endif
6512     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6513     PetscBLASInt dummy_int    = 1;
6514     PetscScalar  dummy_scalar = 1.;
6515     PetscBool    use_pod      = PETSC_FALSE;
6516 
6517     /* MKL SVD with same input gives different results on different processes! */
6518 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6519     use_pod = PETSC_TRUE;
6520 #endif
6521     /* Get index sets for faces, edges and vertices from graph */
6522     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6523     o_nf       = n_ISForFaces;
6524     o_ne       = n_ISForEdges;
6525     n_vertices = 0;
6526     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6527     /* print some info */
6528     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6529       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6530       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6531       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6532       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6533       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6534       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6535       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6536       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6537       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6538     }
6539 
6540     if (!pcbddc->use_vertices) n_vertices = 0;
6541     if (!pcbddc->use_edges) n_ISForEdges = 0;
6542     if (!pcbddc->use_faces) n_ISForFaces = 0;
6543 
6544     /* check if near null space is attached to global mat */
6545     if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6546     else nearnullsp = NULL;
6547 
6548     if (nearnullsp) {
6549       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6550       /* remove any stored info */
6551       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6552       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6553       /* store information for BDDC solver reuse */
6554       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6555       pcbddc->onearnullspace = nearnullsp;
6556       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6557       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6558     } else { /* if near null space is not provided BDDC uses constants by default */
6559       nnsp_size     = 0;
6560       nnsp_has_cnst = PETSC_TRUE;
6561     }
6562     /* get max number of constraints on a single cc */
6563     max_constraints = nnsp_size;
6564     if (nnsp_has_cnst) max_constraints++;
6565 
6566     /*
6567          Evaluate maximum storage size needed by the procedure
6568          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6569          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6570          There can be multiple constraints per connected component
6571                                                                                                                                                            */
6572     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6573     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6574 
6575     total_counts = n_ISForFaces + n_ISForEdges;
6576     total_counts *= max_constraints;
6577     total_counts += n_vertices;
6578     PetscCall(PetscBTCreate(total_counts, &change_basis));
6579 
6580     total_counts           = 0;
6581     max_size_of_constraint = 0;
6582     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6583       IS used_is;
6584       if (i < n_ISForEdges) {
6585         used_is = ISForEdges[i];
6586       } else {
6587         used_is = ISForFaces[i - n_ISForEdges];
6588       }
6589       PetscCall(ISGetSize(used_is, &j));
6590       total_counts += j;
6591       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6592     }
6593     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6594 
6595     /* get local part of global near null space vectors */
6596     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6597     for (k = 0; k < nnsp_size; k++) {
6598       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6599       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6600       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6601     }
6602 
6603     /* whether or not to skip lapack calls */
6604     skip_lapack = PETSC_TRUE;
6605     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6606 
6607     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6608     if (!skip_lapack) {
6609       PetscScalar temp_work;
6610 
6611       if (use_pod) {
6612         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6613         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6614         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6615         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6616 #if defined(PETSC_USE_COMPLEX)
6617         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6618 #endif
6619         /* now we evaluate the optimal workspace using query with lwork=-1 */
6620         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6621         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6622         lwork = -1;
6623         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6624 #if !defined(PETSC_USE_COMPLEX)
6625         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6626 #else
6627         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6628 #endif
6629         PetscCall(PetscFPTrapPop());
6630         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6631       } else {
6632 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6633         /* SVD */
6634         PetscInt max_n, min_n;
6635         max_n = max_size_of_constraint;
6636         min_n = max_constraints;
6637         if (max_size_of_constraint < max_constraints) {
6638           min_n = max_size_of_constraint;
6639           max_n = max_constraints;
6640         }
6641         PetscCall(PetscMalloc1(min_n, &singular_vals));
6642   #if defined(PETSC_USE_COMPLEX)
6643         PetscCall(PetscMalloc1(5 * min_n, &rwork));
6644   #endif
6645         /* now we evaluate the optimal workspace using query with lwork=-1 */
6646         lwork = -1;
6647         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6648         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6649         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6650         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6651   #if !defined(PETSC_USE_COMPLEX)
6652         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6653   #else
6654         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6655   #endif
6656         PetscCall(PetscFPTrapPop());
6657         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6658 #else
6659         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6660 #endif /* on missing GESVD */
6661       }
6662       /* Allocate optimal workspace */
6663       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6664       PetscCall(PetscMalloc1(lwork, &work));
6665     }
6666     /* Now we can loop on constraining sets */
6667     total_counts            = 0;
6668     constraints_idxs_ptr[0] = 0;
6669     constraints_data_ptr[0] = 0;
6670     /* vertices */
6671     if (n_vertices) {
6672       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6673       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6674       for (i = 0; i < n_vertices; i++) {
6675         constraints_n[total_counts]            = 1;
6676         constraints_data[total_counts]         = 1.0;
6677         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6678         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6679         total_counts++;
6680       }
6681       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6682     }
6683 
6684     /* edges and faces */
6685     total_counts_cc = total_counts;
6686     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6687       IS        used_is;
6688       PetscBool idxs_copied = PETSC_FALSE;
6689 
6690       if (ncc < n_ISForEdges) {
6691         used_is       = ISForEdges[ncc];
6692         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6693       } else {
6694         used_is       = ISForFaces[ncc - n_ISForEdges];
6695         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6696       }
6697       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6698 
6699       PetscCall(ISGetSize(used_is, &size_of_constraint));
6700       if (!size_of_constraint) continue;
6701       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6702       if (nnsp_has_cnst) {
6703         PetscScalar quad_value;
6704 
6705         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6706         idxs_copied = PETSC_TRUE;
6707 
6708         if (!pcbddc->use_nnsp_true) {
6709           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6710         } else {
6711           quad_value = 1.0;
6712         }
6713         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6714         temp_constraints++;
6715         total_counts++;
6716       }
6717       for (k = 0; k < nnsp_size; k++) {
6718         PetscReal    real_value;
6719         PetscScalar *ptr_to_data;
6720 
6721         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6722         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6723         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6724         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6725         /* check if array is null on the connected component */
6726         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6727         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6728         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6729           temp_constraints++;
6730           total_counts++;
6731           if (!idxs_copied) {
6732             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6733             idxs_copied = PETSC_TRUE;
6734           }
6735         }
6736       }
6737       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6738       valid_constraints = temp_constraints;
6739       if (!pcbddc->use_nnsp_true && temp_constraints) {
6740         if (temp_constraints == 1) { /* just normalize the constraint */
6741           PetscScalar norm, *ptr_to_data;
6742 
6743           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6744           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6745           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6746           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6747           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6748         } else { /* perform SVD */
6749           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6750 
6751           if (use_pod) {
6752             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6753                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6754                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6755                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6756                   from that computed using LAPACKgesvd
6757                -> This is due to a different computation of eigenvectors in LAPACKheev
6758                -> The quality of the POD-computed basis will be the same */
6759             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6760             /* Store upper triangular part of correlation matrix */
6761             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6762             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6763             for (j = 0; j < temp_constraints; j++) {
6764               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6765             }
6766             /* compute eigenvalues and eigenvectors of correlation matrix */
6767             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6768             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6769 #if !defined(PETSC_USE_COMPLEX)
6770             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6771 #else
6772             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6773 #endif
6774             PetscCall(PetscFPTrapPop());
6775             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6776             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6777             j = 0;
6778             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6779             total_counts      = total_counts - j;
6780             valid_constraints = temp_constraints - j;
6781             /* scale and copy POD basis into used quadrature memory */
6782             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6783             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6784             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6785             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6786             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6787             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6788             if (j < temp_constraints) {
6789               PetscInt ii;
6790               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6791               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6792               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6793               PetscCall(PetscFPTrapPop());
6794               for (k = 0; k < temp_constraints - j; k++) {
6795                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6796               }
6797             }
6798           } else {
6799 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6800             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6801             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6802             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6803             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6804   #if !defined(PETSC_USE_COMPLEX)
6805             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6806   #else
6807             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6808   #endif
6809             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6810             PetscCall(PetscFPTrapPop());
6811             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6812             k = temp_constraints;
6813             if (k > size_of_constraint) k = size_of_constraint;
6814             j = 0;
6815             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6816             valid_constraints = k - j;
6817             total_counts      = total_counts - temp_constraints + valid_constraints;
6818 #else
6819             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6820 #endif /* on missing GESVD */
6821           }
6822         }
6823       }
6824       /* update pointers information */
6825       if (valid_constraints) {
6826         constraints_n[total_counts_cc]            = valid_constraints;
6827         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6828         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6829         /* set change_of_basis flag */
6830         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6831         total_counts_cc++;
6832       }
6833     }
6834     /* free workspace */
6835     if (!skip_lapack) {
6836       PetscCall(PetscFree(work));
6837 #if defined(PETSC_USE_COMPLEX)
6838       PetscCall(PetscFree(rwork));
6839 #endif
6840       PetscCall(PetscFree(singular_vals));
6841       PetscCall(PetscFree(correlation_mat));
6842       PetscCall(PetscFree(temp_basis));
6843     }
6844     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6845     PetscCall(PetscFree(localnearnullsp));
6846     /* free index sets of faces, edges and vertices */
6847     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6848   } else {
6849     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6850 
6851     total_counts = 0;
6852     n_vertices   = 0;
6853     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6854     max_constraints = 0;
6855     total_counts_cc = 0;
6856     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6857       total_counts += pcbddc->adaptive_constraints_n[i];
6858       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6859       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6860     }
6861     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6862     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6863     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6864     constraints_data     = pcbddc->adaptive_constraints_data;
6865     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6866     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6867     total_counts_cc = 0;
6868     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6869       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6870     }
6871 
6872     max_size_of_constraint = 0;
6873     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6874     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6875     /* Change of basis */
6876     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6877     if (pcbddc->use_change_of_basis) {
6878       for (i = 0; i < sub_schurs->n_subs; i++) {
6879         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6880       }
6881     }
6882   }
6883   pcbddc->local_primal_size = total_counts;
6884   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6885 
6886   /* map constraints_idxs in boundary numbering */
6887   if (pcbddc->use_change_of_basis) {
6888     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6889     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6890   }
6891 
6892   /* Create constraint matrix */
6893   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6894   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6895   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6896 
6897   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6898   /* determine if a QR strategy is needed for change of basis */
6899   qr_needed = pcbddc->use_qr_single;
6900   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6901   total_primal_vertices        = 0;
6902   pcbddc->local_primal_size_cc = 0;
6903   for (i = 0; i < total_counts_cc; i++) {
6904     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6905     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6906       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6907       pcbddc->local_primal_size_cc += 1;
6908     } else if (PetscBTLookup(change_basis, i)) {
6909       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6910       pcbddc->local_primal_size_cc += constraints_n[i];
6911       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6912         PetscCall(PetscBTSet(qr_needed_idx, i));
6913         qr_needed = PETSC_TRUE;
6914       }
6915     } else {
6916       pcbddc->local_primal_size_cc += 1;
6917     }
6918   }
6919   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6920   pcbddc->n_vertices = total_primal_vertices;
6921   /* permute indices in order to have a sorted set of vertices */
6922   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6923   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6924   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6925   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6926 
6927   /* nonzero structure of constraint matrix */
6928   /* and get reference dof for local constraints */
6929   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6930   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6931 
6932   j            = total_primal_vertices;
6933   total_counts = total_primal_vertices;
6934   cum          = total_primal_vertices;
6935   for (i = n_vertices; i < total_counts_cc; i++) {
6936     if (!PetscBTLookup(change_basis, i)) {
6937       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6938       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6939       cum++;
6940       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6941       for (k = 0; k < constraints_n[i]; k++) {
6942         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6943         nnz[j + k]                                        = size_of_constraint;
6944       }
6945       j += constraints_n[i];
6946     }
6947   }
6948   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6949   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6950   PetscCall(PetscFree(nnz));
6951 
6952   /* set values in constraint matrix */
6953   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6954   total_counts = total_primal_vertices;
6955   for (i = n_vertices; i < total_counts_cc; i++) {
6956     if (!PetscBTLookup(change_basis, i)) {
6957       PetscInt *cols;
6958 
6959       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6960       cols               = constraints_idxs + constraints_idxs_ptr[i];
6961       for (k = 0; k < constraints_n[i]; k++) {
6962         PetscInt     row = total_counts + k;
6963         PetscScalar *vals;
6964 
6965         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6966         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6967       }
6968       total_counts += constraints_n[i];
6969     }
6970   }
6971   /* assembling */
6972   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6973   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6974   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
6975 
6976   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6977   if (pcbddc->use_change_of_basis) {
6978     /* dual and primal dofs on a single cc */
6979     PetscInt dual_dofs, primal_dofs;
6980     /* working stuff for GEQRF */
6981     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6982     PetscBLASInt lqr_work;
6983     /* working stuff for UNGQR */
6984     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6985     PetscBLASInt lgqr_work;
6986     /* working stuff for TRTRS */
6987     PetscScalar *trs_rhs = NULL;
6988     PetscBLASInt Blas_NRHS;
6989     /* pointers for values insertion into change of basis matrix */
6990     PetscInt    *start_rows, *start_cols;
6991     PetscScalar *start_vals;
6992     /* working stuff for values insertion */
6993     PetscBT   is_primal;
6994     PetscInt *aux_primal_numbering_B;
6995     /* matrix sizes */
6996     PetscInt global_size, local_size;
6997     /* temporary change of basis */
6998     Mat localChangeOfBasisMatrix;
6999     /* extra space for debugging */
7000     PetscScalar *dbg_work = NULL;
7001 
7002     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
7003     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
7004     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
7005     /* nonzeros for local mat */
7006     PetscCall(PetscMalloc1(pcis->n, &nnz));
7007     if (!pcbddc->benign_change || pcbddc->fake_change) {
7008       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
7009     } else {
7010       const PetscInt *ii;
7011       PetscInt        n;
7012       PetscBool       flg_row;
7013       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7014       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
7015       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7016     }
7017     for (i = n_vertices; i < total_counts_cc; i++) {
7018       if (PetscBTLookup(change_basis, i)) {
7019         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7020         if (PetscBTLookup(qr_needed_idx, i)) {
7021           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
7022         } else {
7023           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
7024           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
7025         }
7026       }
7027     }
7028     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
7029     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7030     PetscCall(PetscFree(nnz));
7031     /* Set interior change in the matrix */
7032     if (!pcbddc->benign_change || pcbddc->fake_change) {
7033       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
7034     } else {
7035       const PetscInt *ii, *jj;
7036       PetscScalar    *aa;
7037       PetscInt        n;
7038       PetscBool       flg_row;
7039       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7040       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
7041       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
7042       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
7043       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7044     }
7045 
7046     if (pcbddc->dbg_flag) {
7047       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
7048       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
7049     }
7050 
7051     /* Now we loop on the constraints which need a change of basis */
7052     /*
7053        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7054        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7055 
7056        Basic blocks of change of basis matrix T computed:
7057 
7058           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7059 
7060             | 1        0   ...        0         s_1/S |
7061             | 0        1   ...        0         s_2/S |
7062             |              ...                        |
7063             | 0        ...            1     s_{n-1}/S |
7064             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
7065 
7066             with S = \sum_{i=1}^n s_i^2
7067             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7068                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7069 
7070           - QR decomposition of constraints otherwise
7071     */
7072     if (qr_needed && max_size_of_constraint) {
7073       /* space to store Q */
7074       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7075       /* array to store scaling factors for reflectors */
7076       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7077       /* first we issue queries for optimal work */
7078       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7079       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7080       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7081       lqr_work = -1;
7082       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7083       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7084       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7085       PetscCall(PetscMalloc1(lqr_work, &qr_work));
7086       lgqr_work = -1;
7087       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7088       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7089       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7090       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7091       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7092       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7093       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7094       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7095       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7096       /* array to store rhs and solution of triangular solver */
7097       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7098       /* allocating workspace for check */
7099       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7100     }
7101     /* array to store whether a node is primal or not */
7102     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7103     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7104     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7105     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7106     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7107     PetscCall(PetscFree(aux_primal_numbering_B));
7108 
7109     /* loop on constraints and see whether or not they need a change of basis and compute it */
7110     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7111       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7112       if (PetscBTLookup(change_basis, total_counts)) {
7113         /* get constraint info */
7114         primal_dofs = constraints_n[total_counts];
7115         dual_dofs   = size_of_constraint - primal_dofs;
7116 
7117         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7118 
7119         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7120 
7121           /* copy quadrature constraints for change of basis check */
7122           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7123           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7124           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7125 
7126           /* compute QR decomposition of constraints */
7127           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7128           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7129           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7130           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7131           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7132           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7133           PetscCall(PetscFPTrapPop());
7134 
7135           /* explicitly compute R^-T */
7136           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7137           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7138           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7139           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7140           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7141           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7142           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7143           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7144           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr);
7145           PetscCall(PetscFPTrapPop());
7146 
7147           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7148           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7149           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7150           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7151           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7152           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7153           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7154           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7155           PetscCall(PetscFPTrapPop());
7156 
7157           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7158              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7159              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7160           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7161           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7162           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7163           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7164           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7165           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7166           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7167           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7168           PetscCall(PetscFPTrapPop());
7169           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7170 
7171           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7172           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7173           /* insert cols for primal dofs */
7174           for (j = 0; j < primal_dofs; j++) {
7175             start_vals = &qr_basis[j * size_of_constraint];
7176             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7177             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7178           }
7179           /* insert cols for dual dofs */
7180           for (j = 0, k = 0; j < dual_dofs; k++) {
7181             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7182               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7183               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7184               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7185               j++;
7186             }
7187           }
7188 
7189           /* check change of basis */
7190           if (pcbddc->dbg_flag) {
7191             PetscInt  ii, jj;
7192             PetscBool valid_qr = PETSC_TRUE;
7193             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7194             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7195             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7196             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7197             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7198             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7199             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7200             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7201             PetscCall(PetscFPTrapPop());
7202             for (jj = 0; jj < size_of_constraint; jj++) {
7203               for (ii = 0; ii < primal_dofs; ii++) {
7204                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7205                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7206               }
7207             }
7208             if (!valid_qr) {
7209               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7210               for (jj = 0; jj < size_of_constraint; jj++) {
7211                 for (ii = 0; ii < primal_dofs; ii++) {
7212                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7213                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7214                   }
7215                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7216                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7217                   }
7218                 }
7219               }
7220             } else {
7221               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7222             }
7223           }
7224         } else { /* simple transformation block */
7225           PetscInt    row, col;
7226           PetscScalar val, norm;
7227 
7228           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7229           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7230           for (j = 0; j < size_of_constraint; j++) {
7231             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7232             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7233             if (!PetscBTLookup(is_primal, row_B)) {
7234               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7235               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7236               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7237             } else {
7238               for (k = 0; k < size_of_constraint; k++) {
7239                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7240                 if (row != col) {
7241                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7242                 } else {
7243                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7244                 }
7245                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7246               }
7247             }
7248           }
7249           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7250         }
7251       } else {
7252         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7253       }
7254     }
7255 
7256     /* free workspace */
7257     if (qr_needed) {
7258       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7259       PetscCall(PetscFree(trs_rhs));
7260       PetscCall(PetscFree(qr_tau));
7261       PetscCall(PetscFree(qr_work));
7262       PetscCall(PetscFree(gqr_work));
7263       PetscCall(PetscFree(qr_basis));
7264     }
7265     PetscCall(PetscBTDestroy(&is_primal));
7266     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7267     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7268 
7269     /* assembling of global change of variable */
7270     if (!pcbddc->fake_change) {
7271       Mat tmat;
7272 
7273       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7274       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7275       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7276       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7277       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7278       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7279       PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7280       PetscCall(MatDestroy(&tmat));
7281       PetscCall(VecSet(pcis->vec1_global, 0.0));
7282       PetscCall(VecSet(pcis->vec1_N, 1.0));
7283       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7284       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7285       PetscCall(VecReciprocal(pcis->vec1_global));
7286       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7287 
7288       /* check */
7289       if (pcbddc->dbg_flag) {
7290         PetscReal error;
7291         Vec       x, x_change;
7292 
7293         PetscCall(VecDuplicate(pcis->vec1_global, &x));
7294         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7295         PetscCall(VecSetRandom(x, NULL));
7296         PetscCall(VecCopy(x, pcis->vec1_global));
7297         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7298         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7299         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7300         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7301         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7302         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7303         PetscCall(VecAXPY(x, -1.0, x_change));
7304         PetscCall(VecNorm(x, NORM_INFINITY, &error));
7305         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7306         PetscCall(VecDestroy(&x));
7307         PetscCall(VecDestroy(&x_change));
7308       }
7309       /* adapt sub_schurs computed (if any) */
7310       if (pcbddc->use_deluxe_scaling) {
7311         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7312 
7313         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7314         if (sub_schurs && sub_schurs->S_Ej_all) {
7315           Mat S_new, tmat;
7316           IS  is_all_N, is_V_Sall = NULL;
7317 
7318           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7319           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7320           if (pcbddc->deluxe_zerorows) {
7321             ISLocalToGlobalMapping NtoSall;
7322             IS                     is_V;
7323             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7324             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7325             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7326             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7327             PetscCall(ISDestroy(&is_V));
7328           }
7329           PetscCall(ISDestroy(&is_all_N));
7330           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7331           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7332           PetscCall(PetscObjectReference((PetscObject)S_new));
7333           if (pcbddc->deluxe_zerorows) {
7334             const PetscScalar *array;
7335             const PetscInt    *idxs_V, *idxs_all;
7336             PetscInt           i, n_V;
7337 
7338             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7339             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7340             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7341             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7342             PetscCall(VecGetArrayRead(pcis->D, &array));
7343             for (i = 0; i < n_V; i++) {
7344               PetscScalar val;
7345               PetscInt    idx;
7346 
7347               idx = idxs_V[i];
7348               val = array[idxs_all[idxs_V[i]]];
7349               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7350             }
7351             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7352             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7353             PetscCall(VecRestoreArrayRead(pcis->D, &array));
7354             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7355             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7356           }
7357           sub_schurs->S_Ej_all = S_new;
7358           PetscCall(MatDestroy(&S_new));
7359           if (sub_schurs->sum_S_Ej_all) {
7360             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7361             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7362             PetscCall(PetscObjectReference((PetscObject)S_new));
7363             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7364             sub_schurs->sum_S_Ej_all = S_new;
7365             PetscCall(MatDestroy(&S_new));
7366           }
7367           PetscCall(ISDestroy(&is_V_Sall));
7368           PetscCall(MatDestroy(&tmat));
7369         }
7370         /* destroy any change of basis context in sub_schurs */
7371         if (sub_schurs && sub_schurs->change) {
7372           PetscInt i;
7373 
7374           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7375           PetscCall(PetscFree(sub_schurs->change));
7376         }
7377       }
7378       if (pcbddc->switch_static) { /* need to save the local change */
7379         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7380       } else {
7381         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7382       }
7383       /* determine if any process has changed the pressures locally */
7384       pcbddc->change_interior = pcbddc->benign_have_null;
7385     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7386       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7387       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7388       pcbddc->use_qr_single    = qr_needed;
7389     }
7390   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7391     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7392       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7393       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7394     } else {
7395       Mat benign_global = NULL;
7396       if (pcbddc->benign_have_null) {
7397         Mat M;
7398 
7399         pcbddc->change_interior = PETSC_TRUE;
7400         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7401         PetscCall(VecReciprocal(pcis->vec1_N));
7402         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7403         if (pcbddc->benign_change) {
7404           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7405           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7406         } else {
7407           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7408           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7409         }
7410         PetscCall(MatISSetLocalMat(benign_global, M));
7411         PetscCall(MatDestroy(&M));
7412         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7413         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7414       }
7415       if (pcbddc->user_ChangeOfBasisMatrix) {
7416         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7417         PetscCall(MatDestroy(&benign_global));
7418       } else if (pcbddc->benign_have_null) {
7419         pcbddc->ChangeOfBasisMatrix = benign_global;
7420       }
7421     }
7422     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7423       IS              is_global;
7424       const PetscInt *gidxs;
7425 
7426       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7427       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7428       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7429       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7430       PetscCall(ISDestroy(&is_global));
7431     }
7432   }
7433   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7434 
7435   if (!pcbddc->fake_change) {
7436     /* add pressure dofs to set of primal nodes for numbering purposes */
7437     for (i = 0; i < pcbddc->benign_n; i++) {
7438       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
7439       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7440       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
7441       pcbddc->local_primal_size_cc++;
7442       pcbddc->local_primal_size++;
7443     }
7444 
7445     /* check if a new primal space has been introduced (also take into account benign trick) */
7446     pcbddc->new_primal_space_local = PETSC_TRUE;
7447     if (olocal_primal_size == pcbddc->local_primal_size) {
7448       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7449       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7450       if (!pcbddc->new_primal_space_local) {
7451         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7452         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7453       }
7454     }
7455     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7456     PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7457   }
7458   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7459 
7460   /* flush dbg viewer */
7461   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7462 
7463   /* free workspace */
7464   PetscCall(PetscBTDestroy(&qr_needed_idx));
7465   PetscCall(PetscBTDestroy(&change_basis));
7466   if (!pcbddc->adaptive_selection) {
7467     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7468     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7469   } else {
7470     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7471     PetscCall(PetscFree(constraints_n));
7472     PetscCall(PetscFree(constraints_idxs_B));
7473   }
7474   PetscFunctionReturn(PETSC_SUCCESS);
7475 }
7476 
7477 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7478 {
7479   ISLocalToGlobalMapping map;
7480   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7481   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
7482   PetscInt               i, N;
7483   PetscBool              rcsr = PETSC_FALSE;
7484 
7485   PetscFunctionBegin;
7486   if (pcbddc->recompute_topography) {
7487     pcbddc->graphanalyzed = PETSC_FALSE;
7488     /* Reset previously computed graph */
7489     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7490     /* Init local Graph struct */
7491     PetscCall(MatGetSize(pc->pmat, &N, NULL));
7492     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7493     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7494 
7495     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7496     /* Check validity of the csr graph passed in by the user */
7497     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7498                pcbddc->mat_graph->nvtxs);
7499 
7500     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7501     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7502       PetscInt *xadj, *adjncy;
7503       PetscInt  nvtxs;
7504       PetscBool flg_row;
7505       Mat       A;
7506 
7507       PetscCall(PetscObjectReference((PetscObject)matis->A));
7508       A = matis->A;
7509       for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7510         Mat AtA;
7511 
7512         PetscCall(MatProductCreate(A, A, NULL, &AtA));
7513         PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7514         PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7515         PetscCall(MatProductSetFromOptions(AtA));
7516         PetscCall(MatProductSymbolic(AtA));
7517         PetscCall(MatProductClear(AtA));
7518         /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7519         AtA->assembled = PETSC_TRUE;
7520         PetscCall(MatDestroy(&A));
7521         A = AtA;
7522       }
7523       PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7524       if (flg_row) {
7525         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7526         pcbddc->computed_rowadj = PETSC_TRUE;
7527         PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7528         rcsr = PETSC_TRUE;
7529       }
7530       PetscCall(MatDestroy(&A));
7531     }
7532     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7533 
7534     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7535       PetscReal   *lcoords;
7536       PetscInt     n;
7537       MPI_Datatype dimrealtype;
7538       PetscMPIInt  cdimi;
7539 
7540       /* TODO: support for blocked */
7541       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7542       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7543       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7544       PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi));
7545       PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype));
7546       PetscCallMPI(MPI_Type_commit(&dimrealtype));
7547       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7548       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7549       PetscCallMPI(MPI_Type_free(&dimrealtype));
7550       PetscCall(PetscFree(pcbddc->mat_graph->coords));
7551 
7552       pcbddc->mat_graph->coords = lcoords;
7553       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7554       pcbddc->mat_graph->cnloc  = n;
7555     }
7556     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7557                pcbddc->mat_graph->nvtxs);
7558     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7559 
7560     /* attach info on disconnected subdomains if present */
7561     if (pcbddc->n_local_subs) {
7562       PetscInt *local_subs, n, totn;
7563 
7564       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7565       PetscCall(PetscMalloc1(n, &local_subs));
7566       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7567       for (i = 0; i < pcbddc->n_local_subs; i++) {
7568         const PetscInt *idxs;
7569         PetscInt        nl, j;
7570 
7571         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7572         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7573         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7574         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7575       }
7576       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7577       pcbddc->mat_graph->n_local_subs = totn + 1;
7578       pcbddc->mat_graph->local_subs   = local_subs;
7579     }
7580 
7581     /* Setup of Graph */
7582     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7583   }
7584 
7585   if (!pcbddc->graphanalyzed) {
7586     /* Graph's connected components analysis */
7587     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7588     pcbddc->graphanalyzed   = PETSC_TRUE;
7589     pcbddc->corner_selected = pcbddc->corner_selection;
7590   }
7591   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7592   PetscFunctionReturn(PETSC_SUCCESS);
7593 }
7594 
7595 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7596 {
7597   PetscInt     i, j, n;
7598   PetscScalar *alphas;
7599   PetscReal    norm, *onorms;
7600 
7601   PetscFunctionBegin;
7602   n = *nio;
7603   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7604   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7605   PetscCall(VecNormalize(vecs[0], &norm));
7606   if (norm < PETSC_SMALL) {
7607     onorms[0] = 0.0;
7608     PetscCall(VecSet(vecs[0], 0.0));
7609   } else {
7610     onorms[0] = norm;
7611   }
7612 
7613   for (i = 1; i < n; i++) {
7614     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7615     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7616     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7617     PetscCall(VecNormalize(vecs[i], &norm));
7618     if (norm < PETSC_SMALL) {
7619       onorms[i] = 0.0;
7620       PetscCall(VecSet(vecs[i], 0.0));
7621     } else {
7622       onorms[i] = norm;
7623     }
7624   }
7625   /* push nonzero vectors at the beginning */
7626   for (i = 0; i < n; i++) {
7627     if (onorms[i] == 0.0) {
7628       for (j = i + 1; j < n; j++) {
7629         if (onorms[j] != 0.0) {
7630           PetscCall(VecCopy(vecs[j], vecs[i]));
7631           onorms[i] = onorms[j];
7632           onorms[j] = 0.0;
7633           break;
7634         }
7635       }
7636     }
7637   }
7638   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7639   PetscCall(PetscFree2(alphas, onorms));
7640   PetscFunctionReturn(PETSC_SUCCESS);
7641 }
7642 
7643 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7644 {
7645   ISLocalToGlobalMapping mapping;
7646   Mat                    A;
7647   PetscInt               n_neighs, *neighs, *n_shared, **shared;
7648   PetscMPIInt            size, rank, color;
7649   PetscInt              *xadj, *adjncy;
7650   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7651   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
7652   PetscInt               void_procs, *procs_candidates = NULL;
7653   PetscInt               xadj_count, *count;
7654   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
7655   PetscSubcomm           psubcomm;
7656   MPI_Comm               subcomm;
7657 
7658   PetscFunctionBegin;
7659   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7660   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7661   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7662   PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7663   PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7664   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7665 
7666   if (have_void) *have_void = PETSC_FALSE;
7667   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7668   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7669   PetscCall(MatISGetLocalMat(mat, &A));
7670   PetscCall(MatGetLocalSize(A, &n, NULL));
7671   im_active = !!n;
7672   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7673   void_procs = size - active_procs;
7674   /* get ranks of non-active processes in mat communicator */
7675   if (void_procs) {
7676     PetscInt ncand;
7677 
7678     if (have_void) *have_void = PETSC_TRUE;
7679     PetscCall(PetscMalloc1(size, &procs_candidates));
7680     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7681     for (i = 0, ncand = 0; i < size; i++) {
7682       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7683     }
7684     /* force n_subdomains to be not greater that the number of non-active processes */
7685     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7686   }
7687 
7688   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7689      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7690   PetscCall(MatGetSize(mat, &N, NULL));
7691   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7692     PetscInt  issize, isidx, dest;
7693     PetscBool default_sub;
7694 
7695     if (*n_subdomains == 1) dest = 0;
7696     else dest = rank;
7697     if (im_active) {
7698       issize = 1;
7699       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7700         isidx = procs_candidates[dest];
7701       } else {
7702         isidx = dest;
7703       }
7704     } else {
7705       issize = 0;
7706       isidx  = rank;
7707     }
7708     if (*n_subdomains != 1) *n_subdomains = active_procs;
7709     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7710     default_sub = (PetscBool)(isidx == rank);
7711     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
7712     if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling"));
7713     PetscCall(PetscFree(procs_candidates));
7714     PetscFunctionReturn(PETSC_SUCCESS);
7715   }
7716   PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7717   PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7718   threshold = PetscMax(threshold, 2);
7719 
7720   /* Get info on mapping */
7721   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7722   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7723 
7724   /* build local CSR graph of subdomains' connectivity */
7725   PetscCall(PetscMalloc1(2, &xadj));
7726   xadj[0] = 0;
7727   xadj[1] = PetscMax(n_neighs - 1, 0);
7728   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7729   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7730   PetscCall(PetscCalloc1(n, &count));
7731   for (i = 1; i < n_neighs; i++)
7732     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7733 
7734   xadj_count = 0;
7735   for (i = 1; i < n_neighs; i++) {
7736     for (j = 0; j < n_shared[i]; j++) {
7737       if (count[shared[i][j]] < threshold) {
7738         adjncy[xadj_count]     = neighs[i];
7739         adjncy_wgt[xadj_count] = n_shared[i];
7740         xadj_count++;
7741         break;
7742       }
7743     }
7744   }
7745   xadj[1] = xadj_count;
7746   PetscCall(PetscFree(count));
7747   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7748   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7749 
7750   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7751 
7752   /* Restrict work on active processes only */
7753   PetscCall(PetscMPIIntCast(im_active, &color));
7754   if (void_procs) {
7755     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7756     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7757     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7758     subcomm = PetscSubcommChild(psubcomm);
7759   } else {
7760     psubcomm = NULL;
7761     subcomm  = PetscObjectComm((PetscObject)mat);
7762   }
7763 
7764   v_wgt = NULL;
7765   if (!color) {
7766     PetscCall(PetscFree(xadj));
7767     PetscCall(PetscFree(adjncy));
7768     PetscCall(PetscFree(adjncy_wgt));
7769   } else {
7770     Mat             subdomain_adj;
7771     IS              new_ranks, new_ranks_contig;
7772     MatPartitioning partitioner;
7773     PetscInt        rstart, rend;
7774     PetscMPIInt     irstart = 0, irend = 0;
7775     PetscInt       *is_indices, *oldranks;
7776     PetscMPIInt     size;
7777     PetscBool       aggregate;
7778 
7779     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7780     if (void_procs) {
7781       PetscInt prank = rank;
7782       PetscCall(PetscMalloc1(size, &oldranks));
7783       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7784       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7785       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7786     } else {
7787       oldranks = NULL;
7788     }
7789     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7790     if (aggregate) { /* TODO: all this part could be made more efficient */
7791       PetscInt     lrows, row, ncols, *cols;
7792       PetscMPIInt  nrank;
7793       PetscScalar *vals;
7794 
7795       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7796       lrows = 0;
7797       if (nrank < redprocs) {
7798         lrows = size / redprocs;
7799         if (nrank < size % redprocs) lrows++;
7800       }
7801       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7802       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7803       PetscCall(PetscMPIIntCast(rstart, &irstart));
7804       PetscCall(PetscMPIIntCast(rend, &irend));
7805       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7806       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7807       row   = nrank;
7808       ncols = xadj[1] - xadj[0];
7809       cols  = adjncy;
7810       PetscCall(PetscMalloc1(ncols, &vals));
7811       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7812       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7813       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7814       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7815       PetscCall(PetscFree(xadj));
7816       PetscCall(PetscFree(adjncy));
7817       PetscCall(PetscFree(adjncy_wgt));
7818       PetscCall(PetscFree(vals));
7819       if (use_vwgt) {
7820         Vec                v;
7821         const PetscScalar *array;
7822         PetscInt           nl;
7823 
7824         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7825         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7826         PetscCall(VecAssemblyBegin(v));
7827         PetscCall(VecAssemblyEnd(v));
7828         PetscCall(VecGetLocalSize(v, &nl));
7829         PetscCall(VecGetArrayRead(v, &array));
7830         PetscCall(PetscMalloc1(nl, &v_wgt));
7831         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7832         PetscCall(VecRestoreArrayRead(v, &array));
7833         PetscCall(VecDestroy(&v));
7834       }
7835     } else {
7836       PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7837       if (use_vwgt) {
7838         PetscCall(PetscMalloc1(1, &v_wgt));
7839         v_wgt[0] = n;
7840       }
7841     }
7842     /* PetscCall(MatView(subdomain_adj,0)); */
7843 
7844     /* Partition */
7845     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7846 #if defined(PETSC_HAVE_PTSCOTCH)
7847     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7848 #elif defined(PETSC_HAVE_PARMETIS)
7849     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7850 #else
7851     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7852 #endif
7853     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7854     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7855     *n_subdomains = PetscMin(size, *n_subdomains);
7856     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7857     PetscCall(MatPartitioningSetFromOptions(partitioner));
7858     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7859     /* PetscCall(MatPartitioningView(partitioner,0)); */
7860 
7861     /* renumber new_ranks to avoid "holes" in new set of processors */
7862     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7863     PetscCall(ISDestroy(&new_ranks));
7864     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7865     if (!aggregate) {
7866       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7867         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7868         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7869       } else if (oldranks) {
7870         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7871       } else {
7872         ranks_send_to_idx[0] = is_indices[0];
7873       }
7874     } else {
7875       PetscInt     idx = 0;
7876       PetscMPIInt  tag;
7877       MPI_Request *reqs;
7878 
7879       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7880       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7881       for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7882       PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7883       PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7884       PetscCall(PetscFree(reqs));
7885       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7886         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7887         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7888       } else if (oldranks) {
7889         ranks_send_to_idx[0] = oldranks[idx];
7890       } else {
7891         ranks_send_to_idx[0] = idx;
7892       }
7893     }
7894     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7895     /* clean up */
7896     PetscCall(PetscFree(oldranks));
7897     PetscCall(ISDestroy(&new_ranks_contig));
7898     PetscCall(MatDestroy(&subdomain_adj));
7899     PetscCall(MatPartitioningDestroy(&partitioner));
7900   }
7901   PetscCall(PetscSubcommDestroy(&psubcomm));
7902   PetscCall(PetscFree(procs_candidates));
7903 
7904   /* assemble parallel IS for sends */
7905   i = 1;
7906   if (!color) i = 0;
7907   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7908   PetscFunctionReturn(PETSC_SUCCESS);
7909 }
7910 
7911 typedef enum {
7912   MATDENSE_PRIVATE = 0,
7913   MATAIJ_PRIVATE,
7914   MATBAIJ_PRIVATE,
7915   MATSBAIJ_PRIVATE
7916 } MatTypePrivate;
7917 
7918 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7919 {
7920   Mat                    local_mat;
7921   IS                     is_sends_internal;
7922   PetscInt               rows, cols, new_local_rows;
7923   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7924   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7925   ISLocalToGlobalMapping l2gmap;
7926   PetscInt              *l2gmap_indices;
7927   const PetscInt        *is_indices;
7928   MatType                new_local_type;
7929   /* buffers */
7930   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7931   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7932   PetscInt          *recv_buffer_idxs_local;
7933   PetscScalar       *ptr_vals, *recv_buffer_vals;
7934   const PetscScalar *send_buffer_vals;
7935   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7936   /* MPI */
7937   MPI_Comm     comm, comm_n;
7938   PetscSubcomm subcomm;
7939   PetscMPIInt  n_sends, n_recvs, size;
7940   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7941   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7942   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7943   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7944   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7945 
7946   PetscFunctionBegin;
7947   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7948   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7949   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7950   PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
7951   PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
7952   PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
7953   PetscValidLogicalCollectiveBool(mat, reuse, 6);
7954   PetscValidLogicalCollectiveInt(mat, nis, 8);
7955   PetscValidLogicalCollectiveInt(mat, nvecs, 10);
7956   if (nvecs) {
7957     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7958     PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
7959   }
7960   /* further checks */
7961   PetscCall(MatISGetLocalMat(mat, &local_mat));
7962   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7963   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7964 
7965   PetscCall(MatGetSize(local_mat, &rows, &cols));
7966   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7967   if (reuse && *mat_n) {
7968     PetscInt mrows, mcols, mnrows, mncols;
7969     PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
7970     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7971     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7972     PetscCall(MatGetSize(mat, &mrows, &mcols));
7973     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7974     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7975     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7976   }
7977   PetscCall(MatGetBlockSize(local_mat, &bs));
7978   PetscValidLogicalCollectiveInt(mat, bs, 1);
7979 
7980   /* prepare IS for sending if not provided */
7981   if (!is_sends) {
7982     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7983     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7984   } else {
7985     PetscCall(PetscObjectReference((PetscObject)is_sends));
7986     is_sends_internal = is_sends;
7987   }
7988 
7989   /* get comm */
7990   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
7991 
7992   /* compute number of sends */
7993   PetscCall(ISGetLocalSize(is_sends_internal, &i));
7994   PetscCall(PetscMPIIntCast(i, &n_sends));
7995 
7996   /* compute number of receives */
7997   PetscCallMPI(MPI_Comm_size(comm, &size));
7998   PetscCall(PetscMalloc1(size, &iflags));
7999   PetscCall(PetscArrayzero(iflags, size));
8000   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
8001   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
8002   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
8003   PetscCall(PetscFree(iflags));
8004 
8005   /* restrict comm if requested */
8006   subcomm     = NULL;
8007   destroy_mat = PETSC_FALSE;
8008   if (restrict_comm) {
8009     PetscMPIInt color, subcommsize;
8010 
8011     color = 0;
8012     if (restrict_full) {
8013       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
8014     } else {
8015       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
8016     }
8017     PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
8018     subcommsize = size - subcommsize;
8019     /* check if reuse has been requested */
8020     if (reuse) {
8021       if (*mat_n) {
8022         PetscMPIInt subcommsize2;
8023         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
8024         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
8025         comm_n = PetscObjectComm((PetscObject)*mat_n);
8026       } else {
8027         comm_n = PETSC_COMM_SELF;
8028       }
8029     } else { /* MAT_INITIAL_MATRIX */
8030       PetscMPIInt rank;
8031 
8032       PetscCallMPI(MPI_Comm_rank(comm, &rank));
8033       PetscCall(PetscSubcommCreate(comm, &subcomm));
8034       PetscCall(PetscSubcommSetNumber(subcomm, 2));
8035       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
8036       comm_n = PetscSubcommChild(subcomm);
8037     }
8038     /* flag to destroy *mat_n if not significative */
8039     if (color) destroy_mat = PETSC_TRUE;
8040   } else {
8041     comm_n = comm;
8042   }
8043 
8044   /* prepare send/receive buffers */
8045   PetscCall(PetscMalloc1(size, &ilengths_idxs));
8046   PetscCall(PetscArrayzero(ilengths_idxs, size));
8047   PetscCall(PetscMalloc1(size, &ilengths_vals));
8048   PetscCall(PetscArrayzero(ilengths_vals, size));
8049   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8050 
8051   /* Get data from local matrices */
8052   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8053   /* TODO: See below some guidelines on how to prepare the local buffers */
8054   /*
8055        send_buffer_vals should contain the raw values of the local matrix
8056        send_buffer_idxs should contain:
8057        - MatType_PRIVATE type
8058        - PetscInt        size_of_l2gmap
8059        - PetscInt        global_row_indices[size_of_l2gmap]
8060        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8061     */
8062   {
8063     ISLocalToGlobalMapping mapping;
8064 
8065     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8066     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8067     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8068     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8069     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8070     send_buffer_idxs[1] = i;
8071     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8072     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8073     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8074     PetscCall(PetscMPIIntCast(i, &len));
8075     for (i = 0; i < n_sends; i++) {
8076       ilengths_vals[is_indices[i]] = len * len;
8077       ilengths_idxs[is_indices[i]] = len + 2;
8078     }
8079   }
8080   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8081   /* additional is (if any) */
8082   if (nis) {
8083     PetscMPIInt psum;
8084     PetscInt    j;
8085     for (j = 0, psum = 0; j < nis; j++) {
8086       PetscInt plen;
8087       PetscCall(ISGetLocalSize(isarray[j], &plen));
8088       PetscCall(PetscMPIIntCast(plen, &len));
8089       psum += len + 1; /* indices + length */
8090     }
8091     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8092     for (j = 0, psum = 0; j < nis; j++) {
8093       PetscInt        plen;
8094       const PetscInt *is_array_idxs;
8095       PetscCall(ISGetLocalSize(isarray[j], &plen));
8096       send_buffer_idxs_is[psum] = plen;
8097       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8098       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8099       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8100       psum += plen + 1; /* indices + length */
8101     }
8102     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8103     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8104   }
8105   PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8106 
8107   buf_size_idxs    = 0;
8108   buf_size_vals    = 0;
8109   buf_size_idxs_is = 0;
8110   buf_size_vecs    = 0;
8111   for (i = 0; i < n_recvs; i++) {
8112     buf_size_idxs += olengths_idxs[i];
8113     buf_size_vals += olengths_vals[i];
8114     if (nis) buf_size_idxs_is += olengths_idxs_is[i];
8115     if (nvecs) buf_size_vecs += olengths_idxs[i];
8116   }
8117   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8118   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8119   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8120   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8121 
8122   /* get new tags for clean communications */
8123   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8124   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8125   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8126   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8127 
8128   /* allocate for requests */
8129   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8130   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8131   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8132   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8133   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8134   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8135   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8136   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8137 
8138   /* communications */
8139   ptr_idxs    = recv_buffer_idxs;
8140   ptr_vals    = recv_buffer_vals;
8141   ptr_idxs_is = recv_buffer_idxs_is;
8142   ptr_vecs    = recv_buffer_vecs;
8143   for (i = 0; i < n_recvs; i++) {
8144     PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i]));
8145     PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i]));
8146     ptr_idxs += olengths_idxs[i];
8147     ptr_vals += olengths_vals[i];
8148     if (nis) {
8149       PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i]));
8150       ptr_idxs_is += olengths_idxs_is[i];
8151     }
8152     if (nvecs) {
8153       PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i]));
8154       ptr_vecs += olengths_idxs[i] - 2;
8155     }
8156   }
8157   for (i = 0; i < n_sends; i++) {
8158     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8159     PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8160     PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8161     if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8162     if (nvecs) {
8163       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8164       PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8165     }
8166   }
8167   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8168   PetscCall(ISDestroy(&is_sends_internal));
8169 
8170   /* assemble new l2g map */
8171   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8172   ptr_idxs       = recv_buffer_idxs;
8173   new_local_rows = 0;
8174   for (i = 0; i < n_recvs; i++) {
8175     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8176     ptr_idxs += olengths_idxs[i];
8177   }
8178   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8179   ptr_idxs       = recv_buffer_idxs;
8180   new_local_rows = 0;
8181   for (i = 0; i < n_recvs; i++) {
8182     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8183     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8184     ptr_idxs += olengths_idxs[i];
8185   }
8186   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8187   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8188   PetscCall(PetscFree(l2gmap_indices));
8189 
8190   /* infer new local matrix type from received local matrices type */
8191   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8192   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8193   if (n_recvs) {
8194     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8195     ptr_idxs                              = recv_buffer_idxs;
8196     for (i = 0; i < n_recvs; i++) {
8197       if ((PetscInt)new_local_type_private != *ptr_idxs) {
8198         new_local_type_private = MATAIJ_PRIVATE;
8199         break;
8200       }
8201       ptr_idxs += olengths_idxs[i];
8202     }
8203     switch (new_local_type_private) {
8204     case MATDENSE_PRIVATE:
8205       new_local_type = MATSEQAIJ;
8206       bs             = 1;
8207       break;
8208     case MATAIJ_PRIVATE:
8209       new_local_type = MATSEQAIJ;
8210       bs             = 1;
8211       break;
8212     case MATBAIJ_PRIVATE:
8213       new_local_type = MATSEQBAIJ;
8214       break;
8215     case MATSBAIJ_PRIVATE:
8216       new_local_type = MATSEQSBAIJ;
8217       break;
8218     default:
8219       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8220     }
8221   } else { /* by default, new_local_type is seqaij */
8222     new_local_type = MATSEQAIJ;
8223     bs             = 1;
8224   }
8225 
8226   /* create MATIS object if needed */
8227   if (!reuse) {
8228     PetscCall(MatGetSize(mat, &rows, &cols));
8229     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8230   } else {
8231     /* it also destroys the local matrices */
8232     if (*mat_n) {
8233       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8234     } else { /* this is a fake object */
8235       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8236     }
8237   }
8238   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8239   PetscCall(MatSetType(local_mat, new_local_type));
8240 
8241   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8242 
8243   /* Global to local map of received indices */
8244   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8245   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8246   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8247 
8248   /* restore attributes -> type of incoming data and its size */
8249   buf_size_idxs = 0;
8250   for (i = 0; i < n_recvs; i++) {
8251     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
8252     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8253     buf_size_idxs += olengths_idxs[i];
8254   }
8255   PetscCall(PetscFree(recv_buffer_idxs));
8256 
8257   /* set preallocation */
8258   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8259   if (!newisdense) {
8260     PetscInt *new_local_nnz = NULL;
8261 
8262     ptr_idxs = recv_buffer_idxs_local;
8263     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8264     for (i = 0; i < n_recvs; i++) {
8265       PetscInt j;
8266       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8267         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8268       } else {
8269         /* TODO */
8270       }
8271       ptr_idxs += olengths_idxs[i];
8272     }
8273     if (new_local_nnz) {
8274       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8275       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8276       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8277       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8278       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8279       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8280     } else {
8281       PetscCall(MatSetUp(local_mat));
8282     }
8283     PetscCall(PetscFree(new_local_nnz));
8284   } else {
8285     PetscCall(MatSetUp(local_mat));
8286   }
8287 
8288   /* set values */
8289   ptr_vals = recv_buffer_vals;
8290   ptr_idxs = recv_buffer_idxs_local;
8291   for (i = 0; i < n_recvs; i++) {
8292     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8293       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8294       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8295       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8296       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8297       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8298     } else {
8299       /* TODO */
8300     }
8301     ptr_idxs += olengths_idxs[i];
8302     ptr_vals += olengths_vals[i];
8303   }
8304   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8305   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8306   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8307   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8308   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8309   PetscCall(PetscFree(recv_buffer_vals));
8310 
8311 #if 0
8312   if (!restrict_comm) { /* check */
8313     Vec       lvec,rvec;
8314     PetscReal infty_error;
8315 
8316     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8317     PetscCall(VecSetRandom(rvec,NULL));
8318     PetscCall(MatMult(mat,rvec,lvec));
8319     PetscCall(VecScale(lvec,-1.0));
8320     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8321     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8322     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8323     PetscCall(VecDestroy(&rvec));
8324     PetscCall(VecDestroy(&lvec));
8325   }
8326 #endif
8327 
8328   /* assemble new additional is (if any) */
8329   if (nis) {
8330     PetscInt **temp_idxs, *count_is, j, psum;
8331 
8332     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8333     PetscCall(PetscCalloc1(nis, &count_is));
8334     ptr_idxs = recv_buffer_idxs_is;
8335     psum     = 0;
8336     for (i = 0; i < n_recvs; i++) {
8337       for (j = 0; j < nis; j++) {
8338         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8339         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
8340         psum += plen;
8341         ptr_idxs += plen + 1; /* shift pointer to received data */
8342       }
8343     }
8344     PetscCall(PetscMalloc1(nis, &temp_idxs));
8345     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8346     for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8347     PetscCall(PetscArrayzero(count_is, nis));
8348     ptr_idxs = recv_buffer_idxs_is;
8349     for (i = 0; i < n_recvs; i++) {
8350       for (j = 0; j < nis; j++) {
8351         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8352         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8353         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
8354         ptr_idxs += plen + 1; /* shift pointer to received data */
8355       }
8356     }
8357     for (i = 0; i < nis; i++) {
8358       PetscCall(ISDestroy(&isarray[i]));
8359       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8360       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8361     }
8362     PetscCall(PetscFree(count_is));
8363     PetscCall(PetscFree(temp_idxs[0]));
8364     PetscCall(PetscFree(temp_idxs));
8365   }
8366   /* free workspace */
8367   PetscCall(PetscFree(recv_buffer_idxs_is));
8368   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8369   PetscCall(PetscFree(send_buffer_idxs));
8370   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8371   if (isdense) {
8372     PetscCall(MatISGetLocalMat(mat, &local_mat));
8373     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8374     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8375   } else {
8376     /* PetscCall(PetscFree(send_buffer_vals)); */
8377   }
8378   if (nis) {
8379     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8380     PetscCall(PetscFree(send_buffer_idxs_is));
8381   }
8382 
8383   if (nvecs) {
8384     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8385     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8386     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8387     PetscCall(VecDestroy(&nnsp_vec[0]));
8388     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8389     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8390     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8391     /* set values */
8392     ptr_vals = recv_buffer_vecs;
8393     ptr_idxs = recv_buffer_idxs_local;
8394     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8395     for (i = 0; i < n_recvs; i++) {
8396       PetscInt j;
8397       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8398       ptr_idxs += olengths_idxs[i];
8399       ptr_vals += olengths_idxs[i] - 2;
8400     }
8401     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8402     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8403     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8404   }
8405 
8406   PetscCall(PetscFree(recv_buffer_vecs));
8407   PetscCall(PetscFree(recv_buffer_idxs_local));
8408   PetscCall(PetscFree(recv_req_idxs));
8409   PetscCall(PetscFree(recv_req_vals));
8410   PetscCall(PetscFree(recv_req_vecs));
8411   PetscCall(PetscFree(recv_req_idxs_is));
8412   PetscCall(PetscFree(send_req_idxs));
8413   PetscCall(PetscFree(send_req_vals));
8414   PetscCall(PetscFree(send_req_vecs));
8415   PetscCall(PetscFree(send_req_idxs_is));
8416   PetscCall(PetscFree(ilengths_vals));
8417   PetscCall(PetscFree(ilengths_idxs));
8418   PetscCall(PetscFree(olengths_vals));
8419   PetscCall(PetscFree(olengths_idxs));
8420   PetscCall(PetscFree(onodes));
8421   if (nis) {
8422     PetscCall(PetscFree(ilengths_idxs_is));
8423     PetscCall(PetscFree(olengths_idxs_is));
8424     PetscCall(PetscFree(onodes_is));
8425   }
8426   PetscCall(PetscSubcommDestroy(&subcomm));
8427   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8428     PetscCall(MatDestroy(mat_n));
8429     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8430     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8431       PetscCall(VecDestroy(&nnsp_vec[0]));
8432     }
8433     *mat_n = NULL;
8434   }
8435   PetscFunctionReturn(PETSC_SUCCESS);
8436 }
8437 
8438 /* temporary hack into ksp private data structure */
8439 #include <petsc/private/kspimpl.h>
8440 
8441 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8442 {
8443   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
8444   PC_IS                 *pcis   = (PC_IS *)pc->data;
8445   PCBDDCGraph            graph  = pcbddc->mat_graph;
8446   Mat                    coarse_mat, coarse_mat_is;
8447   Mat                    coarsedivudotp = NULL;
8448   Mat                    coarseG, t_coarse_mat_is;
8449   MatNullSpace           CoarseNullSpace = NULL;
8450   ISLocalToGlobalMapping coarse_islg;
8451   IS                     coarse_is, *isarray, corners;
8452   PetscInt               i, im_active = -1, active_procs = -1;
8453   PetscInt               nis, nisdofs, nisneu, nisvert;
8454   PetscInt               coarse_eqs_per_proc, coarsening_ratio;
8455   PC                     pc_temp;
8456   PCType                 coarse_pc_type;
8457   KSPType                coarse_ksp_type;
8458   PetscBool              multilevel_requested, multilevel_allowed;
8459   PetscBool              coarse_reuse, multi_element = graph->multi_element;
8460   PetscInt               ncoarse, nedcfield;
8461   PetscBool              compute_vecs = PETSC_FALSE;
8462   PetscScalar           *array;
8463   MatReuse               coarse_mat_reuse;
8464   PetscBool              restr, full_restr, have_void;
8465   PetscMPIInt            size;
8466 
8467   PetscFunctionBegin;
8468   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8469   /* Assign global numbering to coarse dofs */
8470   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8471     PetscInt ocoarse_size;
8472     compute_vecs = PETSC_TRUE;
8473 
8474     pcbddc->new_primal_space = PETSC_TRUE;
8475     ocoarse_size             = pcbddc->coarse_size;
8476     PetscCall(PetscFree(pcbddc->global_primal_indices));
8477     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8478     /* see if we can avoid some work */
8479     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8480       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8481       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8482         PetscCall(KSPReset(pcbddc->coarse_ksp));
8483         coarse_reuse = PETSC_FALSE;
8484       } else { /* we can safely reuse already computed coarse matrix */
8485         coarse_reuse = PETSC_TRUE;
8486       }
8487     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8488       coarse_reuse = PETSC_FALSE;
8489     }
8490     /* reset any subassembling information */
8491     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8492   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8493     coarse_reuse = PETSC_TRUE;
8494   }
8495   if (coarse_reuse && pcbddc->coarse_ksp) {
8496     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8497     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8498     coarse_mat_reuse = MAT_REUSE_MATRIX;
8499   } else {
8500     coarse_mat       = NULL;
8501     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8502   }
8503 
8504   /* creates temporary l2gmap and IS for coarse indexes */
8505   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8506   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8507 
8508   /* creates temporary MATIS object for coarse matrix */
8509   PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8510   PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8511   PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8512   PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element));
8513   PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8514   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8515   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8516   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8517   PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8518 
8519   /* count "active" (i.e. with positive local size) and "void" processes */
8520   im_active = !!pcis->n;
8521   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8522 
8523   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8524   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8525   /* full_restr : just use the receivers from the subassembling pattern */
8526   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8527   coarse_mat_is        = NULL;
8528   multilevel_allowed   = PETSC_FALSE;
8529   multilevel_requested = PETSC_FALSE;
8530   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8531   if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8532   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8533   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8534   coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8535   if (multilevel_requested) {
8536     ncoarse    = active_procs / coarsening_ratio;
8537     restr      = PETSC_FALSE;
8538     full_restr = PETSC_FALSE;
8539   } else {
8540     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8541     restr      = PETSC_TRUE;
8542     full_restr = PETSC_TRUE;
8543   }
8544   if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8545   ncoarse = PetscMax(1, ncoarse);
8546   if (!pcbddc->coarse_subassembling) {
8547     if (coarsening_ratio > 1) {
8548       if (multilevel_requested) {
8549         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8550       } else {
8551         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8552       }
8553     } else {
8554       PetscMPIInt rank;
8555 
8556       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8557       have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE;
8558       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8559       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling"));
8560     }
8561   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8562     PetscInt psum;
8563     if (pcbddc->coarse_ksp) psum = 1;
8564     else psum = 0;
8565     PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8566     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8567   }
8568   /* determine if we can go multilevel */
8569   if (multilevel_requested) {
8570     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8571     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
8572   }
8573   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8574 
8575   /* dump subassembling pattern */
8576   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8577   /* compute dofs splitting and neumann boundaries for coarse dofs */
8578   nedcfield = -1;
8579   corners   = NULL;
8580   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8581     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
8582     const PetscInt        *idxs;
8583     ISLocalToGlobalMapping tmap;
8584 
8585     /* create map between primal indices (in local representative ordering) and local primal numbering */
8586     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8587     /* allocate space for temporary storage */
8588     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8589     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8590     /* allocate for IS array */
8591     nisdofs = pcbddc->n_ISForDofsLocal;
8592     if (pcbddc->nedclocal) {
8593       if (pcbddc->nedfield > -1) {
8594         nedcfield = pcbddc->nedfield;
8595       } else {
8596         nedcfield = 0;
8597         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8598         nisdofs = 1;
8599       }
8600     }
8601     nisneu  = !!pcbddc->NeumannBoundariesLocal;
8602     nisvert = 0; /* nisvert is not used */
8603     nis     = nisdofs + nisneu + nisvert;
8604     PetscCall(PetscMalloc1(nis, &isarray));
8605     /* dofs splitting */
8606     for (i = 0; i < nisdofs; i++) {
8607       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8608       if (nedcfield != i) {
8609         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8610         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8611         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8612         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8613       } else {
8614         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8615         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8616         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8617         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8618         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8619       }
8620       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8621       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8622       /* PetscCall(ISView(isarray[i],0)); */
8623     }
8624     /* neumann boundaries */
8625     if (pcbddc->NeumannBoundariesLocal) {
8626       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8627       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8628       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8629       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8630       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8631       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8632       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8633       /* PetscCall(ISView(isarray[nisdofs],0)); */
8634     }
8635     /* coordinates */
8636     if (pcbddc->corner_selected) {
8637       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8638       PetscCall(ISGetLocalSize(corners, &tsize));
8639       PetscCall(ISGetIndices(corners, &idxs));
8640       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8641       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8642       PetscCall(ISRestoreIndices(corners, &idxs));
8643       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8644       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8645       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8646     }
8647     PetscCall(PetscFree(tidxs));
8648     PetscCall(PetscFree(tidxs2));
8649     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8650   } else {
8651     nis     = 0;
8652     nisdofs = 0;
8653     nisneu  = 0;
8654     nisvert = 0;
8655     isarray = NULL;
8656   }
8657   /* destroy no longer needed map */
8658   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8659 
8660   /* subassemble */
8661   if (multilevel_allowed) {
8662     Vec       vp[1];
8663     PetscInt  nvecs = 0;
8664     PetscBool reuse;
8665 
8666     vp[0] = NULL;
8667     /* XXX HDIV also */
8668     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8669       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8670       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8671       PetscCall(VecSetType(vp[0], VECSTANDARD));
8672       nvecs = 1;
8673 
8674       if (pcbddc->divudotp) {
8675         Mat      B, loc_divudotp;
8676         Vec      v, p;
8677         IS       dummy;
8678         PetscInt np;
8679 
8680         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8681         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8682         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8683         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8684         PetscCall(MatCreateVecs(B, &v, &p));
8685         PetscCall(VecSet(p, 1.));
8686         PetscCall(MatMultTranspose(B, p, v));
8687         PetscCall(VecDestroy(&p));
8688         PetscCall(MatDestroy(&B));
8689         PetscCall(VecGetArray(vp[0], &array));
8690         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8691         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8692         PetscCall(VecResetArray(pcbddc->vec1_P));
8693         PetscCall(VecRestoreArray(vp[0], &array));
8694         PetscCall(ISDestroy(&dummy));
8695         PetscCall(VecDestroy(&v));
8696       }
8697     }
8698     if (coarse_mat) reuse = PETSC_TRUE;
8699     else reuse = PETSC_FALSE;
8700     if (multi_element) {
8701       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8702       coarse_mat_is = t_coarse_mat_is;
8703     } else {
8704       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8705       if (reuse) {
8706         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8707       } else {
8708         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8709       }
8710       if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8711         PetscScalar       *arraym;
8712         const PetscScalar *arrayv;
8713         PetscInt           nl;
8714         PetscCall(VecGetLocalSize(vp[0], &nl));
8715         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8716         PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8717         PetscCall(VecGetArrayRead(vp[0], &arrayv));
8718         PetscCall(PetscArraycpy(arraym, arrayv, nl));
8719         PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8720         PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8721         PetscCall(VecDestroy(&vp[0]));
8722       } else {
8723         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8724       }
8725     }
8726   } else {
8727     PetscBool default_sub;
8728 
8729     PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub));
8730     if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8731     else {
8732       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8733       coarse_mat_is = t_coarse_mat_is;
8734     }
8735   }
8736   if (coarse_mat_is || coarse_mat) {
8737     if (!multilevel_allowed) {
8738       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8739     } else {
8740       /* if this matrix is present, it means we are not reusing the coarse matrix */
8741       if (coarse_mat_is) {
8742         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8743         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8744         coarse_mat = coarse_mat_is;
8745       }
8746     }
8747   }
8748   PetscCall(MatDestroy(&t_coarse_mat_is));
8749   PetscCall(MatDestroy(&coarse_mat_is));
8750 
8751   /* create local to global scatters for coarse problem */
8752   if (compute_vecs) {
8753     PetscInt lrows;
8754     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8755     if (coarse_mat) {
8756       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8757     } else {
8758       lrows = 0;
8759     }
8760     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8761     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8762     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8763     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8764     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8765   }
8766   PetscCall(ISDestroy(&coarse_is));
8767 
8768   /* set defaults for coarse KSP and PC */
8769   if (multilevel_allowed) {
8770     coarse_ksp_type = KSPRICHARDSON;
8771     coarse_pc_type  = PCBDDC;
8772   } else {
8773     coarse_ksp_type = KSPPREONLY;
8774     coarse_pc_type  = PCREDUNDANT;
8775   }
8776 
8777   /* print some info if requested */
8778   if (pcbddc->dbg_flag) {
8779     if (!multilevel_allowed) {
8780       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8781       if (multilevel_requested) {
8782         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8783       } else if (pcbddc->max_levels) {
8784         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8785       }
8786       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8787     }
8788   }
8789 
8790   /* communicate coarse discrete gradient */
8791   coarseG = NULL;
8792   if (pcbddc->nedcG && multilevel_allowed) {
8793     MPI_Comm ccomm;
8794     if (coarse_mat) {
8795       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8796     } else {
8797       ccomm = MPI_COMM_NULL;
8798     }
8799     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8800   }
8801 
8802   /* create the coarse KSP object only once with defaults */
8803   if (coarse_mat) {
8804     PetscBool   isredundant, isbddc, force, valid;
8805     PetscViewer dbg_viewer = NULL;
8806     PetscBool   isset, issym, isher, isspd;
8807 
8808     if (pcbddc->dbg_flag) {
8809       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8810       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8811     }
8812     if (!pcbddc->coarse_ksp) {
8813       char   prefix[256], str_level[16];
8814       size_t len;
8815 
8816       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8817       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8818       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8819       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8820       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8821       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8822       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8823       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8824       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8825       /* TODO is this logic correct? should check for coarse_mat type */
8826       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8827       /* prefix */
8828       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8829       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8830       if (!pcbddc->current_level) {
8831         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8832         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8833       } else {
8834         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8835         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8836         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8837         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8838         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8839         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
8840         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8841       }
8842       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8843       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8844       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8845       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8846       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8847       /* allow user customization */
8848       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8849       /* get some info after set from options */
8850       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8851       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8852       force = PETSC_FALSE;
8853       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8854       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8855       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8856       if (multilevel_allowed && !force && !valid) {
8857         isbddc = PETSC_TRUE;
8858         PetscCall(PCSetType(pc_temp, PCBDDC));
8859         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8860         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8861         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8862         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8863           PetscObjectOptionsBegin((PetscObject)pc_temp);
8864           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8865           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8866           PetscOptionsEnd();
8867           pc_temp->setfromoptionscalled++;
8868         }
8869       }
8870     }
8871     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8872     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8873     if (nisdofs) {
8874       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8875       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8876     }
8877     if (nisneu) {
8878       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8879       PetscCall(ISDestroy(&isarray[nisdofs]));
8880     }
8881     if (nisvert) {
8882       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8883       PetscCall(ISDestroy(&isarray[nis - 1]));
8884     }
8885     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8886 
8887     /* get some info after set from options */
8888     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8889 
8890     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8891     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8892     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8893     force = PETSC_FALSE;
8894     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8895     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8896     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8897     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8898     if (isredundant) {
8899       KSP inner_ksp;
8900       PC  inner_pc;
8901 
8902       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8903       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8904     }
8905 
8906     /* parameters which miss an API */
8907     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8908     if (isbddc) {
8909       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8910 
8911       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8912       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8913       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8914       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8915       if (pcbddc_coarse->benign_saddle_point) {
8916         Mat                    coarsedivudotp_is;
8917         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8918         IS                     row, col;
8919         const PetscInt        *gidxs;
8920         PetscInt               n, st, M, N;
8921 
8922         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8923         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8924         st = st - n;
8925         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8926         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8927         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8928         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8929         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8930         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8931         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8932         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8933         PetscCall(ISGetSize(row, &M));
8934         PetscCall(MatGetSize(coarse_mat, &N, NULL));
8935         PetscCall(ISDestroy(&row));
8936         PetscCall(ISDestroy(&col));
8937         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8938         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8939         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8940         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8941         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8942         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8943         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8944         PetscCall(MatDestroy(&coarsedivudotp));
8945         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8946         PetscCall(MatDestroy(&coarsedivudotp_is));
8947         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8948         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8949       }
8950     }
8951 
8952     /* propagate symmetry info of coarse matrix */
8953     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8954     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8955     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8956     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8957     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8958     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8959     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
8960 
8961     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8962     /* set operators */
8963     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8964     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8965     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8966     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8967   }
8968   PetscCall(MatDestroy(&coarseG));
8969   PetscCall(PetscFree(isarray));
8970 #if 0
8971   {
8972     PetscViewer viewer;
8973     char filename[256];
8974     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
8975     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8976     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8977     PetscCall(MatView(coarse_mat,viewer));
8978     PetscCall(PetscViewerPopFormat(viewer));
8979     PetscCall(PetscViewerDestroy(&viewer));
8980   }
8981 #endif
8982 
8983   if (corners) {
8984     Vec             gv;
8985     IS              is;
8986     const PetscInt *idxs;
8987     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8988     PetscScalar    *coords;
8989 
8990     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8991     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8992     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8993     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8994     PetscCall(VecSetBlockSize(gv, cdim));
8995     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8996     PetscCall(VecSetType(gv, VECSTANDARD));
8997     PetscCall(VecSetFromOptions(gv));
8998     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8999 
9000     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
9001     PetscCall(ISGetLocalSize(is, &n));
9002     PetscCall(ISGetIndices(is, &idxs));
9003     PetscCall(PetscMalloc1(n * cdim, &coords));
9004     for (i = 0; i < n; i++) {
9005       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
9006     }
9007     PetscCall(ISRestoreIndices(is, &idxs));
9008     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
9009 
9010     PetscCall(ISGetLocalSize(corners, &n));
9011     PetscCall(ISGetIndices(corners, &idxs));
9012     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
9013     PetscCall(ISRestoreIndices(corners, &idxs));
9014     PetscCall(PetscFree(coords));
9015     PetscCall(VecAssemblyBegin(gv));
9016     PetscCall(VecAssemblyEnd(gv));
9017     PetscCall(VecGetArray(gv, &coords));
9018     if (pcbddc->coarse_ksp) {
9019       PC        coarse_pc;
9020       PetscBool isbddc;
9021 
9022       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
9023       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
9024       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
9025         PetscReal *realcoords;
9026 
9027         PetscCall(VecGetLocalSize(gv, &n));
9028 #if defined(PETSC_USE_COMPLEX)
9029         PetscCall(PetscMalloc1(n, &realcoords));
9030         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
9031 #else
9032         realcoords = coords;
9033 #endif
9034         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
9035 #if defined(PETSC_USE_COMPLEX)
9036         PetscCall(PetscFree(realcoords));
9037 #endif
9038       }
9039     }
9040     PetscCall(VecRestoreArray(gv, &coords));
9041     PetscCall(VecDestroy(&gv));
9042   }
9043   PetscCall(ISDestroy(&corners));
9044 
9045   if (pcbddc->coarse_ksp) {
9046     Vec crhs, csol;
9047 
9048     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9049     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9050     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9051     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9052   }
9053   PetscCall(MatDestroy(&coarsedivudotp));
9054 
9055   /* compute null space for coarse solver if the benign trick has been requested */
9056   if (pcbddc->benign_null) {
9057     PetscCall(VecSet(pcbddc->vec1_P, 0.));
9058     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9059     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9060     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9061     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9062     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9063     if (coarse_mat) {
9064       Vec          nullv;
9065       PetscScalar *array, *array2;
9066       PetscInt     nl;
9067 
9068       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9069       PetscCall(VecGetLocalSize(nullv, &nl));
9070       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9071       PetscCall(VecGetArray(nullv, &array2));
9072       PetscCall(PetscArraycpy(array2, array, nl));
9073       PetscCall(VecRestoreArray(nullv, &array2));
9074       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9075       PetscCall(VecNormalize(nullv, NULL));
9076       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9077       PetscCall(VecDestroy(&nullv));
9078     }
9079   }
9080   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9081 
9082   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9083   if (pcbddc->coarse_ksp) {
9084     PetscBool ispreonly;
9085 
9086     if (CoarseNullSpace) {
9087       PetscBool isnull;
9088 
9089       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9090       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9091       /* TODO: add local nullspaces (if any) */
9092     }
9093     /* setup coarse ksp */
9094     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9095     /* Check coarse problem if in debug mode or if solving with an iterative method */
9096     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9097     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9098       KSP         check_ksp;
9099       KSPType     check_ksp_type;
9100       PC          check_pc;
9101       Vec         check_vec, coarse_vec;
9102       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9103       PetscInt    its;
9104       PetscBool   compute_eigs;
9105       PetscReal  *eigs_r, *eigs_c;
9106       PetscInt    neigs;
9107       const char *prefix;
9108 
9109       /* Create ksp object suitable for estimation of extreme eigenvalues */
9110       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9111       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9112       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9113       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9114       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9115       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9116       /* prevent from setup unneeded object */
9117       PetscCall(KSPGetPC(check_ksp, &check_pc));
9118       PetscCall(PCSetType(check_pc, PCNONE));
9119       if (ispreonly) {
9120         check_ksp_type = KSPPREONLY;
9121         compute_eigs   = PETSC_FALSE;
9122       } else {
9123         check_ksp_type = KSPGMRES;
9124         compute_eigs   = PETSC_TRUE;
9125       }
9126       PetscCall(KSPSetType(check_ksp, check_ksp_type));
9127       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9128       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9129       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9130       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9131       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9132       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9133       PetscCall(KSPSetFromOptions(check_ksp));
9134       PetscCall(KSPSetUp(check_ksp));
9135       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9136       PetscCall(KSPSetPC(check_ksp, check_pc));
9137       /* create random vec */
9138       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9139       PetscCall(VecSetRandom(check_vec, NULL));
9140       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9141       /* solve coarse problem */
9142       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9143       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9144       /* set eigenvalue estimation if preonly has not been requested */
9145       if (compute_eigs) {
9146         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9147         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9148         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9149         if (neigs) {
9150           lambda_max = eigs_r[neigs - 1];
9151           lambda_min = eigs_r[0];
9152           if (pcbddc->use_coarse_estimates) {
9153             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9154               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9155               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9156             }
9157           }
9158         }
9159       }
9160 
9161       /* check coarse problem residual error */
9162       if (pcbddc->dbg_flag) {
9163         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9164         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9165         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9166         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9167         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9168         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9169         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9170         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9171         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9172         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
9173         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9174         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9175         if (compute_eigs) {
9176           PetscReal          lambda_max_s, lambda_min_s;
9177           KSPConvergedReason reason;
9178           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9179           PetscCall(KSPGetIterationNumber(check_ksp, &its));
9180           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9181           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9182           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9183           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9184         }
9185         PetscCall(PetscViewerFlush(dbg_viewer));
9186         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9187       }
9188       PetscCall(VecDestroy(&check_vec));
9189       PetscCall(VecDestroy(&coarse_vec));
9190       PetscCall(KSPDestroy(&check_ksp));
9191       if (compute_eigs) {
9192         PetscCall(PetscFree(eigs_r));
9193         PetscCall(PetscFree(eigs_c));
9194       }
9195     }
9196   }
9197   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9198   /* print additional info */
9199   if (pcbddc->dbg_flag) {
9200     /* waits until all processes reaches this point */
9201     PetscCall(PetscBarrier((PetscObject)pc));
9202     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9203     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9204   }
9205 
9206   /* free memory */
9207   PetscCall(MatDestroy(&coarse_mat));
9208   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9209   PetscFunctionReturn(PETSC_SUCCESS);
9210 }
9211 
9212 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9213 {
9214   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
9215   PC_IS          *pcis   = (PC_IS *)pc->data;
9216   IS              subset, subset_mult, subset_n;
9217   PetscInt        local_size, coarse_size = 0;
9218   PetscInt       *local_primal_indices = NULL;
9219   const PetscInt *t_local_primal_indices;
9220 
9221   PetscFunctionBegin;
9222   /* Compute global number of coarse dofs */
9223   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9224   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9225   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9226   PetscCall(ISDestroy(&subset_n));
9227   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9228   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9229   PetscCall(ISDestroy(&subset));
9230   PetscCall(ISDestroy(&subset_mult));
9231   PetscCall(ISGetLocalSize(subset_n, &local_size));
9232   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9233   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9234   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9235   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9236   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9237   PetscCall(ISDestroy(&subset_n));
9238 
9239   if (pcbddc->dbg_flag) {
9240     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9241     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9242     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9243     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9244   }
9245 
9246   /* get back data */
9247   *coarse_size_n          = coarse_size;
9248   *local_primal_indices_n = local_primal_indices;
9249   PetscFunctionReturn(PETSC_SUCCESS);
9250 }
9251 
9252 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9253 {
9254   IS           localis_t;
9255   PetscInt     i, lsize, *idxs, n;
9256   PetscScalar *vals;
9257 
9258   PetscFunctionBegin;
9259   /* get indices in local ordering exploiting local to global map */
9260   PetscCall(ISGetLocalSize(globalis, &lsize));
9261   PetscCall(PetscMalloc1(lsize, &vals));
9262   for (i = 0; i < lsize; i++) vals[i] = 1.0;
9263   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9264   PetscCall(VecSet(gwork, 0.0));
9265   PetscCall(VecSet(lwork, 0.0));
9266   if (idxs) { /* multilevel guard */
9267     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9268     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9269   }
9270   PetscCall(VecAssemblyBegin(gwork));
9271   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9272   PetscCall(PetscFree(vals));
9273   PetscCall(VecAssemblyEnd(gwork));
9274   /* now compute set in local ordering */
9275   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9276   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9277   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9278   PetscCall(VecGetSize(lwork, &n));
9279   for (i = 0, lsize = 0; i < n; i++) {
9280     if (PetscRealPart(vals[i]) > 0.5) lsize++;
9281   }
9282   PetscCall(PetscMalloc1(lsize, &idxs));
9283   for (i = 0, lsize = 0; i < n; i++) {
9284     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9285   }
9286   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9287   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9288   *localis = localis_t;
9289   PetscFunctionReturn(PETSC_SUCCESS);
9290 }
9291 
9292 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9293 {
9294   PC_IS   *pcis   = (PC_IS *)pc->data;
9295   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9296   PC_IS   *pcisf;
9297   PC_BDDC *pcbddcf;
9298   PC       pcf;
9299 
9300   PetscFunctionBegin;
9301   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9302   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9303   PetscCall(PCSetType(pcf, PCBDDC));
9304 
9305   pcisf   = (PC_IS *)pcf->data;
9306   pcbddcf = (PC_BDDC *)pcf->data;
9307 
9308   pcisf->is_B_local = pcis->is_B_local;
9309   pcisf->vec1_N     = pcis->vec1_N;
9310   pcisf->BtoNmap    = pcis->BtoNmap;
9311   pcisf->n          = pcis->n;
9312   pcisf->n_B        = pcis->n_B;
9313 
9314   PetscCall(PetscFree(pcbddcf->mat_graph));
9315   PetscCall(PetscFree(pcbddcf->sub_schurs));
9316   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
9317   pcbddcf->sub_schurs            = schurs;
9318   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
9319   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9320   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9321   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
9322   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
9323   pcbddcf->use_faces             = PETSC_TRUE;
9324   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
9325   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
9326   pcbddcf->use_qr_single         = (PetscBool)!constraints;
9327   pcbddcf->fake_change           = PETSC_TRUE;
9328   pcbddcf->dbg_flag              = pcbddc->dbg_flag;
9329 
9330   PetscCall(PCBDDCAdaptiveSelection(pcf));
9331   PetscCall(PCBDDCConstraintsSetUp(pcf));
9332 
9333   *change = pcbddcf->ConstraintMatrix;
9334   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9335   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9336   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9337 
9338   if (schurs) pcbddcf->sub_schurs = NULL;
9339   pcbddcf->ConstraintMatrix = NULL;
9340   pcbddcf->mat_graph        = NULL;
9341   pcisf->is_B_local         = NULL;
9342   pcisf->vec1_N             = NULL;
9343   pcisf->BtoNmap            = NULL;
9344   PetscCall(PCDestroy(&pcf));
9345   PetscFunctionReturn(PETSC_SUCCESS);
9346 }
9347 
9348 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9349 {
9350   PC_IS          *pcis       = (PC_IS *)pc->data;
9351   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
9352   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9353   Mat             S_j;
9354   PetscInt       *used_xadj, *used_adjncy;
9355   PetscBool       free_used_adj;
9356 
9357   PetscFunctionBegin;
9358   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9359   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9360   free_used_adj = PETSC_FALSE;
9361   if (pcbddc->sub_schurs_layers == -1) {
9362     used_xadj   = NULL;
9363     used_adjncy = NULL;
9364   } else {
9365     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9366       used_xadj   = pcbddc->mat_graph->xadj;
9367       used_adjncy = pcbddc->mat_graph->adjncy;
9368     } else if (pcbddc->computed_rowadj) {
9369       used_xadj   = pcbddc->mat_graph->xadj;
9370       used_adjncy = pcbddc->mat_graph->adjncy;
9371     } else {
9372       PetscBool       flg_row = PETSC_FALSE;
9373       const PetscInt *xadj, *adjncy;
9374       PetscInt        nvtxs;
9375 
9376       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9377       if (flg_row) {
9378         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9379         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9380         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9381         free_used_adj = PETSC_TRUE;
9382       } else {
9383         pcbddc->sub_schurs_layers = -1;
9384         used_xadj                 = NULL;
9385         used_adjncy               = NULL;
9386       }
9387       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9388     }
9389   }
9390 
9391   /* setup sub_schurs data */
9392   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9393   if (!sub_schurs->schur_explicit) {
9394     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9395     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9396     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9397   } else {
9398     Mat       change        = NULL;
9399     Vec       scaling       = NULL;
9400     IS        change_primal = NULL, iP;
9401     PetscInt  benign_n;
9402     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
9403     PetscBool need_change       = PETSC_FALSE;
9404     PetscBool discrete_harmonic = PETSC_FALSE;
9405 
9406     if (!pcbddc->use_vertices && reuse_solvers) {
9407       PetscInt n_vertices;
9408 
9409       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9410       reuse_solvers = (PetscBool)!n_vertices;
9411     }
9412     if (!pcbddc->benign_change_explicit) {
9413       benign_n = pcbddc->benign_n;
9414     } else {
9415       benign_n = 0;
9416     }
9417     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9418        We need a global reduction to avoid possible deadlocks.
9419        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9420     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9421       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9422       PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9423       need_change = (PetscBool)(!need_change);
9424     }
9425     /* If the user defines additional constraints, we import them here */
9426     if (need_change) {
9427       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9428       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9429     }
9430     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9431 
9432     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9433     if (iP) {
9434       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9435       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9436       PetscOptionsEnd();
9437     }
9438     if (discrete_harmonic) {
9439       Mat A;
9440       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9441       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9442       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9443       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9444                                      pcbddc->benign_zerodiag_subs, change, change_primal));
9445       PetscCall(MatDestroy(&A));
9446     } else {
9447       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9448                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9449     }
9450     PetscCall(MatDestroy(&change));
9451     PetscCall(ISDestroy(&change_primal));
9452   }
9453   PetscCall(MatDestroy(&S_j));
9454 
9455   /* free adjacency */
9456   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9457   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9458   PetscFunctionReturn(PETSC_SUCCESS);
9459 }
9460 
9461 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9462 {
9463   PC_IS      *pcis   = (PC_IS *)pc->data;
9464   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9465   PCBDDCGraph graph;
9466 
9467   PetscFunctionBegin;
9468   /* attach interface graph for determining subsets */
9469   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9470     IS       verticesIS, verticescomm;
9471     PetscInt vsize, *idxs;
9472 
9473     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9474     PetscCall(ISGetSize(verticesIS, &vsize));
9475     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9476     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9477     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9478     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9479     PetscCall(PCBDDCGraphCreate(&graph));
9480     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9481     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9482     PetscCall(ISDestroy(&verticescomm));
9483     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9484   } else {
9485     graph = pcbddc->mat_graph;
9486   }
9487   /* print some info */
9488   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9489     IS       vertices;
9490     PetscInt nv, nedges, nfaces;
9491     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9492     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9493     PetscCall(ISGetSize(vertices, &nv));
9494     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9495     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9496     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9497     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9498     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9499     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9500     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9501     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9502   }
9503 
9504   /* sub_schurs init */
9505   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9506   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9507 
9508   /* free graph struct */
9509   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9510   PetscFunctionReturn(PETSC_SUCCESS);
9511 }
9512 
9513 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9514 {
9515   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
9516   PetscInt        n     = pc->pmat->rmap->n, ln, ni, st;
9517   const PetscInt *idxs;
9518   IS              gis;
9519 
9520   PetscFunctionBegin;
9521   if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9522   PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9523   PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9524   PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9525   PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9526   PetscCall(ISGetLocalSize(is, &ni));
9527   PetscCall(ISGetIndices(is, &idxs));
9528   for (PetscInt i = 0; i < ni; i++) {
9529     if (idxs[i] < 0 || idxs[i] >= ln) continue;
9530     matis->sf_leafdata[idxs[i]] = 1;
9531   }
9532   PetscCall(ISRestoreIndices(is, &idxs));
9533   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9534   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9535   ln = 0;
9536   for (PetscInt i = 0; i < n; i++) {
9537     if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9538   }
9539   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9540   PetscCall(ISView(gis, viewer));
9541   PetscCall(ISDestroy(&gis));
9542   PetscFunctionReturn(PETSC_SUCCESS);
9543 }
9544 
9545 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9546 {
9547   PetscInt    header[11];
9548   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9549   PetscViewer viewer;
9550   MPI_Comm    comm = PetscObjectComm((PetscObject)pc);
9551 
9552   PetscFunctionBegin;
9553   PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9554   if (load) {
9555     IS  is;
9556     Mat A;
9557 
9558     PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9559     PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9560     PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9561     PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9562     PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9563     PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9564     PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9565     PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9566     PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9567     PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9568     PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9569     if (header[0]) {
9570       PetscCall(ISCreate(comm, &is));
9571       PetscCall(ISLoad(is, viewer));
9572       PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9573       PetscCall(ISDestroy(&is));
9574     }
9575     if (header[1]) {
9576       PetscCall(ISCreate(comm, &is));
9577       PetscCall(ISLoad(is, viewer));
9578       PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9579       PetscCall(ISDestroy(&is));
9580     }
9581     if (header[2]) {
9582       IS *isarray;
9583 
9584       PetscCall(PetscMalloc1(header[2], &isarray));
9585       for (PetscInt i = 0; i < header[2]; i++) {
9586         PetscCall(ISCreate(comm, &isarray[i]));
9587         PetscCall(ISLoad(isarray[i], viewer));
9588       }
9589       PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9590       for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9591       PetscCall(PetscFree(isarray));
9592     }
9593     if (header[3]) {
9594       PetscCall(ISCreate(comm, &is));
9595       PetscCall(ISLoad(is, viewer));
9596       PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9597       PetscCall(ISDestroy(&is));
9598     }
9599     if (header[4]) {
9600       PetscCall(MatCreate(comm, &A));
9601       PetscCall(MatSetType(A, MATAIJ));
9602       PetscCall(MatLoad(A, viewer));
9603       PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9604       PetscCall(MatDestroy(&A));
9605     }
9606     if (header[9]) {
9607       PetscCall(MatCreate(comm, &A));
9608       PetscCall(MatSetType(A, MATIS));
9609       PetscCall(MatLoad(A, viewer));
9610       PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9611       PetscCall(MatDestroy(&A));
9612     }
9613   } else {
9614     header[0]  = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9615     header[1]  = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9616     header[2]  = pcbddc->n_ISForDofsLocal;
9617     header[3]  = (PetscInt)!!pcbddc->user_primal_vertices_local;
9618     header[4]  = (PetscInt)!!pcbddc->discretegradient;
9619     header[5]  = pcbddc->nedorder;
9620     header[6]  = pcbddc->nedfield;
9621     header[7]  = (PetscInt)pcbddc->nedglobal;
9622     header[8]  = (PetscInt)pcbddc->conforming;
9623     header[9]  = (PetscInt)!!pcbddc->divudotp;
9624     header[10] = (PetscInt)pcbddc->divudotp_trans;
9625     if (header[4]) header[3] = 0;
9626 
9627     PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9628     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9629     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9630     for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9631     if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9632     if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9633     if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9634   }
9635   PetscCall(PetscViewerDestroy(&viewer));
9636   PetscFunctionReturn(PETSC_SUCCESS);
9637 }
9638 
9639 #include <../src/mat/impls/aij/mpi/mpiaij.h>
9640 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9641 {
9642   Mat         At;
9643   IS          rows;
9644   PetscInt    rst, ren;
9645   PetscLayout rmap;
9646 
9647   PetscFunctionBegin;
9648   rst = ren = 0;
9649   if (ccomm != MPI_COMM_NULL) {
9650     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9651     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9652     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9653     PetscCall(PetscLayoutSetUp(rmap));
9654     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9655   }
9656   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9657   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9658   PetscCall(ISDestroy(&rows));
9659 
9660   if (ccomm != MPI_COMM_NULL) {
9661     Mat_MPIAIJ *a, *b;
9662     IS          from, to;
9663     Vec         gvec;
9664     PetscInt    lsize;
9665 
9666     PetscCall(MatCreate(ccomm, B));
9667     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9668     PetscCall(MatSetType(*B, MATAIJ));
9669     PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9670     PetscCall(PetscLayoutSetUp((*B)->cmap));
9671     a = (Mat_MPIAIJ *)At->data;
9672     b = (Mat_MPIAIJ *)(*B)->data;
9673     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9674     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9675     PetscCall(PetscObjectReference((PetscObject)a->A));
9676     PetscCall(PetscObjectReference((PetscObject)a->B));
9677     b->A = a->A;
9678     b->B = a->B;
9679 
9680     b->donotstash   = a->donotstash;
9681     b->roworiented  = a->roworiented;
9682     b->rowindices   = NULL;
9683     b->rowvalues    = NULL;
9684     b->getrowactive = PETSC_FALSE;
9685 
9686     (*B)->rmap         = rmap;
9687     (*B)->factortype   = A->factortype;
9688     (*B)->assembled    = PETSC_TRUE;
9689     (*B)->insertmode   = NOT_SET_VALUES;
9690     (*B)->preallocated = PETSC_TRUE;
9691 
9692     if (a->colmap) {
9693 #if defined(PETSC_USE_CTABLE)
9694       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9695 #else
9696       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9697       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9698 #endif
9699     } else b->colmap = NULL;
9700     if (a->garray) {
9701       PetscInt len;
9702       len = a->B->cmap->n;
9703       PetscCall(PetscMalloc1(len + 1, &b->garray));
9704       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9705     } else b->garray = NULL;
9706 
9707     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9708     b->lvec = a->lvec;
9709 
9710     /* cannot use VecScatterCopy */
9711     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9712     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9713     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9714     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9715     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9716     PetscCall(ISDestroy(&from));
9717     PetscCall(ISDestroy(&to));
9718     PetscCall(VecDestroy(&gvec));
9719   }
9720   PetscCall(MatDestroy(&At));
9721   PetscFunctionReturn(PETSC_SUCCESS);
9722 }
9723 
9724 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
9725 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9726 {
9727   PetscBool isaij;
9728   MPI_Comm  comm;
9729 
9730   PetscFunctionBegin;
9731   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9732   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9733   PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9734   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9735   if (isaij) { /* SeqAIJ supports repeated rows */
9736     PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9737   } else {
9738     Mat                A_loc;
9739     Mat_SeqAIJ        *da;
9740     PetscSF            sf;
9741     PetscInt           ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9742     PetscScalar       *daa;
9743     const PetscInt    *idxs;
9744     const PetscSFNode *iremotes;
9745     PetscSFNode       *remotes;
9746 
9747     /* SF for incoming rows */
9748     PetscCall(PetscSFCreate(comm, &sf));
9749     PetscCall(ISGetLocalSize(rows, &ni));
9750     PetscCall(ISGetIndices(rows, &idxs));
9751     PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9752     PetscCall(ISRestoreIndices(rows, &idxs));
9753 
9754     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9755     da = (Mat_SeqAIJ *)A_loc->data;
9756     PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9757     for (PetscInt i = 0; i < m; i++) {
9758       rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9759       rdata[2 * i + 1] = da->i[i];
9760     }
9761     PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9762     PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9763     PetscCall(PetscMalloc1(ni + 1, &di));
9764     di[0] = 0;
9765     for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9766     PetscCall(PetscMalloc1(di[ni], &dj));
9767     PetscCall(PetscMalloc1(di[ni], &daa));
9768     PetscCall(PetscMalloc1(di[ni], &remotes));
9769 
9770     PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9771 
9772     /* SF graph for nonzeros */
9773     c = 0;
9774     for (PetscInt i = 0; i < ni; i++) {
9775       const PetscInt rank  = iremotes[i].rank;
9776       const PetscInt rsize = ldata[2 * i];
9777       for (PetscInt j = 0; j < rsize; j++) {
9778         remotes[c].rank  = rank;
9779         remotes[c].index = ldata[2 * i + 1] + j;
9780         c++;
9781       }
9782     }
9783     PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9784     PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9785     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9786     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9787     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9788     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9789 
9790     PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9791     PetscCall(MatDestroy(&A_loc));
9792     PetscCall(PetscSFDestroy(&sf));
9793     PetscCall(PetscFree(di));
9794     PetscCall(PetscFree(dj));
9795     PetscCall(PetscFree(daa));
9796     PetscCall(PetscFree(remotes));
9797     PetscCall(PetscFree2(ldata, rdata));
9798   }
9799   PetscFunctionReturn(PETSC_SUCCESS);
9800 }
9801