xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision ae1ee55146a7ad071171b861759b85940c7e5c67)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <../src/mat/impls/dense/seq/dense.h>
6 #include <petscdmplex.h>
7 #include <petscblaslapack.h>
8 #include <petsc/private/sfimpl.h>
9 #include <petsc/private/dmpleximpl.h>
10 #include <petscdmda.h>
11 
12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13 
14 /* if range is true,  it returns B s.t. span{B} = range(A)
15    if range is false, it returns B s.t. range(B) _|_ range(A) */
MatDenseOrthogonalRangeOrComplement(Mat A,PetscBool range,PetscInt lw,PetscScalar * work,PetscReal * rwork,Mat * B)16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17 {
18   PetscScalar *uwork, *data, *U, ds = 0.;
19   PetscReal   *sing;
20   PetscBLASInt bM, bN, lwork, lierr, di = 1;
21   PetscInt     ulw, i, nr, nc, n;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal *rwork2;
24 #endif
25 
26   PetscFunctionBegin;
27   PetscCall(MatGetSize(A, &nr, &nc));
28   if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29 
30   /* workspace */
31   if (!work) {
32     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33     PetscCall(PetscMalloc1(ulw, &uwork));
34   } else {
35     ulw   = lw;
36     uwork = work;
37   }
38   n = PetscMin(nr, nc);
39   if (!rwork) {
40     PetscCall(PetscMalloc1(n, &sing));
41   } else {
42     sing = rwork;
43   }
44 
45   /* SVD */
46   PetscCall(PetscMalloc1(nr * nr, &U));
47   PetscCall(PetscBLASIntCast(nr, &bM));
48   PetscCall(PetscBLASIntCast(nc, &bN));
49   PetscCall(PetscBLASIntCast(ulw, &lwork));
50   PetscCall(MatDenseGetArray(A, &data));
51   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52 #if !defined(PETSC_USE_COMPLEX)
53   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54 #else
55   PetscCall(PetscMalloc1(5 * n, &rwork2));
56   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57   PetscCall(PetscFree(rwork2));
58 #endif
59   PetscCall(PetscFPTrapPop());
60   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
61   PetscCall(MatDenseRestoreArray(A, &data));
62   for (i = 0; i < n; i++)
63     if (sing[i] < PETSC_SMALL) break;
64   if (!rwork) PetscCall(PetscFree(sing));
65   if (!work) PetscCall(PetscFree(uwork));
66   /* create B */
67   if (!range) {
68     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69     PetscCall(MatDenseGetArray(*B, &data));
70     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71   } else {
72     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73     PetscCall(MatDenseGetArray(*B, &data));
74     PetscCall(PetscArraycpy(data, U, i * nr));
75   }
76   PetscCall(MatDenseRestoreArray(*B, &data));
77   PetscCall(PetscFree(U));
78   PetscFunctionReturn(PETSC_SUCCESS);
79 }
80 
81 /* TODO REMOVE */
82 #if defined(PRINT_GDET)
83 static int inc = 0;
84 static int lev = 0;
85 #endif
86 
PCBDDCComputeNedelecChangeEdge(Mat lG,IS edge,IS extrow,IS extcol,IS corners,Mat * Gins,Mat * GKins,PetscScalar cvals[2],PetscScalar * work,PetscReal * rwork)87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88 {
89   Mat          GE, GEd;
90   PetscInt     rsize, csize, esize;
91   PetscScalar *ptr;
92 
93   PetscFunctionBegin;
94   PetscCall(ISGetSize(edge, &esize));
95   if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96   PetscCall(ISGetSize(extrow, &rsize));
97   PetscCall(ISGetSize(extcol, &csize));
98 
99   /* gradients */
100   ptr = work + 5 * esize;
101   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104   PetscCall(MatDestroy(&GE));
105 
106   /* constants */
107   ptr += rsize * csize;
108   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111   PetscCall(MatDestroy(&GE));
112   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113   PetscCall(MatDestroy(&GEd));
114 
115   if (corners) {
116     Mat                GEc;
117     const PetscScalar *vals;
118     PetscScalar        v;
119 
120     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122     PetscCall(MatDenseGetArrayRead(GEd, &vals));
123     /* v       = PetscAbsScalar(vals[0]); */
124     v        = 1.;
125     cvals[0] = vals[0] / v;
126     cvals[1] = vals[1] / v;
127     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128     PetscCall(MatScale(*GKins, 1. / v));
129 #if defined(PRINT_GDET)
130     {
131       PetscViewer viewer;
132       char        filename[256];
133       PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137       PetscCall(MatView(GEc, viewer));
138       PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139       PetscCall(MatView(*GKins, viewer));
140       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141       PetscCall(MatView(GEd, viewer));
142       PetscCall(PetscViewerDestroy(&viewer));
143     }
144 #endif
145     PetscCall(MatDestroy(&GEd));
146     PetscCall(MatDestroy(&GEc));
147   }
148   PetscFunctionReturn(PETSC_SUCCESS);
149 }
150 
151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
152 
PCBDDCNedelecSupport(PC pc)153 PetscErrorCode PCBDDCNedelecSupport(PC pc)
154 {
155   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
156   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
157   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158   PetscSF                sfv;
159   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160   MPI_Comm               comm;
161   IS                     lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162   IS                    *eedges, *extrows, *extcols, *alleedges;
163   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164   PetscScalar           *vals, *work;
165   PetscReal             *rwork;
166   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
167   PetscInt               ne, nv, Lv, order, n, field;
168   PetscInt               i, j, extmem, cum, maxsize, nee;
169   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170   PetscInt              *sfvleaves, *sfvroots;
171   PetscInt              *corners, *cedges;
172   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173   PetscInt              *emarks;
174   PetscBool              print, eerr, done, lrc[2], conforming, global, setprimal;
175 
176   PetscFunctionBegin;
177   /* If the discrete gradient is defined for a subset of dofs and global is true,
178      it assumes G is given in global ordering for all the dofs.
179      Otherwise, the ordering is global for the Nedelec field */
180   order      = pcbddc->nedorder;
181   conforming = pcbddc->conforming;
182   field      = pcbddc->nedfield;
183   global     = pcbddc->nedglobal;
184   setprimal  = PETSC_FALSE;
185   print      = PETSC_FALSE;
186 
187   /* Command line customization */
188   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190   /* print debug info and adaptive order TODO: to be removed */
191   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193   PetscOptionsEnd();
194 
195   /* Return if there are no edges in the decomposition */
196   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199   PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200   lrc[0] = PETSC_FALSE;
201   for (i = 0; i < n; i++) {
202     if (PetscRealPart(vals[i]) > 2.) {
203       lrc[0] = PETSC_TRUE;
204       break;
205     }
206   }
207   PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208   PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPI_C_BOOL, MPI_LOR, comm));
209   if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
210 
211   /* Get Nedelec field */
212   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213   if (pcbddc->n_ISForDofsLocal && field >= 0) {
214     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215     nedfieldlocal = pcbddc->ISForDofsLocal[field];
216     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218     ne            = n;
219     nedfieldlocal = NULL;
220     global        = PETSC_TRUE;
221   } else if (field == PETSC_DECIDE) {
222     PetscInt rst, ren, *idx;
223 
224     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227     for (i = rst; i < ren; i++) {
228       PetscInt nc;
229 
230       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233     }
234     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236     PetscCall(PetscMalloc1(n, &idx));
237     for (i = 0, ne = 0; i < n; i++)
238       if (matis->sf_leafdata[i]) idx[ne++] = i;
239     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240   } else {
241     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242   }
243 
244   /* Sanity checks */
245   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
248 
249   /* Just set primal dofs and return */
250   if (setprimal) {
251     IS        enedfieldlocal;
252     PetscInt *eidxs;
253 
254     PetscCall(PetscMalloc1(ne, &eidxs));
255     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256     if (nedfieldlocal) {
257       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258       for (i = 0, cum = 0; i < ne; i++) {
259         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260       }
261       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262     } else {
263       for (i = 0, cum = 0; i < ne; i++) {
264         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265       }
266     }
267     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270     PetscCall(PetscFree(eidxs));
271     PetscCall(ISDestroy(&nedfieldlocal));
272     PetscCall(ISDestroy(&enedfieldlocal));
273     PetscFunctionReturn(PETSC_SUCCESS);
274   }
275 
276   /* Compute some l2g maps */
277   if (nedfieldlocal) {
278     IS is;
279 
280     /* need to map from the local Nedelec field to local numbering */
281     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286     if (global) {
287       PetscCall(PetscObjectReference((PetscObject)al2g));
288       el2g = al2g;
289     } else {
290       IS gis;
291 
292       PetscCall(ISRenumber(is, NULL, NULL, &gis));
293       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294       PetscCall(ISDestroy(&gis));
295     }
296     PetscCall(ISDestroy(&is));
297   } else {
298     /* one ref for the destruction of al2g, one for el2g */
299     PetscCall(PetscObjectReference((PetscObject)al2g));
300     PetscCall(PetscObjectReference((PetscObject)al2g));
301     el2g = al2g;
302     fl2g = NULL;
303   }
304 
305   /* Start communication to drop connections for interior edges (for cc analysis only) */
306   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308   if (nedfieldlocal) {
309     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312   } else {
313     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314   }
315   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
317 
318   /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319      Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320   if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
321 
322   /* drop connections with interior edges to avoid unneeded communications and memory movements */
323   PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324   PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325   PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326   if (global) {
327     PetscInt rst;
328 
329     PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331       if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332     }
333     PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334     PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335   } else {
336     PetscInt *tbz;
337 
338     PetscCall(PetscMalloc1(ne, &tbz));
339     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342     for (i = 0, cum = 0; i < ne; i++)
343       if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345     PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346     PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347     PetscCall(PetscFree(tbz));
348   }
349 
350   /* Extract subdomain relevant rows of G  */
351   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353   PetscCall(MatAIJExtractRows(G, lned, &lGall));
354   /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356   PetscCall(ISDestroy(&lned));
357   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358   PetscCall(MatDestroy(&lGall));
359   PetscCall(MatISGetLocalMat(lGis, &lG));
360   if (matis->allow_repeated) { /* multi-element support */
361     Mat                   *lGn, B;
362     IS                    *is_rows, *tcols, tmap, nmap;
363     PetscInt               subnv;
364     const PetscInt        *subvidxs;
365     ISLocalToGlobalMapping mapn;
366 
367     PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369     PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371       if (fl2g) {
372         PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373       } else {
374         PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375         is_rows[i] = pcbddc->local_subs[i];
376       }
377       PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379       PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380       PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382       PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383       PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384     }
385 
386     /* Create new MATIS with repeated vertices */
387     PetscCall(MatCreate(comm, &B));
388     PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389     PetscCall(MatSetType(B, MATIS));
390     PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391     PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392     PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393     PetscCall(ISDestroy(&tmap));
394     PetscCall(ISGetLocalSize(nmap, &subnv));
395     PetscCall(ISGetIndices(nmap, &subvidxs));
396     PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397     PetscCall(ISRestoreIndices(nmap, &subvidxs));
398     PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399     PetscCall(ISDestroy(&tmap));
400     PetscCall(ISDestroy(&nmap));
401     PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402     PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403     PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405       PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406       PetscCall(ISDestroy(&is_rows[i]));
407       PetscCall(ISDestroy(&tcols[i]));
408     }
409     PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410     PetscCall(PetscFree(lGn));
411     PetscCall(PetscFree(is_rows));
412     PetscCall(PetscFree(tcols));
413     PetscCall(MatISSetLocalMat(B, lG));
414     PetscCall(MatDestroy(&lG));
415 
416     PetscCall(MatDestroy(&lGis));
417     lGis = B;
418 
419     lGis->assembled = PETSC_TRUE;
420   }
421   PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
422 
423   /* SF for nodal dofs communications */
424   PetscCall(MatGetLocalSize(G, NULL, &Lv));
425   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426   PetscCall(PetscObjectReference((PetscObject)vl2g));
427   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428   PetscCall(PetscSFCreate(comm, &sfv));
429   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
432 
433   if (elements_corners) {
434     IS      tmp;
435     Vec     global, local;
436     Mat_IS *tGis = (Mat_IS *)lGis->data;
437 
438     PetscCall(MatCreateVecs(lGis, &global, NULL));
439     PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440     PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441     PetscCall(VecDestroy(&global));
442     PetscCall(VecDestroy(&local));
443     elements_corners = tmp;
444   }
445 
446   /* Destroy temporary G */
447   PetscCall(MatISGetLocalMat(lGis, &lG));
448   PetscCall(PetscObjectReference((PetscObject)lG));
449   PetscCall(MatDestroy(&G));
450   PetscCall(MatDestroy(&lGis));
451 
452   if (print) {
453     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454     PetscCall(MatView(lG, NULL));
455   }
456 
457   /* Save lG for values insertion in change of basis */
458   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
459 
460   /* Analyze the edge-nodes connections (duplicate lG) */
461   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463   PetscCall(PetscBTCreate(nv, &btv));
464   PetscCall(PetscBTCreate(ne, &bte));
465   PetscCall(PetscBTCreate(ne, &btb));
466   PetscCall(PetscBTCreate(ne, &btbd));
467   /* need to import the boundary specification to ensure the
468      proper detection of coarse edges' endpoints */
469   if (pcbddc->DirichletBoundariesLocal) {
470     IS is;
471 
472     if (fl2g) {
473       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474     } else {
475       is = pcbddc->DirichletBoundariesLocal;
476     }
477     PetscCall(ISGetLocalSize(is, &cum));
478     PetscCall(ISGetIndices(is, &idxs));
479     for (i = 0; i < cum; i++) {
480       if (idxs[i] >= 0 && idxs[i] < ne) {
481         PetscCall(PetscBTSet(btb, idxs[i]));
482         PetscCall(PetscBTSet(btbd, idxs[i]));
483       }
484     }
485     PetscCall(ISRestoreIndices(is, &idxs));
486     if (fl2g) PetscCall(ISDestroy(&is));
487   }
488   if (pcbddc->NeumannBoundariesLocal) {
489     IS is;
490 
491     if (fl2g) {
492       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493     } else {
494       is = pcbddc->NeumannBoundariesLocal;
495     }
496     PetscCall(ISGetLocalSize(is, &cum));
497     PetscCall(ISGetIndices(is, &idxs));
498     for (i = 0; i < cum; i++) {
499       if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500     }
501     PetscCall(ISRestoreIndices(is, &idxs));
502     if (fl2g) PetscCall(ISDestroy(&is));
503   }
504 
505   /* Count neighs per dof */
506   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
508 
509   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510      for proper detection of coarse edges' endpoints */
511   PetscCall(PetscBTCreate(ne, &btee));
512   for (i = 0; i < ne; i++) {
513     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514   }
515   PetscCall(PetscMalloc1(ne, &marks));
516   if (!conforming) {
517     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519   }
520   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521   PetscCall(MatSeqAIJGetArray(lGe, &vals));
522   cum = 0;
523   for (i = 0; i < ne; i++) {
524     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525     if (!PetscBTLookup(btee, i)) {
526       marks[cum++] = i;
527       continue;
528     }
529     /* set badly connected edge dofs as primal */
530     if (!conforming) {
531       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532         marks[cum++] = i;
533         PetscCall(PetscBTSet(bte, i));
534         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535       } else {
536         /* every edge dofs should be connected through a certain number of nodal dofs
537            to other edge dofs belonging to coarse edges
538            - at most 2 endpoints
539            - order-1 interior nodal dofs
540            - no undefined nodal dofs (nconn < order)
541         */
542         PetscInt ends = 0, ints = 0, undef = 0;
543         for (j = ii[i]; j < ii[i + 1]; j++) {
544           PetscInt v     = jj[j], k;
545           PetscInt nconn = iit[v + 1] - iit[v];
546           for (k = iit[v]; k < iit[v + 1]; k++)
547             if (!PetscBTLookup(btee, jjt[k])) nconn--;
548           if (nconn > order) ends++;
549           else if (nconn == order) ints++;
550           else undef++;
551         }
552         if (undef || ends > 2 || ints != order - 1) {
553           marks[cum++] = i;
554           PetscCall(PetscBTSet(bte, i));
555           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556         }
557       }
558     }
559     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560     if (!order && ii[i + 1] != ii[i]) {
561       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563     }
564   }
565   PetscCall(PetscBTDestroy(&btee));
566   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568   if (!conforming) {
569     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570     PetscCall(MatDestroy(&lGt));
571   }
572   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
573 
574   /* identify splitpoints and corner candidates */
575   PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576   PetscCall(PetscBTCreate(nv, &btvcand));
577   if (elements_corners) {
578     PetscCall(ISGetLocalSize(elements_corners, &cum));
579     PetscCall(ISGetIndices(elements_corners, &idxs));
580     for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581     PetscCall(ISRestoreIndices(elements_corners, &idxs));
582   }
583 
584   if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585     PetscSF   emlsf, vmlsf;
586     PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587     PetscInt  cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
588 
589     PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591     PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592     PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
593 
594     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595     PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
596 
597     PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598     for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599     PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600     PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
601 
602     PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603     for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604     PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605     PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
606 
607     PetscCall(PetscMalloc1(ne, &eleaves));
608     PetscCall(PetscMalloc1(nv, &vleaves));
609     for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610     for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611     PetscCall(PetscMalloc1(emnl, &meleaves));
612     PetscCall(PetscMalloc1(vmnl, &mvleaves));
613 
614     PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615     PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616     for (i = 0; i < n_subs; i++) {
617       const PetscInt *idxs;
618       const PetscInt  subid = cum_subs + i;
619       PetscInt        ns;
620 
621       PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622       PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623       for (j = 0; j < ns; j++) {
624         const PetscInt e = idxs[j];
625 
626         eleaves[e] = subid;
627         for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628       }
629       PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630     }
631     PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632     PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633     PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634     PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635     PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636     PetscCall(PetscFree(eleaves));
637     PetscCall(PetscFree(vleaves));
638 
639     PetscCall(PetscMalloc1(ne + 1, &eneighs));
640     eneighs[0] = meleaves;
641     for (i = 0; i < ne; i++) {
642       PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643       eneighs[i + 1] = eneighs[i] + ecount[i];
644     }
645     PetscCall(PetscMalloc1(nv + 1, &vneighs));
646     vneighs[0] = mvleaves;
647     for (i = 0; i < nv; i++) {
648       PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649       vneighs[i + 1] = vneighs[i] + vcount[i];
650     }
651   } else {
652     PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653     PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654   }
655 
656   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657   if (print) {
658     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659     PetscCall(MatView(lGe, NULL));
660     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661     PetscCall(MatView(lGt, NULL));
662   }
663   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664   PetscCall(MatSeqAIJGetArray(lGt, &vals));
665   for (i = 0; i < nv; i++) {
666     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668     if (!order) { /* variable order */
669       PetscReal vorder = 0.;
670 
671       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674       ord = 1;
675     }
676     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677       const PetscInt e = jj[j];
678 
679       if (PetscBTLookup(btbd, e)) {
680         bdir = PETSC_TRUE;
681         break;
682       }
683       if (vc != ecount[e]) {
684         sneighs = PETSC_FALSE;
685       } else {
686         const PetscInt *vn = vneighs[i], *en = eneighs[e];
687 
688         for (PetscInt k = 0; k < vc; k++) {
689           if (vn[k] != en[k]) {
690             sneighs = PETSC_FALSE;
691             break;
692           }
693         }
694       }
695     }
696     if (elements_corners) test = 0;
697     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699       PetscCall(PetscBTSet(btv, i));
700     } else if (test == ord) {
701       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703         PetscCall(PetscBTSet(btv, i));
704       } else if (!elements_corners) {
705         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706         PetscCall(PetscBTSet(btvcand, i));
707       }
708     }
709   }
710   PetscCall(PetscBTDestroy(&btbd));
711 
712   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713   if (order != 1) {
714     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716     for (i = 0; i < nv; i++) {
717       if (PetscBTLookup(btvcand, i)) {
718         PetscBool found = PETSC_FALSE;
719         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720           PetscInt k, e = jj[j];
721           if (PetscBTLookup(bte, e)) continue;
722           for (k = iit[e]; k < iit[e + 1]; k++) {
723             PetscInt v = jjt[k];
724             if (v != i && PetscBTLookup(btvcand, v)) {
725               found = PETSC_TRUE;
726               break;
727             }
728           }
729         }
730         if (!found) {
731           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732           PetscCall(PetscBTClear(btvcand, i));
733         } else {
734           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735         }
736       }
737     }
738     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739   }
740   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742   PetscCall(MatDestroy(&lGe));
743 
744   /* Get the local G^T explicitly */
745   PetscCall(MatDestroy(&lGt));
746   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
748 
749   /* Mark shared nodal dofs */
750   PetscCall(PetscBTCreate(nv, &btvi));
751   for (i = 0; i < nv; i++) {
752     if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753   }
754 
755   if (matis->allow_repeated) {
756     PetscCall(PetscFree(eneighs[0]));
757     PetscCall(PetscFree(vneighs[0]));
758     PetscCall(PetscFree(eneighs));
759     PetscCall(PetscFree(vneighs));
760   }
761   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
763 
764   /* communicate corners and splitpoints */
765   PetscCall(PetscMalloc1(nv, &vmarks));
766   PetscCall(PetscArrayzero(sfvleaves, nv));
767   PetscCall(PetscArrayzero(sfvroots, Lv));
768   for (i = 0; i < nv; i++)
769     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
770 
771   if (print) {
772     IS tbz;
773 
774     cum = 0;
775     for (i = 0; i < nv; i++)
776       if (sfvleaves[i]) vmarks[cum++] = i;
777 
778     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780     PetscCall(ISView(tbz, NULL));
781     PetscCall(ISDestroy(&tbz));
782   }
783 
784   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
788 
789   /* Zero rows of lGt corresponding to identified corners
790      and interior nodal dofs */
791   cum = 0;
792   for (i = 0; i < nv; i++) {
793     if (sfvleaves[i]) {
794       vmarks[cum++] = i;
795       PetscCall(PetscBTSet(btv, i));
796     } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797   }
798   PetscCall(PetscBTDestroy(&btvi));
799   if (print) {
800     IS tbz;
801 
802     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804     PetscCall(ISView(tbz, NULL));
805     PetscCall(ISDestroy(&tbz));
806   }
807   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808   PetscCall(PetscFree(vmarks));
809   PetscCall(PetscSFDestroy(&sfv));
810   PetscCall(PetscFree2(sfvleaves, sfvroots));
811 
812   /* Recompute G */
813   PetscCall(MatDestroy(&lG));
814   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815   if (print) {
816     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817     PetscCall(MatView(lG, NULL));
818     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819     PetscCall(MatView(lGt, NULL));
820   }
821 
822   /* Get primal dofs (if any) */
823   cum = 0;
824   for (i = 0; i < ne; i++) {
825     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826   }
827   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829   if (print) {
830     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831     PetscCall(ISView(primals, NULL));
832   }
833   PetscCall(PetscBTDestroy(&bte));
834   /* TODO: what if the user passed in some of them ?  */
835   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836   PetscCall(ISDestroy(&primals));
837 
838   /* Compute edge connectivity */
839   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
840 
841   /* Symbolic conn = lG*lGt */
842   if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843     PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844     PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845     PetscCall(MatProductSetAlgorithm(conn, "default"));
846     PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848     PetscCall(MatProductSetFromOptions(conn));
849     PetscCall(MatProductSymbolic(conn));
850     PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851     if (fl2g) {
852       PetscBT   btf;
853       PetscInt *iia, *jja, *iiu, *jju;
854       PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
855 
856       /* create CSR for all local dofs */
857       PetscCall(PetscMalloc1(n + 1, &iia));
858       if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859         PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860         iiu = pcbddc->mat_graph->xadj;
861         jju = pcbddc->mat_graph->adjncy;
862       } else if (pcbddc->use_local_adj) {
863         rest = PETSC_TRUE;
864         PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865       } else {
866         free = PETSC_TRUE;
867         PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868         iiu[0] = 0;
869         for (i = 0; i < n; i++) {
870           iiu[i + 1] = i + 1;
871           jju[i]     = -1;
872         }
873       }
874 
875       /* import sizes of CSR */
876       iia[0] = 0;
877       for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
878 
879       /* overwrite entries corresponding to the Nedelec field */
880       PetscCall(PetscBTCreate(n, &btf));
881       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882       for (i = 0; i < ne; i++) {
883         PetscCall(PetscBTSet(btf, idxs[i]));
884         iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885       }
886 
887       /* iia in CSR */
888       for (i = 0; i < n; i++) iia[i + 1] += iia[i];
889 
890       /* jja in CSR */
891       PetscCall(PetscMalloc1(iia[n], &jja));
892       for (i = 0; i < n; i++)
893         if (!PetscBTLookup(btf, i))
894           for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
895 
896       /* map edge dofs connectivity */
897       if (jj) {
898         PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899         for (i = 0; i < ne; i++) {
900           PetscInt e = idxs[i];
901           for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902         }
903       }
904       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906       if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907       if (free) PetscCall(PetscFree2(iiu, jju));
908       PetscCall(PetscBTDestroy(&btf));
909     } else {
910       PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911     }
912     PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913     PetscCall(MatDestroy(&conn));
914   }
915 
916   /* Analyze interface for edge dofs */
917   PetscCall(PCBDDCAnalyzeInterface(pc));
918   pcbddc->mat_graph->twodim = PETSC_FALSE;
919 
920   /* Get coarse edges in the edge space */
921   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
922 
923   if (fl2g) {
924     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925     PetscCall(PetscMalloc1(nee, &eedges));
926     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927   } else {
928     eedges  = alleedges;
929     primals = allprimals;
930   }
931 
932   /* Mark fine edge dofs with their coarse edge id */
933   PetscCall(PetscArrayzero(marks, ne));
934   PetscCall(ISGetLocalSize(primals, &cum));
935   PetscCall(ISGetIndices(primals, &idxs));
936   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937   PetscCall(ISRestoreIndices(primals, &idxs));
938   if (print) {
939     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940     PetscCall(ISView(primals, NULL));
941   }
942 
943   maxsize = 0;
944   for (i = 0; i < nee; i++) {
945     PetscInt size, mark = i + 1;
946 
947     PetscCall(ISGetLocalSize(eedges[i], &size));
948     PetscCall(ISGetIndices(eedges[i], &idxs));
949     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950     PetscCall(ISRestoreIndices(eedges[i], &idxs));
951     maxsize = PetscMax(maxsize, size);
952   }
953 
954   /* Find coarse edge endpoints */
955   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957   for (i = 0; i < nee; i++) {
958     PetscInt mark = i + 1, size;
959 
960     PetscCall(ISGetLocalSize(eedges[i], &size));
961     if (!size && nedfieldlocal) continue;
962     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963     PetscCall(ISGetIndices(eedges[i], &idxs));
964     if (print) {
965       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966       PetscCall(ISView(eedges[i], NULL));
967     }
968     for (j = 0; j < size; j++) {
969       PetscInt k, ee = idxs[j];
970       if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee));
971       for (k = ii[ee]; k < ii[ee + 1]; k++) {
972         if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]));
973         if (PetscBTLookup(btv, jj[k])) {
974           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976           PetscInt  k2;
977           PetscBool corner = PETSC_FALSE;
978           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980             /* it's a corner if either is connected with an edge dof belonging to a different cc or
981                if the edge dof lie on the natural part of the boundary */
982             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983               corner = PETSC_TRUE;
984               break;
985             }
986           }
987           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]));
989             PetscCall(PetscBTSet(btv, jj[k]));
990           } else {
991             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "        no corners found\n"));
992           }
993         }
994       }
995     }
996     PetscCall(ISRestoreIndices(eedges[i], &idxs));
997   }
998   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000   PetscCall(PetscBTDestroy(&btb));
1001 
1002   /* Reset marked primal dofs */
1003   PetscCall(ISGetLocalSize(primals, &cum));
1004   PetscCall(ISGetIndices(primals, &idxs));
1005   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006   PetscCall(ISRestoreIndices(primals, &idxs));
1007 
1008   /* Now use the initial lG */
1009   PetscCall(MatDestroy(&lG));
1010   PetscCall(MatDestroy(&lGt));
1011   lG = lGinit;
1012   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1013 
1014   /* Compute extended cols indices */
1015   PetscCall(PetscBTCreate(nv, &btvc));
1016   PetscCall(PetscBTCreate(nee, &bter));
1017   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019   i *= maxsize;
1020   PetscCall(PetscCalloc1(nee, &extcols));
1021   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022   eerr = PETSC_FALSE;
1023   for (i = 0; i < nee; i++) {
1024     PetscInt size, found = 0;
1025 
1026     cum = 0;
1027     PetscCall(ISGetLocalSize(eedges[i], &size));
1028     if (!size && nedfieldlocal) continue;
1029     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030     PetscCall(ISGetIndices(eedges[i], &idxs));
1031     PetscCall(PetscBTMemzero(nv, btvc));
1032     for (j = 0; j < size; j++) {
1033       PetscInt k, ee = idxs[j];
1034       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035         PetscInt vv = jj[k];
1036         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037         else if (!PetscBTLookupSet(btvc, vv)) found++;
1038       }
1039     }
1040     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045     /* it may happen that endpoints are not defined at this point
1046        if it is the case, mark this edge for a second pass */
1047     if (cum != size - 1 || found != 2) {
1048       PetscCall(PetscBTSet(bter, i));
1049       if (print) {
1050         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051         PetscCall(ISView(eedges[i], NULL));
1052         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053         PetscCall(ISView(extcols[i], NULL));
1054       }
1055       eerr = PETSC_TRUE;
1056     }
1057   }
1058   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059   PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPI_C_BOOL, MPI_LOR, comm));
1060   if (done) {
1061     PetscInt *newprimals;
1062 
1063     PetscCall(PetscMalloc1(ne, &newprimals));
1064     PetscCall(ISGetLocalSize(primals, &cum));
1065     PetscCall(ISGetIndices(primals, &idxs));
1066     PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067     PetscCall(ISRestoreIndices(primals, &idxs));
1068     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070     for (i = 0; i < nee; i++) {
1071       PetscBool has_candidates = PETSC_FALSE;
1072       if (PetscBTLookup(bter, i)) {
1073         PetscInt size, mark = i + 1;
1074 
1075         PetscCall(ISGetLocalSize(eedges[i], &size));
1076         PetscCall(ISGetIndices(eedges[i], &idxs));
1077         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078         for (j = 0; j < size; j++) {
1079           PetscInt k, ee = idxs[j];
1080           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082             /* set all candidates located on the edge as corners */
1083             if (PetscBTLookup(btvcand, jj[k])) {
1084               PetscInt k2, vv = jj[k];
1085               has_candidates = PETSC_TRUE;
1086               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087               PetscCall(PetscBTSet(btv, vv));
1088               /* set all edge dofs connected to candidate as primals */
1089               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090                 if (marks[jjt[k2]] == mark) {
1091                   PetscInt k3, ee2 = jjt[k2];
1092                   if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093                   newprimals[cum++] = ee2;
1094                   /* finally set the new corners */
1095                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096                     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097                     PetscCall(PetscBTSet(btv, jj[k3]));
1098                   }
1099                 }
1100               }
1101             } else {
1102               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103             }
1104           }
1105         }
1106         if (!has_candidates) { /* circular edge */
1107           PetscInt k, ee = idxs[0], *tmarks;
1108 
1109           PetscCall(PetscCalloc1(ne, &tmarks));
1110           if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i));
1111           for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112             PetscInt k2;
1113             if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]));
1114             PetscCall(PetscBTSet(btv, jj[k]));
1115             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116           }
1117           for (j = 0; j < size; j++) {
1118             if (tmarks[idxs[j]] > 1) {
1119               if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120               newprimals[cum++] = idxs[j];
1121             }
1122           }
1123           PetscCall(PetscFree(tmarks));
1124         }
1125         PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126       }
1127       PetscCall(ISDestroy(&extcols[i]));
1128     }
1129     PetscCall(PetscFree(extcols));
1130     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132     if (fl2g) {
1133       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134       PetscCall(ISDestroy(&primals));
1135       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136       PetscCall(PetscFree(eedges));
1137     }
1138     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140     PetscCall(PetscFree(newprimals));
1141     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142     PetscCall(ISDestroy(&primals));
1143     PetscCall(PCBDDCAnalyzeInterface(pc));
1144     pcbddc->mat_graph->twodim = PETSC_FALSE;
1145     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146     if (fl2g) {
1147       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148       PetscCall(PetscMalloc1(nee, &eedges));
1149       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150     } else {
1151       eedges  = alleedges;
1152       primals = allprimals;
1153     }
1154     PetscCall(PetscCalloc1(nee, &extcols));
1155 
1156     /* Mark again */
1157     PetscCall(PetscArrayzero(marks, ne));
1158     for (i = 0; i < nee; i++) {
1159       PetscInt size, mark = i + 1;
1160 
1161       PetscCall(ISGetLocalSize(eedges[i], &size));
1162       PetscCall(ISGetIndices(eedges[i], &idxs));
1163       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165     }
1166     if (print) {
1167       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168       PetscCall(ISView(primals, NULL));
1169     }
1170 
1171     /* Recompute extended cols */
1172     eerr = PETSC_FALSE;
1173     for (i = 0; i < nee; i++) {
1174       PetscInt size;
1175 
1176       cum = 0;
1177       PetscCall(ISGetLocalSize(eedges[i], &size));
1178       if (!size && nedfieldlocal) continue;
1179       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180       PetscCall(ISGetIndices(eedges[i], &idxs));
1181       for (j = 0; j < size; j++) {
1182         PetscInt k, ee = idxs[j];
1183         for (k = ii[ee]; k < ii[ee + 1]; k++)
1184           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185       }
1186       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191       if (cum != size - 1) {
1192         if (print) {
1193           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194           PetscCall(ISView(eedges[i], NULL));
1195           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196           PetscCall(ISView(extcols[i], NULL));
1197         }
1198         eerr = PETSC_TRUE;
1199       }
1200     }
1201   }
1202   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203   PetscCall(PetscFree2(extrow, gidxs));
1204   PetscCall(PetscBTDestroy(&bter));
1205   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206   /* an error should not occur at this point */
1207   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1208 
1209   /* Check the number of endpoints */
1210   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211   PetscCall(PetscMalloc1(2 * nee, &corners));
1212   PetscCall(PetscMalloc1(nee, &cedges));
1213   for (i = 0; i < nee; i++) {
1214     PetscInt size, found = 0, gc[2];
1215 
1216     /* init with defaults */
1217     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218     PetscCall(ISGetLocalSize(eedges[i], &size));
1219     if (!size && nedfieldlocal) continue;
1220     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221     PetscCall(ISGetIndices(eedges[i], &idxs));
1222     PetscCall(PetscBTMemzero(nv, btvc));
1223     for (j = 0; j < size; j++) {
1224       PetscInt k, ee = idxs[j];
1225       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226         PetscInt vv = jj[k];
1227         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229           corners[i * 2 + found++] = vv;
1230         }
1231       }
1232     }
1233     if (found != 2) {
1234       PetscInt e;
1235       if (fl2g) {
1236         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237       } else {
1238         e = idxs[0];
1239       }
1240       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241     }
1242 
1243     /* get primal dof index on this coarse edge */
1244     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245     if (gc[0] > gc[1]) {
1246       PetscInt swap      = corners[2 * i];
1247       corners[2 * i]     = corners[2 * i + 1];
1248       corners[2 * i + 1] = swap;
1249     }
1250     cedges[i] = idxs[size - 1];
1251     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252     if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253   }
1254   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255   PetscCall(PetscBTDestroy(&btvc));
1256 
1257   if (PetscDefined(USE_DEBUG)) {
1258     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259      not interfere with neighbouring coarse edges */
1260     PetscCall(PetscMalloc1(nee + 1, &emarks));
1261     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262     for (i = 0; i < nv; i++) {
1263       PetscInt emax = 0, eemax = 0;
1264 
1265       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266       PetscCall(PetscArrayzero(emarks, nee + 1));
1267       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268       for (j = 1; j < nee + 1; j++) {
1269         if (emax < emarks[j]) {
1270           emax  = emarks[j];
1271           eemax = j;
1272         }
1273       }
1274       /* not relevant for edges */
1275       if (!eemax) continue;
1276 
1277       for (j = ii[i]; j < ii[i + 1]; j++) {
1278         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279       }
1280     }
1281     PetscCall(PetscFree(emarks));
1282     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283   }
1284 
1285   /* Compute extended rows indices for edge blocks of the change of basis */
1286   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288   extmem *= maxsize;
1289   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290   PetscCall(PetscMalloc1(nee, &extrows));
1291   PetscCall(PetscCalloc1(nee, &extrowcum));
1292   for (i = 0; i < nv; i++) {
1293     PetscInt mark = 0, size, start;
1294 
1295     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296     for (j = ii[i]; j < ii[i + 1]; j++)
1297       if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1298 
1299     /* not relevant */
1300     if (!mark) continue;
1301 
1302     /* import extended row */
1303     mark--;
1304     start = mark * extmem + extrowcum[mark];
1305     size  = ii[i + 1] - ii[i];
1306     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308     extrowcum[mark] += size;
1309   }
1310   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311   PetscCall(MatDestroy(&lGt));
1312   PetscCall(PetscFree(marks));
1313 
1314   /* Compress extrows */
1315   cum = 0;
1316   for (i = 0; i < nee; i++) {
1317     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318     PetscCall(PetscSortRemoveDupsInt(&size, start));
1319     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320     cum = PetscMax(cum, size);
1321   }
1322   PetscCall(PetscFree(extrowcum));
1323   PetscCall(PetscBTDestroy(&btv));
1324   PetscCall(PetscBTDestroy(&btvcand));
1325 
1326   /* Workspace for lapack inner calls and VecSetValues */
1327   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1328 
1329   /* Create change of basis matrix (no preallocation) */
1330   PetscCall(MatCreate(comm, &T));
1331   PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332   PetscCall(MatSetType(T, MATAIJ));
1333   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1334   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1335   PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
1336   //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1337   //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1338   //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1339 
1340   /* Defaults to identity */
1341   {
1342     Vec                w;
1343     const PetscScalar *wa;
1344 
1345     PetscCall(MatCreateVecs(T, &w, NULL));
1346     PetscCall(VecSetLocalToGlobalMapping(w, al2g));
1347     PetscCall(VecSet(w, 1.0));
1348     for (i = 0; i < nee; i++) {
1349       const PetscInt *idxs;
1350       PetscInt        nl;
1351 
1352       PetscCall(ISGetLocalSize(eedges[i], &nl));
1353       PetscCall(ISGetIndices(eedges[i], &idxs));
1354       PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES));
1355       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1356     }
1357     PetscCall(VecAssemblyBegin(w));
1358     PetscCall(VecAssemblyEnd(w));
1359     PetscCall(VecGetArrayRead(w, &wa));
1360     for (i = T->rmap->rstart; i < T->rmap->rend; i++)
1361       if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1362     PetscCall(VecRestoreArrayRead(w, &wa));
1363     PetscCall(VecDestroy(&w));
1364   }
1365 
1366   /* Create discrete gradient for the coarser level if needed */
1367   PetscCall(MatDestroy(&pcbddc->nedcG));
1368   PetscCall(ISDestroy(&pcbddc->nedclocal));
1369   if (pcbddc->current_level < pcbddc->max_levels) {
1370     ISLocalToGlobalMapping cel2g, cvl2g;
1371     IS                     wis, gwis;
1372     PetscInt               cnv, cne;
1373 
1374     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1375     if (fl2g) {
1376       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1377     } else {
1378       PetscCall(PetscObjectReference((PetscObject)wis));
1379       pcbddc->nedclocal = wis;
1380     }
1381     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1382     PetscCall(ISDestroy(&wis));
1383     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1384     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1385     PetscCall(ISDestroy(&wis));
1386     PetscCall(ISDestroy(&gwis));
1387 
1388     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1389     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1390     PetscCall(ISDestroy(&wis));
1391     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1392     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1393     PetscCall(ISDestroy(&wis));
1394     PetscCall(ISDestroy(&gwis));
1395 
1396     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1397     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1398     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1399     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1400     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1401     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1402     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1403     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1404   }
1405 
1406   MatNullSpace nnsp;
1407   PetscBool    nnsp_has_const = PETSC_FALSE;
1408   const Vec   *nnsp_vecs      = NULL;
1409   PetscInt     nnsp_nvecs     = 0;
1410   VecScatter   nnsp_vscat     = NULL;
1411   PetscCall(MatGetNullSpace(pcbddc->discretegradient, &nnsp));
1412   if (nnsp) PetscCall(MatNullSpaceGetVecs(nnsp, &nnsp_has_const, &nnsp_nvecs, &nnsp_vecs));
1413   if (nnsp_has_const || nnsp_nvecs) { /* create scatter to import edge constraints */
1414     IS                 allextcols, gallextcols, galleedges, is_E_to_zero;
1415     Vec                E, V;
1416     PetscInt          *eedgesidxs;
1417     const PetscScalar *evals;
1418 
1419     PetscCall(MatCreateVecs(pc->pmat, &E, NULL));
1420     PetscCall(MatCreateVecs(pcbddc->discretegradient, &V, NULL));
1421     PetscCall(ISConcatenate(PETSC_COMM_SELF, nee, extcols, &allextcols));
1422     cum = 0;
1423     for (i = 0; i < nee; i++) {
1424       PetscInt j;
1425 
1426       PetscCall(ISGetLocalSize(eedges[i], &j));
1427       PetscCheck(j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Zero sized edge %" PetscInt_FMT, i);
1428       cum += j - 1;
1429     }
1430     PetscCall(PetscMalloc1(PetscMax(cum, pc->pmat->rmap->n), &eedgesidxs));
1431     cum = 0;
1432     for (i = 0; i < nee; i++) {
1433       const PetscInt *idxs;
1434       PetscInt        j;
1435 
1436       PetscCall(ISGetLocalSize(eedges[i], &j));
1437       PetscCall(ISGetIndices(eedges[i], &idxs));
1438       PetscCall(PetscArraycpy(eedgesidxs + cum, idxs, j - 1)); /* last on the edge is primal */
1439       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1440       cum += j - 1;
1441     }
1442     PetscCall(ISLocalToGlobalMappingApply(al2g, cum, eedgesidxs, eedgesidxs));
1443     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_USE_POINTER, &galleedges));
1444     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, allextcols, &gallextcols));
1445     PetscCall(VecScatterCreate(V, gallextcols, E, galleedges, &nnsp_vscat));
1446     PetscCall(ISDestroy(&allextcols));
1447     PetscCall(ISDestroy(&gallextcols));
1448     PetscCall(ISDestroy(&galleedges));
1449 
1450     /* identify dofs we must zero if importing user-defined near nullspace from pmat */
1451     PetscCall(VecSet(E, 1.0));
1452     PetscCall(VecSetValues(E, cum, eedgesidxs, NULL, INSERT_VALUES));
1453     PetscCall(VecAssemblyBegin(E));
1454     PetscCall(VecAssemblyEnd(E));
1455     PetscCall(VecGetArrayRead(E, &evals));
1456     for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++)
1457       if (evals[i] == 0.0) eedgesidxs[cum++] = i + pc->pmat->rmap->rstart;
1458     PetscCall(VecRestoreArrayRead(E, &evals));
1459     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_COPY_VALUES, &is_E_to_zero));
1460     PetscCall(PetscFree(eedgesidxs));
1461 
1462     PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject)V));
1463     PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject)E));
1464     PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_zero", (PetscObject)is_E_to_zero));
1465     PetscCall(ISDestroy(&is_E_to_zero));
1466     PetscCall(VecDestroy(&V));
1467     PetscCall(VecDestroy(&E));
1468   }
1469 #if defined(PRINT_GDET)
1470   inc = 0;
1471   lev = pcbddc->current_level;
1472 #endif
1473 
1474   /* Insert values in the change of basis matrix */
1475   for (i = 0; i < nee; i++) {
1476     Mat         Gins = NULL, GKins = NULL;
1477     IS          cornersis = NULL;
1478     PetscScalar cvals[2];
1479 
1480     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1481     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1482     if (Gins && GKins) {
1483       const PetscScalar *data;
1484       const PetscInt    *rows, *cols;
1485       PetscInt           nrh, nch, nrc, ncc;
1486 
1487       PetscCall(ISGetIndices(eedges[i], &cols));
1488       /* H1 */
1489       PetscCall(ISGetIndices(extrows[i], &rows));
1490       PetscCall(MatGetSize(Gins, &nrh, &nch));
1491       PetscCall(MatDenseGetArrayRead(Gins, &data));
1492       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1493       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1494       PetscCall(ISRestoreIndices(extrows[i], &rows));
1495       /* complement */
1496       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1497       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1498       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1499       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1500       PetscCall(MatDenseGetArrayRead(GKins, &data));
1501       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1502       PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1503 
1504       /* coarse discrete gradient */
1505       if (pcbddc->nedcG) {
1506         PetscInt cols[2];
1507 
1508         cols[0] = 2 * i;
1509         cols[1] = 2 * i + 1;
1510         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1511       }
1512       PetscCall(ISRestoreIndices(eedges[i], &cols));
1513     }
1514     PetscCall(ISDestroy(&extrows[i]));
1515     PetscCall(ISDestroy(&extcols[i]));
1516     PetscCall(ISDestroy(&cornersis));
1517     PetscCall(MatDestroy(&Gins));
1518     PetscCall(MatDestroy(&GKins));
1519   }
1520 
1521   /* import edge constraints */
1522   if (nnsp_vscat) {
1523     Vec          V, E, *quadvecs;
1524     PetscInt     nvecs, nvecs_orth;
1525     MatNullSpace onnsp           = NULL;
1526     PetscBool    onnsp_has_const = PETSC_FALSE;
1527     const Vec   *onnsp_vecs      = NULL;
1528     PetscInt     onnsp_nvecs     = 0, new_nnsp_nvecs, old_nnsp_nvecs;
1529     IS           is_E_to_zero;
1530 
1531     /* import nearnullspace from preconditioning matrix if user-defined */
1532     PetscCall(MatGetNearNullSpace(pc->pmat, &onnsp));
1533     if (onnsp) {
1534       PetscBool isinternal;
1535 
1536       PetscCall(PetscStrcmp("_internal_BDDC_nedelec_nnsp", ((PetscObject)onnsp)->name, &isinternal));
1537       if (!isinternal) PetscCall(MatNullSpaceGetVecs(onnsp, &onnsp_has_const, &onnsp_nvecs, &onnsp_vecs));
1538     }
1539     new_nnsp_nvecs = nnsp_nvecs + (nnsp_has_const ? 1 : 0);
1540     old_nnsp_nvecs = onnsp_nvecs + (onnsp_has_const ? 1 : 0);
1541     nvecs          = old_nnsp_nvecs + new_nnsp_nvecs;
1542     PetscCall(PetscMalloc1(nvecs, &quadvecs));
1543 
1544     PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject *)&V));
1545     PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject *)&E));
1546     PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_zero", (PetscObject *)&is_E_to_zero));
1547     for (i = 0; i < nvecs; i++) PetscCall(VecDuplicate(E, &quadvecs[i]));
1548     cum = 0;
1549     if (nnsp_has_const) {
1550       PetscCall(VecSet(V, 1.0));
1551       PetscCall(VecScatterBegin(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD));
1552       PetscCall(VecScatterEnd(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD));
1553       cum = 1;
1554     }
1555     for (i = 0; i < nnsp_nvecs; i++) {
1556       PetscCall(VecScatterBegin(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD));
1557       PetscCall(VecScatterEnd(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD));
1558     }
1559 
1560     /* Now add old nnsp if present */
1561     cum = 0;
1562     if (onnsp_has_const) {
1563       PetscCall(VecSet(quadvecs[new_nnsp_nvecs], 1.0));
1564       PetscCall(VecISSet(quadvecs[new_nnsp_nvecs], is_E_to_zero, 0));
1565       cum = 1;
1566     }
1567     for (i = 0; i < onnsp_nvecs; i++) {
1568       PetscCall(VecCopy(onnsp_vecs[i], quadvecs[i + cum + new_nnsp_nvecs]));
1569       PetscCall(VecISSet(quadvecs[i + cum + new_nnsp_nvecs], is_E_to_zero, 0));
1570     }
1571     nvecs_orth = nvecs;
1572     PetscCall(PCBDDCOrthonormalizeVecs(&nvecs_orth, quadvecs));
1573     PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, nvecs_orth, quadvecs, &nnsp));
1574     for (i = 0; i < nvecs; i++) PetscCall(VecDestroy(&quadvecs[i]));
1575     PetscCall(PetscFree(quadvecs));
1576     PetscCall(PetscObjectSetName((PetscObject)nnsp, "_internal_BDDC_nedelec_nnsp"));
1577     PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1578     PetscCall(MatNullSpaceDestroy(&nnsp));
1579   }
1580   PetscCall(VecScatterDestroy(&nnsp_vscat));
1581   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1582   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1583   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1584 
1585   /* Start assembling */
1586   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1587   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1588 
1589   /* Free */
1590   if (fl2g) {
1591     PetscCall(ISDestroy(&primals));
1592     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1593     PetscCall(PetscFree(eedges));
1594   }
1595 
1596   /* hack mat_graph with primal dofs on the coarse edges */
1597   {
1598     PCBDDCGraph graph  = pcbddc->mat_graph;
1599     PetscInt   *oqueue = graph->queue;
1600     PetscInt   *ocptr  = graph->cptr;
1601     PetscInt    ncc, *idxs;
1602 
1603     /* find first primal edge */
1604     if (pcbddc->nedclocal) {
1605       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1606     } else {
1607       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1608       idxs = cedges;
1609     }
1610     cum = 0;
1611     while (cum < nee && cedges[cum] < 0) cum++;
1612 
1613     /* adapt connected components */
1614     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1615     graph->cptr[0] = 0;
1616     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1617       PetscInt lc = ocptr[i + 1] - ocptr[i];
1618       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1619         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1620         graph->queue[graph->cptr[ncc]] = cedges[cum];
1621         ncc++;
1622         lc--;
1623         cum++;
1624         while (cum < nee && cedges[cum] < 0) cum++;
1625       }
1626       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1627       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1628       ncc++;
1629     }
1630     graph->ncc = ncc;
1631     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1632     PetscCall(PetscFree2(ocptr, oqueue));
1633   }
1634   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1635   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1636   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1637 
1638   PetscCall(ISDestroy(&nedfieldlocal));
1639   PetscCall(PetscFree(extrow));
1640   PetscCall(PetscFree2(work, rwork));
1641   PetscCall(PetscFree(corners));
1642   PetscCall(PetscFree(cedges));
1643   PetscCall(PetscFree(extrows));
1644   PetscCall(PetscFree(extcols));
1645   PetscCall(MatDestroy(&lG));
1646 
1647   /* Complete assembling */
1648   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1649   PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1650   if (pcbddc->nedcG) {
1651     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1652     PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1653   }
1654 
1655   PetscCall(ISDestroy(&elements_corners));
1656 
1657   /* set change of basis */
1658   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1659   PetscCall(MatDestroy(&T));
1660   PetscFunctionReturn(PETSC_SUCCESS);
1661 }
1662 
1663 /* the near-null space of BDDC carries information on quadrature weights,
1664    and these can be collinear -> so cheat with MatNullSpaceCreate
1665    and create a suitable set of basis vectors first */
PCBDDCNullSpaceCreate(MPI_Comm comm,PetscBool has_const,PetscInt nvecs,Vec quad_vecs[],MatNullSpace * nnsp)1666 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1667 {
1668   PetscInt i;
1669 
1670   PetscFunctionBegin;
1671   for (i = 0; i < nvecs; i++) {
1672     PetscInt first, last;
1673 
1674     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1675     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1676     if (i >= first && i < last) {
1677       PetscScalar *data;
1678       PetscCall(VecGetArray(quad_vecs[i], &data));
1679       if (!has_const) {
1680         data[i - first] = 1.;
1681       } else {
1682         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1683         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1684       }
1685       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1686     }
1687     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1688   }
1689   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1690   for (i = 0; i < nvecs; i++) { /* reset vectors */
1691     PetscInt first, last;
1692     PetscCall(VecLockReadPop(quad_vecs[i]));
1693     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1694     if (i >= first && i < last) {
1695       PetscScalar *data;
1696       PetscCall(VecGetArray(quad_vecs[i], &data));
1697       if (!has_const) {
1698         data[i - first] = 0.;
1699       } else {
1700         data[2 * i - first]     = 0.;
1701         data[2 * i - first + 1] = 0.;
1702       }
1703       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1704     }
1705     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1706     PetscCall(VecLockReadPush(quad_vecs[i]));
1707   }
1708   PetscFunctionReturn(PETSC_SUCCESS);
1709 }
1710 
PCBDDCComputeNoNetFlux(Mat A,Mat divudotp,PetscBool transpose,IS vl2l,PCBDDCGraph graph,MatNullSpace * nnsp)1711 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1712 {
1713   Mat                    loc_divudotp;
1714   Vec                    p, v, quad_vec;
1715   ISLocalToGlobalMapping map;
1716   PetscScalar           *array;
1717 
1718   PetscFunctionBegin;
1719   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1720   if (!transpose) {
1721     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1722   } else {
1723     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1724   }
1725   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1726   PetscCall(VecLockReadPop(quad_vec));
1727   PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1728 
1729   /* compute local quad vec */
1730   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1731   if (!transpose) {
1732     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1733   } else {
1734     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1735   }
1736   /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1737   PetscCall(VecSet(p, 1.));
1738   if (!transpose) {
1739     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1740   } else {
1741     PetscCall(MatMult(loc_divudotp, p, v));
1742   }
1743   PetscCall(VecDestroy(&p));
1744   if (vl2l) {
1745     Mat        lA;
1746     VecScatter sc;
1747     Vec        vins;
1748 
1749     PetscCall(MatISGetLocalMat(A, &lA));
1750     PetscCall(MatCreateVecs(lA, &vins, NULL));
1751     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1752     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1753     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1754     PetscCall(VecScatterDestroy(&sc));
1755     PetscCall(VecDestroy(&v));
1756     v = vins;
1757   }
1758 
1759   /* mask summation of interface values */
1760   PetscInt        n, *mmask, *mask, *idxs, nmr, nr;
1761   const PetscInt *degree;
1762   PetscSF         msf;
1763 
1764   PetscCall(VecGetLocalSize(v, &n));
1765   PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1766   PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1767   PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1768   PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1769   PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, &degree));
1770   PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, &degree));
1771   for (PetscInt i = 0, c = 0; i < nr; i++) {
1772     mmask[c] = 1;
1773     c += degree[i];
1774   }
1775   PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1776   PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1777   PetscCall(VecGetArray(v, &array));
1778   for (PetscInt i = 0; i < n; i++) {
1779     array[i] *= mask[i];
1780     idxs[i] = i;
1781   }
1782   PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1783   PetscCall(VecRestoreArray(v, &array));
1784   PetscCall(PetscFree3(mmask, mask, idxs));
1785   PetscCall(VecDestroy(&v));
1786   PetscCall(VecAssemblyBegin(quad_vec));
1787   PetscCall(VecAssemblyEnd(quad_vec));
1788   PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1789   PetscCall(VecLockReadPush(quad_vec));
1790   PetscCall(VecDestroy(&quad_vec));
1791   PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793 
PCBDDCAddPrimalVerticesLocalIS(PC pc,IS primalv)1794 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1795 {
1796   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1797 
1798   PetscFunctionBegin;
1799   if (primalv) {
1800     if (pcbddc->user_primal_vertices_local) {
1801       IS list[2], newp;
1802 
1803       list[0] = primalv;
1804       list[1] = pcbddc->user_primal_vertices_local;
1805       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1806       PetscCall(ISSortRemoveDups(newp));
1807       PetscCall(ISDestroy(&list[1]));
1808       pcbddc->user_primal_vertices_local = newp;
1809     } else {
1810       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1811     }
1812   }
1813   PetscFunctionReturn(PETSC_SUCCESS);
1814 }
1815 
func_coords_private(PetscInt dim,PetscReal t,const PetscReal X[],PetscInt Nf,PetscScalar * out,PetscCtx ctx)1816 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, PetscCtx ctx)
1817 {
1818   PetscInt f, *comp = (PetscInt *)ctx;
1819 
1820   PetscFunctionBegin;
1821   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1822   PetscFunctionReturn(PETSC_SUCCESS);
1823 }
1824 
PCBDDCComputeLocalTopologyInfo(PC pc)1825 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1826 {
1827   Vec       local, global;
1828   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1829   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1830   PetscBool monolithic = PETSC_FALSE;
1831 
1832   PetscFunctionBegin;
1833   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1834   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1835   PetscOptionsEnd();
1836   /* need to convert from global to local topology information and remove references to information in global ordering */
1837   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1838   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1839   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1840   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1841   if (monolithic) { /* just get block size to properly compute vertices */
1842     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1843     goto boundary;
1844   }
1845 
1846   if (pcbddc->user_provided_isfordofs) {
1847     if (pcbddc->n_ISForDofs) {
1848       PetscInt i;
1849 
1850       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1851       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1852         PetscInt bs;
1853 
1854         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1855         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1856         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1857         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1858       }
1859       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1860       pcbddc->n_ISForDofs      = 0;
1861       PetscCall(PetscFree(pcbddc->ISForDofs));
1862     }
1863   } else {
1864     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1865       DM dm;
1866 
1867       PetscCall(MatGetDM(pc->pmat, &dm));
1868       if (!dm) PetscCall(PCGetDM(pc, &dm));
1869       if (dm) {
1870         IS      *fields;
1871         PetscInt nf, i;
1872 
1873         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1874         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1875         for (i = 0; i < nf; i++) {
1876           PetscInt bs;
1877 
1878           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1879           PetscCall(ISGetBlockSize(fields[i], &bs));
1880           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1881           PetscCall(ISDestroy(&fields[i]));
1882         }
1883         PetscCall(PetscFree(fields));
1884         pcbddc->n_ISForDofsLocal = nf;
1885       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1886         PetscContainer c;
1887 
1888         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1889         if (c) {
1890           MatISLocalFields lf;
1891           PetscCall(PetscContainerGetPointer(c, &lf));
1892           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1893         } else { /* fallback, create the default fields if bs > 1 */
1894           PetscInt i, n = matis->A->rmap->n;
1895           PetscCall(MatGetBlockSize(pc->pmat, &i));
1896           if (i > 1) {
1897             pcbddc->n_ISForDofsLocal = i;
1898             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1899             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1900           }
1901         }
1902       }
1903     } else {
1904       PetscInt i;
1905       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1906     }
1907   }
1908 
1909 boundary:
1910   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1911     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1912   } else if (pcbddc->DirichletBoundariesLocal) {
1913     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1914   }
1915   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1916     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1917   } else if (pcbddc->NeumannBoundariesLocal) {
1918     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1919   }
1920   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1921   PetscCall(VecDestroy(&global));
1922   PetscCall(VecDestroy(&local));
1923   /* detect local disconnected subdomains if requested or needed */
1924   if (pcbddc->detect_disconnected || matis->allow_repeated) {
1925     IS        primalv = NULL;
1926     PetscInt  nel;
1927     PetscBool filter = pcbddc->detect_disconnected_filter;
1928 
1929     for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1930     PetscCall(PetscFree(pcbddc->local_subs));
1931     PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1932     if (matis->allow_repeated && nel) {
1933       const PetscInt *elsizes;
1934 
1935       pcbddc->n_local_subs = nel;
1936       PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1937       PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1938       for (PetscInt i = 0, c = 0; i < nel; i++) {
1939         PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1940         c += elsizes[i];
1941       }
1942     } else {
1943       PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1944     }
1945     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1946     PetscCall(ISDestroy(&primalv));
1947   }
1948   /* early stage corner detection */
1949   {
1950     DM dm;
1951 
1952     PetscCall(MatGetDM(pc->pmat, &dm));
1953     if (!dm) PetscCall(PCGetDM(pc, &dm));
1954     if (dm) {
1955       PetscBool isda;
1956 
1957       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1958       if (isda) {
1959         ISLocalToGlobalMapping l2l;
1960         IS                     corners;
1961         Mat                    lA;
1962         PetscBool              gl, lo;
1963 
1964         {
1965           Vec                cvec;
1966           const PetscScalar *coords;
1967           PetscInt           dof, n, cdim;
1968           PetscBool          memc = PetscDefined(USE_COMPLEX) ? PETSC_FALSE : PETSC_TRUE;
1969 
1970           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1971           PetscCall(DMGetCoordinates(dm, &cvec));
1972           PetscCall(VecGetLocalSize(cvec, &n));
1973           PetscCall(VecGetBlockSize(cvec, &cdim));
1974           n /= cdim;
1975           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1976           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1977           PetscCall(VecGetArrayRead(cvec, &coords));
1978           if (dof != 1) memc = PETSC_FALSE;
1979           if (memc) {
1980             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1981           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1982             PetscReal *bcoords = pcbddc->mat_graph->coords;
1983             PetscInt   i, b, d;
1984 
1985             for (i = 0; i < n; i++) {
1986               for (b = 0; b < dof; b++) {
1987                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1988               }
1989             }
1990           }
1991           PetscCall(VecRestoreArrayRead(cvec, &coords));
1992           pcbddc->mat_graph->cdim  = cdim;
1993           pcbddc->mat_graph->cnloc = dof * n;
1994           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1995         }
1996         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1997         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1998         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1999         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
2000         lo = (PetscBool)(l2l && corners);
2001         PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2002         if (gl) { /* From PETSc's DMDA */
2003           const PetscInt *idx;
2004           PetscInt        dof, bs, *idxout, n;
2005 
2006           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
2007           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
2008           PetscCall(ISGetLocalSize(corners, &n));
2009           PetscCall(ISGetIndices(corners, &idx));
2010           if (bs == dof) {
2011             PetscCall(PetscMalloc1(n, &idxout));
2012             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
2013           } else { /* the original DMDA local-to-local map have been modified */
2014             PetscInt i, d;
2015 
2016             PetscCall(PetscMalloc1(dof * n, &idxout));
2017             for (i = 0; i < n; i++)
2018               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
2019             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
2020 
2021             bs = 1;
2022             n *= dof;
2023           }
2024           PetscCall(ISRestoreIndices(corners, &idx));
2025           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
2026           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
2027           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
2028           PetscCall(ISDestroy(&corners));
2029           pcbddc->corner_selected  = PETSC_TRUE;
2030           pcbddc->corner_selection = PETSC_TRUE;
2031         }
2032         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
2033       }
2034     }
2035   }
2036   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
2037     DM dm;
2038 
2039     PetscCall(MatGetDM(pc->pmat, &dm));
2040     if (!dm) PetscCall(PCGetDM(pc, &dm));
2041     if (dm) { /* this can get very expensive, I need to find a faster alternative */
2042       Vec          vcoords;
2043       PetscSection section;
2044       PetscReal   *coords;
2045       PetscInt     d, cdim, nl, nf, **ctxs;
2046       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
2047       /* debug coordinates */
2048       PetscViewer       viewer;
2049       PetscBool         flg;
2050       PetscViewerFormat format;
2051       const char       *prefix;
2052 
2053       PetscCall(DMGetCoordinateDim(dm, &cdim));
2054       PetscCall(DMGetLocalSection(dm, &section));
2055       PetscCall(PetscSectionGetNumFields(section, &nf));
2056       PetscCall(DMCreateGlobalVector(dm, &vcoords));
2057       PetscCall(VecGetLocalSize(vcoords, &nl));
2058       PetscCall(PetscMalloc1(nl * cdim, &coords));
2059       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
2060       PetscCall(PetscMalloc1(nf, &ctxs[0]));
2061       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
2062       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
2063 
2064       /* debug coordinates */
2065       PetscCall(PCGetOptionsPrefix(pc, &prefix));
2066       PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
2067       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
2068       for (d = 0; d < cdim; d++) {
2069         PetscInt           i;
2070         const PetscScalar *v;
2071         char               name[16];
2072 
2073         for (i = 0; i < nf; i++) ctxs[i][0] = d;
2074         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d));
2075         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
2076         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
2077         if (flg) PetscCall(VecView(vcoords, viewer));
2078         PetscCall(VecGetArrayRead(vcoords, &v));
2079         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
2080         PetscCall(VecRestoreArrayRead(vcoords, &v));
2081       }
2082       PetscCall(VecDestroy(&vcoords));
2083       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
2084       PetscCall(PetscFree(coords));
2085       PetscCall(PetscFree(ctxs[0]));
2086       PetscCall(PetscFree2(funcs, ctxs));
2087       if (flg) {
2088         PetscCall(PetscViewerPopFormat(viewer));
2089         PetscCall(PetscViewerDestroy(&viewer));
2090       }
2091     }
2092   }
2093   PetscFunctionReturn(PETSC_SUCCESS);
2094 }
2095 
PCBDDCConsistencyCheckIS(PC pc,MPI_Op mop,IS * is)2096 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
2097 {
2098   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
2099   IS              nis;
2100   const PetscInt *idxs;
2101   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;
2102 
2103   PetscFunctionBegin;
2104   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
2105   if (mop == MPI_LAND) {
2106     /* init rootdata with true */
2107     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
2108   } else {
2109     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
2110   }
2111   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
2112   PetscCall(ISGetLocalSize(*is, &nd));
2113   PetscCall(ISGetIndices(*is, &idxs));
2114   for (i = 0; i < nd; i++)
2115     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
2116   PetscCall(ISRestoreIndices(*is, &idxs));
2117   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2118   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2119   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2120   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2121   if (mop == MPI_LAND) {
2122     PetscCall(PetscMalloc1(nd, &nidxs));
2123   } else {
2124     PetscCall(PetscMalloc1(n, &nidxs));
2125   }
2126   for (i = 0, nnd = 0; i < n; i++)
2127     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2128   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2129   PetscCall(ISDestroy(is));
2130   *is = nis;
2131   PetscFunctionReturn(PETSC_SUCCESS);
2132 }
2133 
PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)2134 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2135 {
2136   PC_IS   *pcis   = (PC_IS *)pc->data;
2137   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2138 
2139   PetscFunctionBegin;
2140   if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2141   if (pcbddc->ChangeOfBasisMatrix) {
2142     Vec swap;
2143 
2144     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2145     swap                = pcbddc->work_change;
2146     pcbddc->work_change = r;
2147     r                   = swap;
2148   }
2149   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2150   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2151   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2152   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2153   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2154   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2155   PetscCall(VecSet(z, 0.));
2156   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2157   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2158   if (pcbddc->ChangeOfBasisMatrix) {
2159     pcbddc->work_change = r;
2160     PetscCall(VecCopy(z, pcbddc->work_change));
2161     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2162   }
2163   PetscFunctionReturn(PETSC_SUCCESS);
2164 }
2165 
PCBDDCBenignMatMult_Private_Private(Mat A,Vec x,Vec y,PetscBool transpose)2166 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2167 {
2168   PCBDDCBenignMatMult_ctx ctx;
2169   PetscBool               apply_right, apply_left, reset_x;
2170 
2171   PetscFunctionBegin;
2172   PetscCall(MatShellGetContext(A, &ctx));
2173   if (transpose) {
2174     apply_right = ctx->apply_left;
2175     apply_left  = ctx->apply_right;
2176   } else {
2177     apply_right = ctx->apply_right;
2178     apply_left  = ctx->apply_left;
2179   }
2180   reset_x = PETSC_FALSE;
2181   if (apply_right) {
2182     const PetscScalar *ax;
2183     PetscInt           nl, i;
2184 
2185     PetscCall(VecGetLocalSize(x, &nl));
2186     PetscCall(VecGetArrayRead(x, &ax));
2187     PetscCall(PetscArraycpy(ctx->work, ax, nl));
2188     PetscCall(VecRestoreArrayRead(x, &ax));
2189     for (i = 0; i < ctx->benign_n; i++) {
2190       PetscScalar     sum, val;
2191       const PetscInt *idxs;
2192       PetscInt        nz, j;
2193       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2194       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2195       sum = 0.;
2196       if (ctx->apply_p0) {
2197         val = ctx->work[idxs[nz - 1]];
2198         for (j = 0; j < nz - 1; j++) {
2199           sum += ctx->work[idxs[j]];
2200           ctx->work[idxs[j]] += val;
2201         }
2202       } else {
2203         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2204       }
2205       ctx->work[idxs[nz - 1]] -= sum;
2206       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2207     }
2208     PetscCall(VecPlaceArray(x, ctx->work));
2209     reset_x = PETSC_TRUE;
2210   }
2211   if (transpose) {
2212     PetscCall(MatMultTranspose(ctx->A, x, y));
2213   } else {
2214     PetscCall(MatMult(ctx->A, x, y));
2215   }
2216   if (reset_x) PetscCall(VecResetArray(x));
2217   if (apply_left) {
2218     PetscScalar *ay;
2219     PetscInt     i;
2220 
2221     PetscCall(VecGetArray(y, &ay));
2222     for (i = 0; i < ctx->benign_n; i++) {
2223       PetscScalar     sum, val;
2224       const PetscInt *idxs;
2225       PetscInt        nz, j;
2226       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2227       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2228       val = -ay[idxs[nz - 1]];
2229       if (ctx->apply_p0) {
2230         sum = 0.;
2231         for (j = 0; j < nz - 1; j++) {
2232           sum += ay[idxs[j]];
2233           ay[idxs[j]] += val;
2234         }
2235         ay[idxs[nz - 1]] += sum;
2236       } else {
2237         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2238         ay[idxs[nz - 1]] = 0.;
2239       }
2240       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2241     }
2242     PetscCall(VecRestoreArray(y, &ay));
2243   }
2244   PetscFunctionReturn(PETSC_SUCCESS);
2245 }
2246 
PCBDDCBenignMatMultTranspose_Private(Mat A,Vec x,Vec y)2247 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2248 {
2249   PetscFunctionBegin;
2250   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2251   PetscFunctionReturn(PETSC_SUCCESS);
2252 }
2253 
PCBDDCBenignMatMult_Private(Mat A,Vec x,Vec y)2254 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2255 {
2256   PetscFunctionBegin;
2257   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2258   PetscFunctionReturn(PETSC_SUCCESS);
2259 }
2260 
PCBDDCBenignShellMat(PC pc,PetscBool restore)2261 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2262 {
2263   PC_IS                  *pcis   = (PC_IS *)pc->data;
2264   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
2265   PCBDDCBenignMatMult_ctx ctx;
2266 
2267   PetscFunctionBegin;
2268   if (!restore) {
2269     Mat                A_IB, A_BI;
2270     PetscScalar       *work;
2271     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2272 
2273     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2274     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2275     PetscCall(PetscMalloc1(pcis->n, &work));
2276     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2277     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2278     PetscCall(MatSetType(A_IB, MATSHELL));
2279     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (PetscErrorCodeFn *)PCBDDCBenignMatMult_Private));
2280     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)PCBDDCBenignMatMultTranspose_Private));
2281     PetscCall(PetscNew(&ctx));
2282     PetscCall(MatShellSetContext(A_IB, ctx));
2283     ctx->apply_left  = PETSC_TRUE;
2284     ctx->apply_right = PETSC_FALSE;
2285     ctx->apply_p0    = PETSC_FALSE;
2286     ctx->benign_n    = pcbddc->benign_n;
2287     if (reuse) {
2288       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2289       ctx->free                 = PETSC_FALSE;
2290     } else { /* TODO: could be optimized for successive solves */
2291       ISLocalToGlobalMapping N_to_D;
2292       PetscInt               i;
2293 
2294       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2295       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2296       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2297       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2298       ctx->free = PETSC_TRUE;
2299     }
2300     ctx->A    = pcis->A_IB;
2301     ctx->work = work;
2302     PetscCall(MatSetUp(A_IB));
2303     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2304     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2305     pcis->A_IB = A_IB;
2306 
2307     /* A_BI as A_IB^T */
2308     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2309     pcbddc->benign_original_mat = pcis->A_BI;
2310     pcis->A_BI                  = A_BI;
2311   } else {
2312     if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2313     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2314     PetscCall(MatDestroy(&pcis->A_IB));
2315     pcis->A_IB = ctx->A;
2316     ctx->A     = NULL;
2317     PetscCall(MatDestroy(&pcis->A_BI));
2318     pcis->A_BI                  = pcbddc->benign_original_mat;
2319     pcbddc->benign_original_mat = NULL;
2320     if (ctx->free) {
2321       PetscInt i;
2322       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2323       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2324     }
2325     PetscCall(PetscFree(ctx->work));
2326     PetscCall(PetscFree(ctx));
2327   }
2328   PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330 
2331 /* used just in bddc debug mode */
PCBDDCBenignProject(PC pc,IS is1,IS is2,Mat * B)2332 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2333 {
2334   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2335   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2336   Mat      An;
2337 
2338   PetscFunctionBegin;
2339   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2340   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2341   if (is1) {
2342     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2343     PetscCall(MatDestroy(&An));
2344   } else {
2345     *B = An;
2346   }
2347   PetscFunctionReturn(PETSC_SUCCESS);
2348 }
2349 
2350 /* TODO: add reuse flag */
MatSeqAIJCompress(Mat A,Mat * B)2351 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2352 {
2353   Mat             Bt;
2354   PetscScalar    *a, *bdata;
2355   const PetscInt *ii, *ij;
2356   PetscInt        m, n, i, nnz, *bii, *bij;
2357   PetscBool       flg_row;
2358 
2359   PetscFunctionBegin;
2360   PetscCall(MatGetSize(A, &n, &m));
2361   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2362   PetscCall(MatSeqAIJGetArray(A, &a));
2363   nnz = n;
2364   for (i = 0; i < ii[n]; i++) {
2365     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2366   }
2367   PetscCall(PetscMalloc1(n + 1, &bii));
2368   PetscCall(PetscMalloc1(nnz, &bij));
2369   PetscCall(PetscMalloc1(nnz, &bdata));
2370   nnz    = 0;
2371   bii[0] = 0;
2372   for (i = 0; i < n; i++) {
2373     PetscInt j;
2374     for (j = ii[i]; j < ii[i + 1]; j++) {
2375       PetscScalar entry = a[j];
2376       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2377         bij[nnz]   = ij[j];
2378         bdata[nnz] = entry;
2379         nnz++;
2380       }
2381     }
2382     bii[i + 1] = nnz;
2383   }
2384   PetscCall(MatSeqAIJRestoreArray(A, &a));
2385   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2386   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2387   {
2388     Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2389     b->free_a     = PETSC_TRUE;
2390     b->free_ij    = PETSC_TRUE;
2391   }
2392   if (*B == A) PetscCall(MatDestroy(&A));
2393   *B = Bt;
2394   PetscFunctionReturn(PETSC_SUCCESS);
2395 }
2396 
PCBDDCDetectDisconnectedComponents(PC pc,PetscBool filter,PetscInt * ncc,IS * cc[],IS * primalv)2397 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2398 {
2399   Mat                    B = NULL;
2400   DM                     dm;
2401   IS                     is_dummy, *cc_n;
2402   ISLocalToGlobalMapping l2gmap_dummy;
2403   PCBDDCGraph            graph;
2404   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2405   PetscInt               i, n;
2406   PetscInt              *xadj, *adjncy;
2407   PetscBool              isplex = PETSC_FALSE;
2408 
2409   PetscFunctionBegin;
2410   if (ncc) *ncc = 0;
2411   if (cc) *cc = NULL;
2412   if (primalv) *primalv = NULL;
2413   PetscCall(PCBDDCGraphCreate(&graph));
2414   PetscCall(MatGetDM(pc->pmat, &dm));
2415   if (!dm) PetscCall(PCGetDM(pc, &dm));
2416   if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2417   if (filter) isplex = PETSC_FALSE;
2418 
2419   if (isplex) { /* this code has been modified from plexpartition.c */
2420     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2421     PetscInt       *adj = NULL;
2422     IS              cellNumbering;
2423     const PetscInt *cellNum;
2424     PetscBool       useCone, useClosure;
2425     PetscSection    section;
2426     PetscSegBuffer  adjBuffer;
2427     PetscSF         sfPoint;
2428 
2429     PetscCall(DMConvert(dm, DMPLEX, &dm));
2430     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2431     PetscCall(DMGetPointSF(dm, &sfPoint));
2432     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2433     /* Build adjacency graph via a section/segbuffer */
2434     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2435     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2436     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2437     /* Always use FVM adjacency to create partitioner graph */
2438     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2439     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2440     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2441     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2442     for (n = 0, p = pStart; p < pEnd; p++) {
2443       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2444       if (nroots > 0) {
2445         if (cellNum[p] < 0) continue;
2446       }
2447       adjSize = PETSC_DETERMINE;
2448       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2449       for (a = 0; a < adjSize; ++a) {
2450         const PetscInt point = adj[a];
2451         if (pStart <= point && point < pEnd) {
2452           PetscInt *PETSC_RESTRICT pBuf;
2453           PetscCall(PetscSectionAddDof(section, p, 1));
2454           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2455           *pBuf = point;
2456         }
2457       }
2458       n++;
2459     }
2460     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2461     /* Derive CSR graph from section/segbuffer */
2462     PetscCall(PetscSectionSetUp(section));
2463     PetscCall(PetscSectionGetStorageSize(section, &size));
2464     PetscCall(PetscMalloc1(n + 1, &xadj));
2465     for (idx = 0, p = pStart; p < pEnd; p++) {
2466       if (nroots > 0) {
2467         if (cellNum[p] < 0) continue;
2468       }
2469       PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2470     }
2471     xadj[n] = size;
2472     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2473     /* Clean up */
2474     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2475     PetscCall(PetscSectionDestroy(&section));
2476     PetscCall(PetscFree(adj));
2477     graph->xadj   = xadj;
2478     graph->adjncy = adjncy;
2479   } else {
2480     Mat       A;
2481     PetscBool isseqaij, flg_row;
2482 
2483     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2484     if (!A->rmap->N || !A->cmap->N) {
2485       PetscCall(PCBDDCGraphDestroy(&graph));
2486       PetscFunctionReturn(PETSC_SUCCESS);
2487     }
2488     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2489     if (!isseqaij && filter) {
2490       PetscBool isseqdense;
2491 
2492       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2493       if (!isseqdense) {
2494         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2495       } else { /* TODO: rectangular case and LDA */
2496         PetscScalar *array;
2497         PetscReal    chop = 1.e-6;
2498 
2499         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2500         PetscCall(MatDenseGetArray(B, &array));
2501         PetscCall(MatGetSize(B, &n, NULL));
2502         for (i = 0; i < n; i++) {
2503           PetscInt j;
2504           for (j = i + 1; j < n; j++) {
2505             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2506             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2507             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2508           }
2509         }
2510         PetscCall(MatDenseRestoreArray(B, &array));
2511         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2512       }
2513     } else {
2514       PetscCall(PetscObjectReference((PetscObject)A));
2515       B = A;
2516     }
2517     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2518 
2519     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2520     if (filter) {
2521       PetscScalar *data;
2522       PetscInt     j, cum;
2523 
2524       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2525       PetscCall(MatSeqAIJGetArray(B, &data));
2526       cum = 0;
2527       for (i = 0; i < n; i++) {
2528         PetscInt t;
2529 
2530         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2531           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2532           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2533         }
2534         t                = xadj_filtered[i];
2535         xadj_filtered[i] = cum;
2536         cum += t;
2537       }
2538       PetscCall(MatSeqAIJRestoreArray(B, &data));
2539       graph->xadj   = xadj_filtered;
2540       graph->adjncy = adjncy_filtered;
2541     } else {
2542       graph->xadj   = xadj;
2543       graph->adjncy = adjncy;
2544     }
2545   }
2546   /* compute local connected components using PCBDDCGraph */
2547   graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2548   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2549   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2550   PetscCall(ISDestroy(&is_dummy));
2551   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2552   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2553   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2554   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2555 
2556   /* partial clean up */
2557   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2558   if (B) {
2559     PetscBool flg_row;
2560     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2561     PetscCall(MatDestroy(&B));
2562   }
2563   if (isplex) {
2564     PetscCall(PetscFree(xadj));
2565     PetscCall(PetscFree(adjncy));
2566   }
2567 
2568   /* get back data */
2569   if (isplex) {
2570     if (ncc) *ncc = graph->ncc;
2571     if (cc || primalv) {
2572       Mat          A;
2573       PetscBT      btv, btvt, btvc;
2574       PetscSection subSection;
2575       PetscInt    *ids, cum, cump, *cids, *pids;
2576       PetscInt     dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2577 
2578       PetscCall(DMGetDimension(dm, &dim));
2579       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2580       PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2581       PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2582       PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2583       PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2584       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2585       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2586       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2587       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2588       PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2589 
2590       /* First see if we find corners for the subdomains, i.e. a vertex
2591          shared by at least dim subdomain boundary faces. This does not
2592          cover all the possible cases with simplices but it is enough
2593          for tensor cells */
2594       if (vStart != fStart && dim <= 3) {
2595         for (PetscInt c = cStart; c < cEnd; c++) {
2596           PetscInt        nf, cnt = 0, mcnt = dim, *cfaces;
2597           const PetscInt *faces;
2598 
2599           PetscCall(DMPlexGetConeSize(dm, c, &nf));
2600           PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2601           PetscCall(DMPlexGetCone(dm, c, &faces));
2602           for (PetscInt f = 0; f < nf; f++) {
2603             PetscInt nc, ff;
2604 
2605             PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2606             PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2607             if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2608           }
2609           if (cnt >= mcnt) {
2610             PetscInt size, *closure = NULL;
2611 
2612             PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2613             for (PetscInt k = 0; k < 2 * size; k += 2) {
2614               PetscInt v = closure[k];
2615               if (v >= vStart && v < vEnd) {
2616                 PetscInt vsize, *vclosure = NULL;
2617 
2618                 cnt = 0;
2619                 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2620                 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2621                   PetscInt f = vclosure[vk];
2622                   if (f >= fStart && f < fEnd) {
2623                     PetscInt  nc, ff;
2624                     PetscBool valid = PETSC_FALSE;
2625 
2626                     for (PetscInt fk = 0; fk < nf; fk++)
2627                       if (f == cfaces[fk]) valid = PETSC_TRUE;
2628                     if (!valid) continue;
2629                     PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2630                     PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2631                     if (nc == 1 && f == ff) cnt++;
2632                   }
2633                 }
2634                 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2635                 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2636               }
2637             }
2638             PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2639           }
2640           PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2641         }
2642       }
2643 
2644       cids[0] = 0;
2645       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2646         PetscInt j;
2647 
2648         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2649         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2650           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2651 
2652           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2653           for (k = 0; k < 2 * size; k += 2) {
2654             PetscInt s, pp, p = closure[k], off, dof, cdof;
2655 
2656             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2657             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2658             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2659             for (s = 0; s < dof - cdof; s++) {
2660               if (PetscBTLookupSet(btvt, off + s)) continue;
2661               if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2662               else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2663               else pids[cump++] = off + s; /* cross-vertex */
2664             }
2665             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2666             if (pp != p) {
2667               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2668               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2669               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2670               for (s = 0; s < dof - cdof; s++) {
2671                 if (PetscBTLookupSet(btvt, off + s)) continue;
2672                 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2673                 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2674                 else pids[cump++] = off + s; /* cross-vertex */
2675               }
2676             }
2677           }
2678           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2679         }
2680         cids[i + 1] = cum;
2681         /* mark dofs as already assigned */
2682         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2683       }
2684       if (cc) {
2685         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2686         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2687         *cc = cc_n;
2688       }
2689       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2690       PetscCall(PetscFree3(ids, cids, pids));
2691       PetscCall(PetscBTDestroy(&btv));
2692       PetscCall(PetscBTDestroy(&btvt));
2693       PetscCall(PetscBTDestroy(&btvc));
2694       PetscCall(DMDestroy(&dm));
2695     }
2696   } else {
2697     if (ncc) *ncc = graph->ncc;
2698     if (cc) {
2699       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2700       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2701       *cc = cc_n;
2702     }
2703   }
2704   /* clean up graph */
2705   graph->xadj   = NULL;
2706   graph->adjncy = NULL;
2707   PetscCall(PCBDDCGraphDestroy(&graph));
2708   PetscFunctionReturn(PETSC_SUCCESS);
2709 }
2710 
PCBDDCBenignCheck(PC pc,IS zerodiag)2711 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2712 {
2713   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2714   PC_IS   *pcis   = (PC_IS *)pc->data;
2715   IS       dirIS  = NULL;
2716   PetscInt i;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2720   if (zerodiag) {
2721     Mat             A;
2722     Vec             vec3_N;
2723     PetscScalar    *vals;
2724     const PetscInt *idxs;
2725     PetscInt        nz, *count;
2726 
2727     /* p0 */
2728     PetscCall(VecSet(pcis->vec1_N, 0.));
2729     PetscCall(PetscMalloc1(pcis->n, &vals));
2730     PetscCall(ISGetLocalSize(zerodiag, &nz));
2731     PetscCall(ISGetIndices(zerodiag, &idxs));
2732     for (i = 0; i < nz; i++) vals[i] = 1.;
2733     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2734     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2735     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2736     /* v_I */
2737     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2738     for (i = 0; i < nz; i++) vals[i] = 0.;
2739     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2740     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2741     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2742     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2743     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2744     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2745     if (dirIS) {
2746       PetscInt n;
2747 
2748       PetscCall(ISGetLocalSize(dirIS, &n));
2749       PetscCall(ISGetIndices(dirIS, &idxs));
2750       for (i = 0; i < n; i++) vals[i] = 0.;
2751       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2752       PetscCall(ISRestoreIndices(dirIS, &idxs));
2753     }
2754     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2755     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2756     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2757     PetscCall(VecSet(vec3_N, 0.));
2758     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2759     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2760     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2761     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2762     PetscCall(PetscFree(vals));
2763     PetscCall(VecDestroy(&vec3_N));
2764 
2765     /* there should not be any pressure dofs lying on the interface */
2766     PetscCall(PetscCalloc1(pcis->n, &count));
2767     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2768     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2769     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2770     PetscCall(ISGetIndices(zerodiag, &idxs));
2771     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2772     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2773     PetscCall(PetscFree(count));
2774   }
2775   PetscCall(ISDestroy(&dirIS));
2776 
2777   /* check PCBDDCBenignGetOrSetP0 */
2778   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2779   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2780   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2781   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2782   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2783   for (i = 0; i < pcbddc->benign_n; i++) {
2784     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2785     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2786   }
2787   PetscFunctionReturn(PETSC_SUCCESS);
2788 }
2789 
PCBDDCBenignDetectSaddlePoint(PC pc,PetscBool reuse,IS * zerodiaglocal)2790 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2791 {
2792   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2793   Mat_IS   *matis     = (Mat_IS *)pc->pmat->data;
2794   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2795   PetscInt  nz, n, benign_n, bsp = 1;
2796   PetscInt *interior_dofs, n_interior_dofs, nneu;
2797   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2798 
2799   PetscFunctionBegin;
2800   if (reuse) goto project_b0;
2801   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2802   PetscCall(MatDestroy(&pcbddc->benign_B0));
2803   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2804   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2805   has_null_pressures = PETSC_TRUE;
2806   have_null          = PETSC_TRUE;
2807   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2808      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2809      Checks if all the pressure dofs in each subdomain have a zero diagonal
2810      If not, a change of basis on pressures is not needed
2811      since the local Schur complements are already SPD
2812   */
2813   if (pcbddc->n_ISForDofsLocal) {
2814     IS        iP = NULL;
2815     PetscInt  p, *pp;
2816     PetscBool flg, blocked = PETSC_FALSE;
2817 
2818     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2819     n = pcbddc->n_ISForDofsLocal;
2820     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2821     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2822     PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL));
2823     PetscOptionsEnd();
2824     if (!flg) {
2825       n     = 1;
2826       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2827     }
2828 
2829     bsp = 0;
2830     for (p = 0; p < n; p++) {
2831       PetscInt bs = 1;
2832 
2833       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2834       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2835       bsp += bs;
2836     }
2837     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2838     bsp = 0;
2839     for (p = 0; p < n; p++) {
2840       const PetscInt *idxs;
2841       PetscInt        b, bs = 1, npl, *bidxs;
2842 
2843       if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2844       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2845       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2846       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2847       for (b = 0; b < bs; b++) {
2848         PetscInt i;
2849 
2850         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2851         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2852         bsp++;
2853       }
2854       PetscCall(PetscFree(bidxs));
2855       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2856     }
2857     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2858 
2859     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2860     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2861     if (iP) {
2862       IS newpressures;
2863 
2864       PetscCall(ISDifference(pressures, iP, &newpressures));
2865       PetscCall(ISDestroy(&pressures));
2866       pressures = newpressures;
2867     }
2868     PetscCall(ISSorted(pressures, &sorted));
2869     if (!sorted) PetscCall(ISSort(pressures));
2870     PetscCall(PetscFree(pp));
2871   }
2872 
2873   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2874   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2875   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2876   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2877   PetscCall(ISSorted(zerodiag, &sorted));
2878   if (!sorted) PetscCall(ISSort(zerodiag));
2879   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2880   zerodiag_save = zerodiag;
2881   PetscCall(ISGetLocalSize(zerodiag, &nz));
2882   if (!nz) {
2883     if (n) have_null = PETSC_FALSE;
2884     has_null_pressures = PETSC_FALSE;
2885     PetscCall(ISDestroy(&zerodiag));
2886   }
2887   recompute_zerodiag = PETSC_FALSE;
2888 
2889   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2890   zerodiag_subs   = NULL;
2891   benign_n        = 0;
2892   n_interior_dofs = 0;
2893   interior_dofs   = NULL;
2894   nneu            = 0;
2895   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2896   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2897   if (checkb) { /* need to compute interior nodes */
2898     PetscInt               n, i;
2899     PetscInt              *count;
2900     ISLocalToGlobalMapping mapping;
2901 
2902     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2903     PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2904     PetscCall(PetscMalloc1(n, &interior_dofs));
2905     for (i = 0; i < n; i++)
2906       if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2907     PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2908   }
2909   if (has_null_pressures) {
2910     IS             *subs;
2911     PetscInt        nsubs, i, j, nl;
2912     const PetscInt *idxs;
2913     PetscScalar    *array;
2914     Vec            *work;
2915 
2916     subs  = pcbddc->local_subs;
2917     nsubs = pcbddc->n_local_subs;
2918     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2919     if (checkb) {
2920       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2921       PetscCall(ISGetLocalSize(zerodiag, &nl));
2922       PetscCall(ISGetIndices(zerodiag, &idxs));
2923       /* work[0] = 1_p */
2924       PetscCall(VecSet(work[0], 0.));
2925       PetscCall(VecGetArray(work[0], &array));
2926       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2927       PetscCall(VecRestoreArray(work[0], &array));
2928       /* work[0] = 1_v */
2929       PetscCall(VecSet(work[1], 1.));
2930       PetscCall(VecGetArray(work[1], &array));
2931       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2932       PetscCall(VecRestoreArray(work[1], &array));
2933       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2934     }
2935 
2936     if (nsubs > 1 || bsp > 1) {
2937       IS      *is;
2938       PetscInt b, totb;
2939 
2940       totb  = bsp;
2941       is    = bsp > 1 ? bzerodiag : &zerodiag;
2942       nsubs = PetscMax(nsubs, 1);
2943       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2944       for (b = 0; b < totb; b++) {
2945         for (i = 0; i < nsubs; i++) {
2946           ISLocalToGlobalMapping l2g;
2947           IS                     t_zerodiag_subs;
2948           PetscInt               nl;
2949 
2950           if (subs) {
2951             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2952           } else {
2953             IS tis;
2954 
2955             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2956             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2957             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2958             PetscCall(ISDestroy(&tis));
2959           }
2960           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2961           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2962           if (nl) {
2963             PetscBool valid = PETSC_TRUE;
2964 
2965             if (checkb) {
2966               PetscCall(VecSet(matis->x, 0));
2967               PetscCall(ISGetLocalSize(subs[i], &nl));
2968               PetscCall(ISGetIndices(subs[i], &idxs));
2969               PetscCall(VecGetArray(matis->x, &array));
2970               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2971               PetscCall(VecRestoreArray(matis->x, &array));
2972               PetscCall(ISRestoreIndices(subs[i], &idxs));
2973               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2974               PetscCall(MatMult(matis->A, matis->x, matis->y));
2975               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2976               PetscCall(VecGetArray(matis->y, &array));
2977               for (j = 0; j < n_interior_dofs; j++) {
2978                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2979                   valid = PETSC_FALSE;
2980                   break;
2981                 }
2982               }
2983               PetscCall(VecRestoreArray(matis->y, &array));
2984             }
2985             if (valid && nneu) {
2986               const PetscInt *idxs;
2987               PetscInt        nzb;
2988 
2989               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2990               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2991               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2992               if (nzb) valid = PETSC_FALSE;
2993             }
2994             if (valid && pressures) {
2995               IS       t_pressure_subs, tmp;
2996               PetscInt i1, i2;
2997 
2998               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2999               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
3000               PetscCall(ISGetLocalSize(tmp, &i1));
3001               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
3002               if (i2 != i1) valid = PETSC_FALSE;
3003               PetscCall(ISDestroy(&t_pressure_subs));
3004               PetscCall(ISDestroy(&tmp));
3005             }
3006             if (valid) {
3007               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
3008               benign_n++;
3009             } else recompute_zerodiag = PETSC_TRUE;
3010           }
3011           PetscCall(ISDestroy(&t_zerodiag_subs));
3012           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
3013         }
3014       }
3015     } else { /* there's just one subdomain (or zero if they have not been detected */
3016       PetscBool valid = PETSC_TRUE;
3017 
3018       if (nneu) valid = PETSC_FALSE;
3019       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
3020       if (valid && checkb) {
3021         PetscCall(MatMult(matis->A, work[0], matis->x));
3022         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
3023         PetscCall(VecGetArray(matis->x, &array));
3024         for (j = 0; j < n_interior_dofs; j++) {
3025           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
3026             valid = PETSC_FALSE;
3027             break;
3028           }
3029         }
3030         PetscCall(VecRestoreArray(matis->x, &array));
3031       }
3032       if (valid) {
3033         benign_n = 1;
3034         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
3035         PetscCall(PetscObjectReference((PetscObject)zerodiag));
3036         zerodiag_subs[0] = zerodiag;
3037       }
3038     }
3039     if (checkb) PetscCall(VecDestroyVecs(2, &work));
3040   }
3041   PetscCall(PetscFree(interior_dofs));
3042 
3043   if (!benign_n) {
3044     PetscInt n;
3045 
3046     PetscCall(ISDestroy(&zerodiag));
3047     recompute_zerodiag = PETSC_FALSE;
3048     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3049     if (n) have_null = PETSC_FALSE;
3050   }
3051 
3052   /* final check for null pressures */
3053   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
3054 
3055   if (recompute_zerodiag) {
3056     PetscCall(ISDestroy(&zerodiag));
3057     if (benign_n == 1) {
3058       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
3059       zerodiag = zerodiag_subs[0];
3060     } else {
3061       PetscInt i, nzn, *new_idxs;
3062 
3063       nzn = 0;
3064       for (i = 0; i < benign_n; i++) {
3065         PetscInt ns;
3066         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
3067         nzn += ns;
3068       }
3069       PetscCall(PetscMalloc1(nzn, &new_idxs));
3070       nzn = 0;
3071       for (i = 0; i < benign_n; i++) {
3072         PetscInt ns, *idxs;
3073         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
3074         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
3075         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
3076         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
3077         nzn += ns;
3078       }
3079       PetscCall(PetscSortInt(nzn, new_idxs));
3080       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
3081     }
3082     have_null = PETSC_FALSE;
3083   }
3084 
3085   /* determines if the coarse solver will be singular or not */
3086   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
3087 
3088   /* Prepare matrix to compute no-net-flux */
3089   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
3090     Mat                    A, loc_divudotp;
3091     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
3092     IS                     row, col, isused = NULL;
3093     PetscInt               M, N, n, st, n_isused;
3094 
3095     if (pressures) {
3096       isused = pressures;
3097     } else {
3098       isused = zerodiag_save;
3099     }
3100     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
3101     PetscCall(MatISGetLocalMat(pc->pmat, &A));
3102     PetscCall(MatGetLocalSize(A, &n, NULL));
3103     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
3104     n_isused = 0;
3105     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
3106     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
3107     st = st - n_isused;
3108     if (n) {
3109       const PetscInt *gidxs;
3110 
3111       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
3112       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
3113       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
3114       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3115       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
3116       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
3117     } else {
3118       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3119       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3120       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3121     }
3122     PetscCall(MatGetSize(pc->pmat, NULL, &N));
3123     PetscCall(ISGetSize(row, &M));
3124     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3125     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3126     PetscCall(ISDestroy(&row));
3127     PetscCall(ISDestroy(&col));
3128     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3129     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3130     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3131     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3132     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3133     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3134     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3135     PetscCall(MatDestroy(&loc_divudotp));
3136     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3137     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3138   }
3139   PetscCall(ISDestroy(&zerodiag_save));
3140   PetscCall(ISDestroy(&pressures));
3141   if (bzerodiag) {
3142     PetscInt i;
3143 
3144     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3145     PetscCall(PetscFree(bzerodiag));
3146   }
3147   pcbddc->benign_n             = benign_n;
3148   pcbddc->benign_zerodiag_subs = zerodiag_subs;
3149 
3150   /* determines if the problem has subdomains with 0 pressure block */
3151   have_null = (PetscBool)(!!pcbddc->benign_n);
3152   PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3153 
3154 project_b0:
3155   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3156   /* change of basis and p0 dofs */
3157   if (pcbddc->benign_n) {
3158     PetscInt i, s, *nnz;
3159 
3160     /* local change of basis for pressures */
3161     PetscCall(MatDestroy(&pcbddc->benign_change));
3162     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3163     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3164     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3165     PetscCall(PetscMalloc1(n, &nnz));
3166     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3167     for (i = 0; i < pcbddc->benign_n; i++) {
3168       const PetscInt *idxs;
3169       PetscInt        nzs, j;
3170 
3171       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3172       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3173       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3174       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
3175       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3176     }
3177     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3178     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3179     PetscCall(PetscFree(nnz));
3180     /* set identity by default */
3181     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3182     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3183     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3184     /* set change on pressures */
3185     for (s = 0; s < pcbddc->benign_n; s++) {
3186       PetscScalar    *array;
3187       const PetscInt *idxs;
3188       PetscInt        nzs;
3189 
3190       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3191       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3192       for (i = 0; i < nzs - 1; i++) {
3193         PetscScalar vals[2];
3194         PetscInt    cols[2];
3195 
3196         cols[0] = idxs[i];
3197         cols[1] = idxs[nzs - 1];
3198         vals[0] = 1.;
3199         vals[1] = 1.;
3200         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3201       }
3202       PetscCall(PetscMalloc1(nzs, &array));
3203       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3204       array[nzs - 1] = 1.;
3205       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3206       /* store local idxs for p0 */
3207       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3208       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3209       PetscCall(PetscFree(array));
3210     }
3211     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3212     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3213 
3214     /* project if needed */
3215     if (pcbddc->benign_change_explicit) {
3216       Mat M;
3217 
3218       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3219       PetscCall(MatDestroy(&pcbddc->local_mat));
3220       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3221       PetscCall(MatDestroy(&M));
3222     }
3223     /* store global idxs for p0 */
3224     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3225   }
3226   *zerodiaglocal = zerodiag;
3227   PetscFunctionReturn(PETSC_SUCCESS);
3228 }
3229 
PCBDDCBenignGetOrSetP0(PC pc,Vec v,PetscBool get)3230 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3231 {
3232   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
3233   PetscScalar *array;
3234 
3235   PetscFunctionBegin;
3236   if (!pcbddc->benign_sf) {
3237     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3238     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3239   }
3240   if (get) {
3241     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3242     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3243     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3244     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3245   } else {
3246     PetscCall(VecGetArray(v, &array));
3247     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3248     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3249     PetscCall(VecRestoreArray(v, &array));
3250   }
3251   PetscFunctionReturn(PETSC_SUCCESS);
3252 }
3253 
PCBDDCBenignPopOrPushB0(PC pc,PetscBool pop)3254 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3255 {
3256   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3257 
3258   PetscFunctionBegin;
3259   /* TODO: add error checking
3260     - avoid nested pop (or push) calls.
3261     - cannot push before pop.
3262     - cannot call this if pcbddc->local_mat is NULL
3263   */
3264   if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3265   if (pop) {
3266     if (pcbddc->benign_change_explicit) {
3267       IS       is_p0;
3268       MatReuse reuse;
3269 
3270       /* extract B_0 */
3271       reuse = MAT_INITIAL_MATRIX;
3272       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3273       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3274       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3275       /* remove rows and cols from local problem */
3276       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3277       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3278       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3279       PetscCall(ISDestroy(&is_p0));
3280     } else {
3281       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
3282       PetscScalar *vals;
3283       PetscInt     i, n, *idxs_ins;
3284 
3285       PetscCall(VecGetLocalSize(matis->y, &n));
3286       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3287       if (!pcbddc->benign_B0) {
3288         PetscInt *nnz;
3289         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3290         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3291         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3292         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3293         for (i = 0; i < pcbddc->benign_n; i++) {
3294           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3295           nnz[i] = n - nnz[i];
3296         }
3297         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3298         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3299         PetscCall(PetscFree(nnz));
3300       }
3301 
3302       for (i = 0; i < pcbddc->benign_n; i++) {
3303         PetscScalar *array;
3304         PetscInt    *idxs, j, nz, cum;
3305 
3306         PetscCall(VecSet(matis->x, 0.));
3307         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3308         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3309         for (j = 0; j < nz; j++) vals[j] = 1.;
3310         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3311         PetscCall(VecAssemblyBegin(matis->x));
3312         PetscCall(VecAssemblyEnd(matis->x));
3313         PetscCall(VecSet(matis->y, 0.));
3314         PetscCall(MatMult(matis->A, matis->x, matis->y));
3315         PetscCall(VecGetArray(matis->y, &array));
3316         cum = 0;
3317         for (j = 0; j < n; j++) {
3318           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3319             vals[cum]     = array[j];
3320             idxs_ins[cum] = j;
3321             cum++;
3322           }
3323         }
3324         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3325         PetscCall(VecRestoreArray(matis->y, &array));
3326         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3327       }
3328       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3329       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3330       PetscCall(PetscFree2(idxs_ins, vals));
3331     }
3332   } else { /* push */
3333 
3334     PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3335     for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3336       PetscScalar *B0_vals;
3337       PetscInt    *B0_cols, B0_ncol;
3338 
3339       PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3340       PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3341       PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3342       PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3343       PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3344     }
3345     PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3346     PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3347   }
3348   PetscFunctionReturn(PETSC_SUCCESS);
3349 }
3350 
PCBDDCAdaptiveSelection(PC pc)3351 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3352 {
3353   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3354   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3355   PetscBLASInt    B_neigs, B_ierr, B_lwork;
3356   PetscBLASInt   *B_iwork, *B_ifail;
3357   PetscScalar    *work, lwork;
3358   PetscScalar    *St, *S, *eigv;
3359   PetscScalar    *Sarray, *Starray;
3360   PetscReal      *eigs, thresh, lthresh, uthresh;
3361   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3362   PetscBool       allocated_S_St, upart;
3363 #if defined(PETSC_USE_COMPLEX)
3364   PetscReal *rwork;
3365 #endif
3366 
3367   PetscFunctionBegin;
3368   if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3369   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3370   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3371   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3372              sub_schurs->is_posdef);
3373   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3374 
3375   if (pcbddc->dbg_flag) {
3376     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3377     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3378     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3379     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3380     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3381   }
3382 
3383   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3384 
3385   /* max size of subsets */
3386   mss = 0;
3387   for (i = 0; i < sub_schurs->n_subs; i++) {
3388     PetscInt subset_size;
3389 
3390     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3391     mss = PetscMax(mss, subset_size);
3392   }
3393 
3394   /* min/max and threshold */
3395   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3396   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3397   nmax           = PetscMax(nmin, nmax);
3398   allocated_S_St = PETSC_FALSE;
3399   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3400     allocated_S_St = PETSC_TRUE;
3401   }
3402 
3403   /* allocate lapack workspace */
3404   cum = cum2 = 0;
3405   maxneigs   = 0;
3406   for (i = 0; i < sub_schurs->n_subs; i++) {
3407     PetscInt n, subset_size;
3408 
3409     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3410     n = PetscMin(subset_size, nmax);
3411     cum += subset_size;
3412     cum2 += subset_size * n;
3413     maxneigs = PetscMax(maxneigs, n);
3414   }
3415   lwork = 0;
3416   if (mss) {
3417     PetscScalar  sdummy  = 0.;
3418     PetscBLASInt B_itype = 1;
3419     PetscBLASInt B_N, idummy = 0;
3420     PetscReal    rdummy = 0., zero = 0.0;
3421     PetscReal    eps = 0.0; /* dlamch? */
3422 
3423     PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3424     PetscCall(PetscBLASIntCast(mss, &B_N));
3425     B_lwork = -1;
3426     /* some implementations may complain about NULL pointers, even if we are querying */
3427     S       = &sdummy;
3428     St      = &sdummy;
3429     eigs    = &rdummy;
3430     eigv    = &sdummy;
3431     B_iwork = &idummy;
3432     B_ifail = &idummy;
3433 #if defined(PETSC_USE_COMPLEX)
3434     rwork = &rdummy;
3435 #endif
3436     thresh = 1.0;
3437     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3438 #if defined(PETSC_USE_COMPLEX)
3439     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3440 #else
3441     PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3442 #endif
3443     PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr);
3444     PetscCall(PetscFPTrapPop());
3445   }
3446 
3447   nv = 0;
3448   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3449     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3450   }
3451   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3452   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3453   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3454 #if defined(PETSC_USE_COMPLEX)
3455   PetscCall(PetscMalloc1(7 * mss, &rwork));
3456 #endif
3457   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3458                          &pcbddc->adaptive_constraints_data));
3459   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3460 
3461   maxneigs = 0;
3462   cum = cumarray                           = 0;
3463   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3464   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3465   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3466     const PetscInt *idxs;
3467 
3468     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3469     for (cum = 0; cum < nv; cum++) {
3470       pcbddc->adaptive_constraints_n[cum]            = 1;
3471       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3472       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3473       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3474       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3475     }
3476     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3477   }
3478 
3479   if (mss) { /* multilevel */
3480     if (sub_schurs->gdsw) {
3481       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3482       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3483     } else {
3484       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3485       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3486     }
3487   }
3488 
3489   lthresh = pcbddc->adaptive_threshold[0];
3490   uthresh = pcbddc->adaptive_threshold[1];
3491   upart   = pcbddc->use_deluxe_scaling;
3492   for (i = 0; i < sub_schurs->n_subs; i++) {
3493     const PetscInt *idxs;
3494     PetscReal       upper, lower;
3495     PetscInt        j, subset_size, eigs_start = 0;
3496     PetscBLASInt    B_N;
3497     PetscBool       same_data = PETSC_FALSE;
3498     PetscBool       scal      = PETSC_FALSE;
3499 
3500     if (upart) {
3501       upper = PETSC_MAX_REAL;
3502       lower = uthresh;
3503     } else {
3504       if (sub_schurs->gdsw) {
3505         upper = uthresh;
3506         lower = PETSC_MIN_REAL;
3507       } else {
3508         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3509         upper = 1. / uthresh;
3510         lower = 0.;
3511       }
3512     }
3513     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3514     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3515     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3516     /* this is experimental: we assume the dofs have been properly grouped to have
3517        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3518     if (!sub_schurs->is_posdef) {
3519       Mat T;
3520 
3521       for (j = 0; j < subset_size; j++) {
3522         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3523           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3524           PetscCall(MatScale(T, -1.0));
3525           PetscCall(MatDestroy(&T));
3526           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3527           PetscCall(MatScale(T, -1.0));
3528           PetscCall(MatDestroy(&T));
3529           if (sub_schurs->change_primal_sub) {
3530             PetscInt        nz, k;
3531             const PetscInt *idxs;
3532 
3533             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3534             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3535             for (k = 0; k < nz; k++) {
3536               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3537               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3538             }
3539             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3540           }
3541           scal = PETSC_TRUE;
3542           break;
3543         }
3544       }
3545     }
3546 
3547     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3548       if (sub_schurs->is_symmetric) {
3549         PetscInt j, k;
3550         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3551           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3552           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3553         }
3554         for (j = 0; j < subset_size; j++) {
3555           for (k = j; k < subset_size; k++) {
3556             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3557             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3558           }
3559         }
3560       } else {
3561         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3562         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3563       }
3564     } else {
3565       S  = Sarray + cumarray;
3566       St = Starray + cumarray;
3567     }
3568     /* see if we can save some work */
3569     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3570 
3571     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3572       B_neigs = 0;
3573     } else {
3574       PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0;
3575       PetscReal    eps = -1.0; /* dlamch? */
3576       PetscInt     nmin_s;
3577       PetscBool    compute_range;
3578 
3579       PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3580       B_neigs       = 0;
3581       compute_range = (PetscBool)!same_data;
3582       if (nmin >= subset_size) compute_range = PETSC_FALSE;
3583 
3584       if (pcbddc->dbg_flag) {
3585         PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3586 
3587         if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3588         PetscCall(
3589           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3590       }
3591 
3592       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3593       if (compute_range) {
3594         /* ask for eigenvalues larger than thresh */
3595         if (sub_schurs->is_posdef) {
3596 #if defined(PETSC_USE_COMPLEX)
3597           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3598 #else
3599           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3600 #endif
3601           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3602         } else { /* no theory so far, but it works nicely */
3603           PetscInt  recipe = 0, recipe_m = 1;
3604           PetscReal bb[2];
3605 
3606           PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3607           switch (recipe) {
3608           case 0:
3609             if (scal) {
3610               bb[0] = PETSC_MIN_REAL;
3611               bb[1] = lthresh;
3612             } else {
3613               bb[0] = uthresh;
3614               bb[1] = PETSC_MAX_REAL;
3615             }
3616 #if defined(PETSC_USE_COMPLEX)
3617             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3618 #else
3619             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3620 #endif
3621             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3622             break;
3623           case 1:
3624             bb[0] = PETSC_MIN_REAL;
3625             bb[1] = lthresh * lthresh;
3626 #if defined(PETSC_USE_COMPLEX)
3627             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3628 #else
3629             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3630 #endif
3631             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3632             if (!scal) {
3633               PetscBLASInt B_neigs2 = 0;
3634 
3635               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3636               bb[1] = PETSC_MAX_REAL;
3637               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3638               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3639 #if defined(PETSC_USE_COMPLEX)
3640               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3641 #else
3642               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3643 #endif
3644               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3645               B_neigs += B_neigs2;
3646             }
3647             break;
3648           case 2:
3649             if (scal) {
3650               bb[0] = PETSC_MIN_REAL;
3651               bb[1] = 0;
3652 #if defined(PETSC_USE_COMPLEX)
3653               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3654 #else
3655               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3656 #endif
3657               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3658             } else {
3659               PetscBLASInt B_neigs2 = 0;
3660               PetscBool    do_copy  = PETSC_FALSE;
3661 
3662               lthresh = PetscMax(lthresh, 0.0);
3663               if (lthresh > 0.0) {
3664                 bb[0] = PETSC_MIN_REAL;
3665                 bb[1] = lthresh * lthresh;
3666 
3667                 do_copy = PETSC_TRUE;
3668 #if defined(PETSC_USE_COMPLEX)
3669                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3670 #else
3671                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3672 #endif
3673                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3674               }
3675               bb[0] = PetscMax(lthresh * lthresh, uthresh);
3676               bb[1] = PETSC_MAX_REAL;
3677               if (do_copy) {
3678                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3679                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3680               }
3681 #if defined(PETSC_USE_COMPLEX)
3682               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3683 #else
3684               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3685 #endif
3686               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3687               B_neigs += B_neigs2;
3688             }
3689             break;
3690           case 3:
3691             if (scal) {
3692               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3693             } else {
3694               PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3695             }
3696             if (!scal) {
3697               bb[0] = uthresh;
3698               bb[1] = PETSC_MAX_REAL;
3699 #if defined(PETSC_USE_COMPLEX)
3700               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3701 #else
3702               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3703 #endif
3704               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3705             }
3706             if (recipe_m > 0 && B_N - B_neigs > 0) {
3707               PetscBLASInt B_neigs2 = 0;
3708 
3709               PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3710               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3711               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3712 #if defined(PETSC_USE_COMPLEX)
3713               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3714 #else
3715               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3716 #endif
3717               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3718               B_neigs += B_neigs2;
3719             }
3720             break;
3721           case 4:
3722             bb[0] = PETSC_MIN_REAL;
3723             bb[1] = lthresh;
3724 #if defined(PETSC_USE_COMPLEX)
3725             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3726 #else
3727             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3728 #endif
3729             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3730             {
3731               PetscBLASInt B_neigs2 = 0;
3732 
3733               bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3734               bb[1] = PETSC_MAX_REAL;
3735               PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3736               PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3737 #if defined(PETSC_USE_COMPLEX)
3738               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3739 #else
3740               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3741 #endif
3742               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3743               B_neigs += B_neigs2;
3744             }
3745             break;
3746           case 5: /* same as before: first compute all eigenvalues, then filter */
3747 #if defined(PETSC_USE_COMPLEX)
3748             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3749 #else
3750             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3751 #endif
3752             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3753             {
3754               PetscInt e, k, ne;
3755               for (e = 0, ne = 0; e < B_neigs; e++) {
3756                 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3757                   for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3758                   eigs[ne] = eigs[e];
3759                   ne++;
3760                 }
3761               }
3762               PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3763               PetscCall(PetscBLASIntCast(ne, &B_neigs));
3764             }
3765             break;
3766           default:
3767             SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3768           }
3769         }
3770       } else if (!same_data) { /* this is just to see all the eigenvalues */
3771         PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3772 #if defined(PETSC_USE_COMPLEX)
3773         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3774 #else
3775         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3776 #endif
3777         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3778       } else { /* same_data is true, so just get the adaptive functional requested by the user */
3779         PetscInt k;
3780         PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3781         PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3782         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3783         nmin = nmax;
3784         PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3785         for (k = 0; k < nmax; k++) {
3786           eigs[k]                     = 1. / PETSC_SMALL;
3787           eigv[k * (subset_size + 1)] = 1.0;
3788         }
3789       }
3790       PetscCall(PetscFPTrapPop());
3791       if (B_ierr) {
3792         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3793         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3794         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3795       }
3796 
3797       if (B_neigs > nmax) {
3798         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3799         if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3800         PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3801       }
3802 
3803       nmin_s = PetscMin(nmin, B_N);
3804       if (B_neigs < nmin_s) {
3805         PetscBLASInt B_neigs2 = 0;
3806 
3807         if (upart) {
3808           if (scal) {
3809             PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3810             B_IL = B_neigs + 1;
3811           } else {
3812             PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3813             B_IU = B_N - B_neigs;
3814           }
3815         } else {
3816           B_IL = B_neigs + 1;
3817           PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3818         }
3819         if (pcbddc->dbg_flag) {
3820           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3821         }
3822         if (sub_schurs->is_symmetric) {
3823           PetscInt j, k;
3824           for (j = 0; j < subset_size; j++) {
3825             for (k = j; k < subset_size; k++) {
3826               S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3827               St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3828             }
3829           }
3830         } else {
3831           PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3832           PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3833         }
3834         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3835 #if defined(PETSC_USE_COMPLEX)
3836         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3837 #else
3838         PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3839 #endif
3840         PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3841         PetscCall(PetscFPTrapPop());
3842         B_neigs += B_neigs2;
3843       }
3844       if (B_ierr) {
3845         PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3846         PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3847         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3848       }
3849       if (pcbddc->dbg_flag) {
3850         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3851         for (j = 0; j < B_neigs; j++) {
3852           if (!sub_schurs->gdsw) {
3853             if (eigs[j] == 0.0) {
3854               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3855             } else {
3856               if (upart) {
3857                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3858               } else {
3859                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1 / eigs[j + eigs_start])));
3860               }
3861             }
3862           } else {
3863             double pg = (double)eigs[j + eigs_start];
3864             if (pg < 2 * PETSC_SMALL) pg = 0.0;
3865             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3866           }
3867         }
3868       }
3869     }
3870     /* change the basis back to the original one */
3871     if (sub_schurs->change) {
3872       Mat change, phi, phit;
3873 
3874       if (pcbddc->dbg_flag > 2) {
3875         PetscInt ii;
3876         for (ii = 0; ii < B_neigs; ii++) {
3877           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3878           for (j = 0; j < B_N; j++) {
3879 #if defined(PETSC_USE_COMPLEX)
3880             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3881             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3882             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3883 #else
3884             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3885 #endif
3886           }
3887         }
3888       }
3889       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3890       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3891       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3892       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3893       PetscCall(MatDestroy(&phit));
3894       PetscCall(MatDestroy(&phi));
3895     }
3896     maxneigs                               = PetscMax(B_neigs, maxneigs);
3897     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3898     if (B_neigs) {
3899       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3900 
3901       if (pcbddc->dbg_flag > 1) {
3902         PetscInt ii;
3903         for (ii = 0; ii < B_neigs; ii++) {
3904           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3905           for (j = 0; j < B_N; j++) {
3906 #if defined(PETSC_USE_COMPLEX)
3907             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3908             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3909             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3910 #else
3911             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3912 #endif
3913           }
3914         }
3915       }
3916       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3917       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3918       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3919       cum++;
3920     }
3921     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3922     /* shift for next computation */
3923     cumarray += subset_size * subset_size;
3924   }
3925   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3926 
3927   if (mss) {
3928     if (sub_schurs->gdsw) {
3929       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3930       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3931     } else {
3932       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3933       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3934       /* destroy matrices (junk) */
3935       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3936       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3937     }
3938   }
3939   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3940   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3941 #if defined(PETSC_USE_COMPLEX)
3942   PetscCall(PetscFree(rwork));
3943 #endif
3944   if (pcbddc->dbg_flag) {
3945     PetscInt maxneigs_r;
3946     PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3947     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3948   }
3949   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3950   PetscFunctionReturn(PETSC_SUCCESS);
3951 }
3952 
PCBDDCSetUpSolvers(PC pc)3953 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3954 {
3955   Mat coarse_submat;
3956 
3957   PetscFunctionBegin;
3958   /* Setup local scatters R_to_B and (optionally) R_to_D */
3959   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3960   PetscCall(PCBDDCSetUpLocalScatters(pc));
3961 
3962   /* Setup local neumann solver ksp_R */
3963   /* PCBDDCSetUpLocalScatters should be called first! */
3964   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3965 
3966   /*
3967      Setup local correction and local part of coarse basis.
3968      Gives back the dense local part of the coarse matrix in column major ordering
3969   */
3970   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3971 
3972   /* Compute total number of coarse nodes and setup coarse solver */
3973   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3974   PetscCall(MatDestroy(&coarse_submat));
3975   PetscFunctionReturn(PETSC_SUCCESS);
3976 }
3977 
PCBDDCResetCustomization(PC pc)3978 PetscErrorCode PCBDDCResetCustomization(PC pc)
3979 {
3980   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3981 
3982   PetscFunctionBegin;
3983   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3984   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3985   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3986   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3987   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3988   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3989   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3990   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3991   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3992   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3993   PetscFunctionReturn(PETSC_SUCCESS);
3994 }
3995 
PCBDDCResetTopography(PC pc)3996 PetscErrorCode PCBDDCResetTopography(PC pc)
3997 {
3998   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3999   PetscInt i;
4000 
4001   PetscFunctionBegin;
4002   PetscCall(MatDestroy(&pcbddc->nedcG));
4003   PetscCall(ISDestroy(&pcbddc->nedclocal));
4004   PetscCall(MatDestroy(&pcbddc->discretegradient));
4005   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
4006   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
4007   PetscCall(MatDestroy(&pcbddc->switch_static_change));
4008   PetscCall(VecDestroy(&pcbddc->work_change));
4009   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
4010   PetscCall(MatDestroy(&pcbddc->divudotp));
4011   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
4012   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
4013   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
4014   pcbddc->n_local_subs = 0;
4015   PetscCall(PetscFree(pcbddc->local_subs));
4016   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
4017   pcbddc->graphanalyzed        = PETSC_FALSE;
4018   pcbddc->recompute_topography = PETSC_TRUE;
4019   pcbddc->corner_selected      = PETSC_FALSE;
4020   PetscFunctionReturn(PETSC_SUCCESS);
4021 }
4022 
PCBDDCResetSolvers(PC pc)4023 PetscErrorCode PCBDDCResetSolvers(PC pc)
4024 {
4025   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4026 
4027   PetscFunctionBegin;
4028   PetscCall(VecDestroy(&pcbddc->coarse_vec));
4029   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4030   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4031   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4032   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4033   PetscCall(VecDestroy(&pcbddc->vec1_P));
4034   PetscCall(VecDestroy(&pcbddc->vec1_C));
4035   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4036   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4037   PetscCall(VecDestroy(&pcbddc->vec1_R));
4038   PetscCall(VecDestroy(&pcbddc->vec2_R));
4039   PetscCall(ISDestroy(&pcbddc->is_R_local));
4040   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
4041   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
4042   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
4043   PetscCall(KSPReset(pcbddc->ksp_D));
4044   PetscCall(KSPReset(pcbddc->ksp_R));
4045   PetscCall(KSPReset(pcbddc->coarse_ksp));
4046   PetscCall(MatDestroy(&pcbddc->local_mat));
4047   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
4048   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
4049   PetscCall(PetscFree(pcbddc->global_primal_indices));
4050   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
4051   PetscCall(MatDestroy(&pcbddc->benign_change));
4052   PetscCall(VecDestroy(&pcbddc->benign_vec));
4053   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
4054   PetscCall(MatDestroy(&pcbddc->benign_B0));
4055   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
4056   if (pcbddc->benign_zerodiag_subs) {
4057     PetscInt i;
4058     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
4059     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
4060   }
4061   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
4062   PetscFunctionReturn(PETSC_SUCCESS);
4063 }
4064 
PCBDDCSetUpLocalWorkVectors(PC pc)4065 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
4066 {
4067   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4068   PC_IS   *pcis   = (PC_IS *)pc->data;
4069   VecType  impVecType;
4070   PetscInt n_constraints, n_R, old_size;
4071 
4072   PetscFunctionBegin;
4073   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
4074   n_R           = pcis->n - pcbddc->n_vertices;
4075   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
4076   /* local work vectors (try to avoid unneeded work)*/
4077   /* R nodes */
4078   old_size = -1;
4079   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
4080   if (n_R != old_size) {
4081     PetscCall(VecDestroy(&pcbddc->vec1_R));
4082     PetscCall(VecDestroy(&pcbddc->vec2_R));
4083     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
4084     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
4085     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
4086     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
4087   }
4088   /* local primal dofs */
4089   old_size = -1;
4090   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
4091   if (pcbddc->local_primal_size != old_size) {
4092     PetscCall(VecDestroy(&pcbddc->vec1_P));
4093     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
4094     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
4095     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
4096   }
4097   /* local explicit constraints */
4098   old_size = -1;
4099   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
4100   if (n_constraints && n_constraints != old_size) {
4101     PetscCall(VecDestroy(&pcbddc->vec1_C));
4102     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
4103     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
4104     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
4105   }
4106   PetscFunctionReturn(PETSC_SUCCESS);
4107 }
4108 
MatSetValuesSubMat(Mat A,Mat S,PetscInt nr,const PetscInt rows[],PetscInt nc,const PetscInt cols[],InsertMode imode)4109 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
4110 {
4111   PetscBool          flg;
4112   const PetscScalar *a;
4113 
4114   PetscFunctionBegin;
4115   PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
4116   if (flg) {
4117     PetscCall(MatDenseGetArrayRead(S, &a));
4118     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4119     PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4120     PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4121     PetscCall(MatDenseRestoreArrayRead(S, &a));
4122   } else {
4123     const PetscInt *ii, *jj;
4124     PetscInt        n;
4125     PetscInt        buf[8192], *bufc = NULL;
4126     PetscBool       freeb = PETSC_FALSE;
4127     Mat             Sm    = S;
4128 
4129     PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4130     if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4131     else PetscCall(PetscObjectReference((PetscObject)S));
4132     PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4133     PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4134     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4135     if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4136       bufc = buf;
4137     } else {
4138       PetscCall(PetscMalloc1(nc, &bufc));
4139       freeb = PETSC_TRUE;
4140     }
4141 
4142     for (PetscInt i = 0; i < n; i++) {
4143       const PetscInt nci = ii[i + 1] - ii[i];
4144 
4145       for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4146       PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4147     }
4148     PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4149     PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4150     PetscCall(MatDestroy(&Sm));
4151     if (freeb) PetscCall(PetscFree(bufc));
4152   }
4153   PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4154   PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4155   PetscFunctionReturn(PETSC_SUCCESS);
4156 }
4157 
MatCreateSeqAIJFromDenseExpand(Mat D,PetscInt n,const PetscInt j[],Mat * mat)4158 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4159 {
4160   Mat_SeqAIJ        *aij;
4161   PetscInt          *ii, *jj;
4162   PetscScalar       *aa;
4163   PetscInt           nnz = 0, m, nc;
4164   const PetscScalar *a;
4165   const PetscScalar  zero = 0.0;
4166 
4167   PetscFunctionBegin;
4168   PetscCall(MatGetLocalSize(D, &m, &nc));
4169   PetscCall(MatDenseGetArrayRead(D, &a));
4170   PetscCall(PetscMalloc1(m + 1, &ii));
4171   PetscCall(PetscMalloc1(m * nc, &jj));
4172   PetscCall(PetscMalloc1(m * nc, &aa));
4173   ii[0] = 0;
4174   for (PetscInt k = 0; k < m; k++) {
4175     for (PetscInt s = 0; s < nc; s++) {
4176       const PetscInt    c = s + k * nc;
4177       const PetscScalar v = a[k + s * m];
4178 
4179       if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4180       jj[nnz] = j[c];
4181       aa[nnz] = a[k + s * m];
4182       nnz++;
4183     }
4184     ii[k + 1] = nnz;
4185   }
4186 
4187   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4188   PetscCall(MatDenseRestoreArrayRead(D, &a));
4189 
4190   aij          = (Mat_SeqAIJ *)(*mat)->data;
4191   aij->free_a  = PETSC_TRUE;
4192   aij->free_ij = PETSC_TRUE;
4193   PetscFunctionReturn(PETSC_SUCCESS);
4194 }
4195 
4196 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
MatSeqAIJInvertVariableBlockDiagonalMat(Mat A,PetscInt nblocks,const PetscInt * bsizes,Mat * B)4197 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4198 {
4199   PetscInt        n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4200   const PetscBool allowzeropivot    = PETSC_FALSE;
4201   PetscBool       zeropivotdetected = PETSC_FALSE;
4202   const PetscReal shift             = 0.0;
4203   PetscInt        ipvt[5], *ii, *jj, *indi, *indj;
4204   PetscScalar     work[25], *v_work = NULL, *aa, *diag;
4205   PetscLogDouble  flops = 0.0;
4206 
4207   PetscFunctionBegin;
4208   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4209   for (PetscInt i = 0; i < nblocks; i++) {
4210     ncnt += bsizes[i];
4211     ncnt2 += PetscSqr(bsizes[i]);
4212   }
4213   PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4214   for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4215   if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4216 
4217   PetscCall(PetscMalloc1(n + 1, &ii));
4218   PetscCall(PetscMalloc1(ncnt2, &jj));
4219   PetscCall(PetscCalloc1(ncnt2, &aa));
4220 
4221   ncnt  = 0;
4222   ii[0] = 0;
4223   indi  = ii;
4224   indj  = jj;
4225   diag  = aa;
4226   for (PetscInt i = 0; i < nblocks; i++) {
4227     const PetscInt bs = bsizes[i];
4228 
4229     for (PetscInt k = 0; k < bs; k++) {
4230       indi[k + 1] = indi[k] + bs;
4231       for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4232     }
4233     PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4234     switch (bs) {
4235     case 1:
4236       *diag = 1.0 / (*diag);
4237       break;
4238     case 2:
4239       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4240       break;
4241     case 3:
4242       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4243       break;
4244     case 4:
4245       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4246       break;
4247     case 5:
4248       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4249       break;
4250     case 6:
4251       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4252       break;
4253     case 7:
4254       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4255       break;
4256     default:
4257       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4258     }
4259     ncnt += bs;
4260     flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4261     diag += bs * bs;
4262     indj += bs * bs;
4263     indi += bs;
4264   }
4265   PetscCall(PetscLogFlops(flops));
4266   PetscCall(PetscFree2(v_work, v_pivots));
4267   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4268   {
4269     Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4270     aij->free_a     = PETSC_TRUE;
4271     aij->free_ij    = PETSC_TRUE;
4272   }
4273   PetscFunctionReturn(PETSC_SUCCESS);
4274 }
4275 
PCBDDCSetUpCorrection(PC pc,Mat * coarse_submat)4276 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4277 {
4278   PC_IS          *pcis       = (PC_IS *)pc->data;
4279   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
4280   PCBDDCGraph     graph      = pcbddc->mat_graph;
4281   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4282   /* submatrices of local problem */
4283   Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4284   /* submatrices of local coarse problem */
4285   Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4286   /* working matrices */
4287   Mat C_CR;
4288 
4289   /* additional working stuff */
4290   PC              pc_R;
4291   IS              is_R, is_V, is_C;
4292   const PetscInt *idx_V, *idx_C;
4293   Mat             F, Brhs = NULL;
4294   Vec             dummy_vec;
4295   PetscBool       isPreonly, isLU, isCHOL, need_benign_correction, sparserhs;
4296   PetscInt       *idx_V_B;
4297   PetscInt        lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4298   PetscInt        n_eff_vertices, n_eff_constraints;
4299   PetscInt        i, n_R, n_D, n_B;
4300   PetscScalar     one = 1.0, m_one = -1.0;
4301 
4302   /* Multi-element support */
4303   PetscBool multi_element = graph->multi_element;
4304   PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4305   PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4306   IS        is_C_perm = NULL;
4307   PetscInt  n_C_bss = 0, *C_bss = NULL;
4308   Mat       coarse_phi_multi;
4309 
4310   PetscFunctionBegin;
4311   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4312   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4313 
4314   /* Set Non-overlapping dimensions */
4315   n_vertices    = pcbddc->n_vertices;
4316   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4317   n_B           = pcis->n_B;
4318   n_D           = pcis->n - n_B;
4319   n_R           = pcis->n - n_vertices;
4320 
4321   /* vertices in boundary numbering */
4322   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4323   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4324   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4325 
4326   /* these two cases still need to be optimized */
4327   if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4328 
4329   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
4330   if (multi_element) {
4331     PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4332 
4333     PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4334     PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4335     PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4336     PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4337     PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4338 
4339     /* group vertices and constraints by subdomain id */
4340     const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4341     const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4342     PetscInt       *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4343     PetscInt        n_el = PetscMax(graph->n_local_subs, 1);
4344 
4345     PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4346     PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4347     PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4348     for (PetscInt i = 0; i < n_vertices; i++) {
4349       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4350 
4351       V_to_eff_V[i] = count_eff[s];
4352       count_eff[s] += 1;
4353     }
4354     for (PetscInt i = 0; i < n_constraints; i++) {
4355       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4356 
4357       C_to_eff_C[i] = count_eff[s];
4358       count_eff[s] += 1;
4359     }
4360 
4361     /* preallocation */
4362     PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4363     for (PetscInt i = 0; i < n_vertices; i++) {
4364       PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4365 
4366       nnz[i] = count_eff[s] + count_eff[s + 1];
4367     }
4368     for (PetscInt i = 0; i < n_constraints; i++) {
4369       PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4370 
4371       nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4372     }
4373     PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4374     PetscCall(PetscFree(nnz));
4375 
4376     n_eff_vertices    = 0;
4377     n_eff_constraints = 0;
4378     for (PetscInt i = 0; i < n_el; i++) {
4379       n_eff_vertices       = PetscMax(n_eff_vertices, count_eff[2 * i]);
4380       n_eff_constraints    = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4381       count_eff[2 * i]     = 0;
4382       count_eff[2 * i + 1] = 0;
4383     }
4384 
4385     const PetscInt *idx;
4386     PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4387 
4388     for (PetscInt i = 0; i < n_vertices; i++) {
4389       const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4390       const PetscInt s = 2 * e;
4391 
4392       V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4393       count_eff[s] += 1;
4394     }
4395     for (PetscInt i = 0; i < n_constraints; i++) {
4396       const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4397       const PetscInt s = 2 * e + 1;
4398 
4399       C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4400       count_eff[s] += 1;
4401     }
4402 
4403     PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4404     PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4405     PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4406     PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4407     for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4408     for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4409     for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4410     for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4411 
4412     PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4413     for (PetscInt i = 0; i < n_R; i++) {
4414       const PetscInt e = graph->nodes[idx[i]].local_sub;
4415       const PetscInt s = 2 * e;
4416       PetscInt       j;
4417 
4418       for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4419       for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4420     }
4421     PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4422     PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4423     for (PetscInt i = 0; i < n_B; i++) {
4424       const PetscInt e = graph->nodes[idx[i]].local_sub;
4425       const PetscInt s = 2 * e;
4426       PetscInt       j;
4427 
4428       for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4429       for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4430     }
4431     PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4432 
4433     /* permutation and blocksizes for block invert of S_CC */
4434     PetscInt *idxp;
4435 
4436     PetscCall(PetscMalloc1(n_constraints, &idxp));
4437     PetscCall(PetscMalloc1(n_el, &C_bss));
4438     n_C_bss = 0;
4439     for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4440       const PetscInt nc = count_eff[2 * e + 1];
4441 
4442       if (nc) C_bss[n_C_bss++] = nc;
4443       for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c];
4444       cnt += nc;
4445     }
4446 
4447     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4448 
4449     PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4450     PetscCall(PetscFree(count_eff));
4451   } else {
4452     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4453     n_eff_constraints = n_constraints;
4454     n_eff_vertices    = n_vertices;
4455   }
4456 
4457   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4458   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4459   PetscCall(PCSetUp(pc_R));
4460   PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->ksp_R, KSPPREONLY, &isPreonly));
4461   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4462   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4463   lda_rhs                = n_R;
4464   need_benign_correction = PETSC_FALSE;
4465   F                      = NULL;
4466   if (isPreonly && (isLU || isCHOL)) {
4467     PetscCall(PCFactorGetMatrix(pc_R, &F));
4468   } else if (sub_schurs && sub_schurs->reuse_solver) {
4469     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4470     MatFactorType      type;
4471 
4472     F = reuse_solver->F;
4473     PetscCall(MatGetFactorType(F, &type));
4474     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4475     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4476     PetscCall(MatGetSize(F, &lda_rhs, NULL));
4477     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4478   }
4479 
4480   /* determine if we can use a sparse right-hand side */
4481   sparserhs = PETSC_FALSE;
4482   if (F && !multi_element) {
4483     MatSolverType solver;
4484 
4485     PetscCall(MatFactorGetSolverType(F, &solver));
4486     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4487   }
4488 
4489   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4490   dummy_vec = NULL;
4491   if (need_benign_correction && lda_rhs != n_R && F) {
4492     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4493     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4494     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4495   }
4496 
4497   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4498   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4499 
4500   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4501   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4502   PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4503   PetscCall(ISGetIndices(is_V, &idx_V));
4504   PetscCall(ISGetIndices(is_C, &idx_C));
4505 
4506   /* Precompute stuffs needed for preprocessing and application of BDDC*/
4507   if (n_constraints) {
4508     Mat C_B;
4509 
4510     /* Extract constraints on R nodes: C_{CR}  */
4511     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4512     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4513 
4514     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4515     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4516     if (!sparserhs) {
4517       PetscScalar *marr;
4518 
4519       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4520       PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4521       for (i = 0; i < n_constraints; i++) {
4522         const PetscScalar *row_cmat_values;
4523         const PetscInt    *row_cmat_indices;
4524         PetscInt           size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4525 
4526         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4527         for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4528         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4529       }
4530       PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4531     } else {
4532       Mat tC_CR;
4533 
4534       PetscCall(MatScale(C_CR, -1.0));
4535       if (lda_rhs != n_R) {
4536         PetscScalar *aa;
4537         PetscInt     r, *ii, *jj;
4538         PetscBool    done;
4539 
4540         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4541         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4542         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4543         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4544         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4545         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4546       } else {
4547         PetscCall(PetscObjectReference((PetscObject)C_CR));
4548         tC_CR = C_CR;
4549       }
4550       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4551       PetscCall(MatDestroy(&tC_CR));
4552     }
4553     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4554     if (F) {
4555       if (need_benign_correction) {
4556         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4557 
4558         /* rhs is already zero on interior dofs, no need to change the rhs */
4559         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4560       }
4561       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4562       if (need_benign_correction) {
4563         PetscScalar       *marr;
4564         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4565 
4566         /* XXX multi_element? */
4567         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4568         if (lda_rhs != n_R) {
4569           for (i = 0; i < n_eff_constraints; i++) {
4570             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4571             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4572             PetscCall(VecResetArray(dummy_vec));
4573           }
4574         } else {
4575           for (i = 0; i < n_eff_constraints; i++) {
4576             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4577             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4578             PetscCall(VecResetArray(pcbddc->vec1_R));
4579           }
4580         }
4581         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4582       }
4583     } else {
4584       const PetscScalar *barr;
4585       PetscScalar       *marr;
4586 
4587       PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4588       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4589       for (i = 0; i < n_eff_constraints; i++) {
4590         PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4591         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4592         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4593         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4594         PetscCall(VecResetArray(pcbddc->vec1_R));
4595         PetscCall(VecResetArray(pcbddc->vec2_R));
4596       }
4597       PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4598       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4599     }
4600     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4601     PetscCall(MatDestroy(&Brhs));
4602     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
4603     if (!pcbddc->switch_static) {
4604       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4605       PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, local_auxmat2_R, pcbddc->local_auxmat2, INSERT_VALUES, SCATTER_FORWARD));
4606       if (multi_element) {
4607         Mat T;
4608 
4609         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4610         PetscCall(MatDestroy(&local_auxmat2_R));
4611         local_auxmat2_R = T;
4612         PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4613         PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4614         pcbddc->local_auxmat2 = T;
4615       }
4616       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4617     } else {
4618       if (multi_element) {
4619         Mat T;
4620 
4621         PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4622         PetscCall(MatDestroy(&local_auxmat2_R));
4623         local_auxmat2_R = T;
4624       }
4625       if (lda_rhs != n_R) {
4626         PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4627       } else {
4628         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4629         pcbddc->local_auxmat2 = local_auxmat2_R;
4630       }
4631       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4632     }
4633     PetscCall(MatScale(S_CC, m_one));
4634     if (multi_element) {
4635       Mat T, T2;
4636       IS  isp, ispi;
4637 
4638       isp = is_C_perm;
4639 
4640       PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4641       PetscCall(MatPermute(S_CC, isp, isp, &T));
4642       PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4643       PetscCall(MatDestroy(&T));
4644       PetscCall(MatDestroy(&S_CC));
4645       PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4646       PetscCall(MatDestroy(&T2));
4647       PetscCall(ISDestroy(&ispi));
4648     } else {
4649       if (isCHOL) {
4650         PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4651       } else {
4652         PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4653       }
4654       PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4655     }
4656     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4657     PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4658     PetscCall(MatDestroy(&C_B));
4659     PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4660   }
4661 
4662   /* Get submatrices from subdomain matrix */
4663   if (n_vertices) {
4664 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4665     PetscBool oldpin;
4666 #endif
4667     IS is_aux;
4668 
4669     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4670       IS tis;
4671 
4672       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4673       PetscCall(ISSort(tis));
4674       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4675       PetscCall(ISDestroy(&tis));
4676     } else {
4677       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4678     }
4679 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4680     oldpin = pcbddc->local_mat->boundtocpu;
4681 #endif
4682     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4683     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4684     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4685     /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4686     PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4687     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4688 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4689     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4690 #endif
4691     PetscCall(ISDestroy(&is_aux));
4692   }
4693   PetscCall(ISDestroy(&is_C_perm));
4694   PetscCall(PetscFree(C_bss));
4695 
4696   p0_lidx_I = NULL;
4697   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4698     const PetscInt *idxs;
4699 
4700     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4701     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4702     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4703     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4704   }
4705 
4706   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4707 
4708   /* Matrices of coarse basis functions (local) */
4709   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4710   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4711   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4712   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4713   if (!multi_element) {
4714     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4715     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4716     coarse_phi_multi = NULL;
4717   } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4718     IS is_rows[2] = {pcbddc->is_R_local, NULL};
4719     IS is_cols[2] = {is_V, is_C};
4720 
4721     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4722     PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4723     PetscCall(ISDestroy(&is_rows[1]));
4724   }
4725 
4726   /* vertices */
4727   if (n_vertices) {
4728     PetscBool restoreavr = PETSC_FALSE;
4729     Mat       A_RRmA_RV  = NULL;
4730 
4731     PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4732     PetscCall(MatDestroy(&A_VV));
4733 
4734     if (n_R) {
4735       Mat A_RV_bcorr = NULL, S_VV;
4736 
4737       PetscCall(MatScale(A_RV, m_one));
4738       if (need_benign_correction) {
4739         ISLocalToGlobalMapping RtoN;
4740         IS                     is_p0;
4741         PetscInt              *idxs_p0, n;
4742 
4743         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4744         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4745         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4746         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4747         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4748         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4749         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4750         PetscCall(ISDestroy(&is_p0));
4751       }
4752 
4753       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4754       if (!sparserhs || need_benign_correction) {
4755         if (lda_rhs == n_R && !multi_element) {
4756           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4757         } else {
4758           Mat             T;
4759           PetscScalar    *av, *array;
4760           const PetscInt *xadj, *adjncy;
4761           PetscInt        n;
4762           PetscBool       flg_row;
4763 
4764           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4765           PetscCall(MatDenseGetArrayWrite(T, &array));
4766           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4767           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4768           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4769           for (i = 0; i < n; i++) {
4770             PetscInt j;
4771             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4772           }
4773           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4774           PetscCall(MatDenseRestoreArrayWrite(T, &array));
4775           PetscCall(MatDestroy(&A_RV));
4776           A_RV = T;
4777         }
4778         if (need_benign_correction) {
4779           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4780           PetscScalar       *marr;
4781 
4782           /* XXX multi_element */
4783           PetscCall(MatDenseGetArray(A_RV, &marr));
4784           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4785 
4786                  | 0 0  0 | (V)
4787              L = | 0 0 -1 | (P-p0)
4788                  | 0 0 -1 | (p0)
4789 
4790           */
4791           for (i = 0; i < reuse_solver->benign_n; i++) {
4792             const PetscScalar *vals;
4793             const PetscInt    *idxs, *idxs_zero;
4794             PetscInt           n, j, nz;
4795 
4796             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4797             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4798             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4799             for (j = 0; j < n; j++) {
4800               PetscScalar val = vals[j];
4801               PetscInt    k, col = idxs[j];
4802               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4803             }
4804             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4805             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4806           }
4807           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4808         }
4809         PetscCall(PetscObjectReference((PetscObject)A_RV));
4810         Brhs = A_RV;
4811       } else {
4812         Mat tA_RVT, A_RVT;
4813 
4814         if (!pcbddc->symmetric_primal) {
4815           /* A_RV already scaled by -1 */
4816           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4817         } else {
4818           restoreavr = PETSC_TRUE;
4819           PetscCall(MatScale(A_VR, -1.0));
4820           PetscCall(PetscObjectReference((PetscObject)A_VR));
4821           A_RVT = A_VR;
4822         }
4823         if (lda_rhs != n_R) {
4824           PetscScalar *aa;
4825           PetscInt     r, *ii, *jj;
4826           PetscBool    done;
4827 
4828           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4829           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4830           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4831           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4832           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4833           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4834         } else {
4835           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4836           tA_RVT = A_RVT;
4837         }
4838         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4839         PetscCall(MatDestroy(&tA_RVT));
4840         PetscCall(MatDestroy(&A_RVT));
4841       }
4842       if (F) {
4843         /* need to correct the rhs */
4844         if (need_benign_correction) {
4845           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4846           PetscScalar       *marr;
4847 
4848           PetscCall(MatDenseGetArray(Brhs, &marr));
4849           if (lda_rhs != n_R) {
4850             for (i = 0; i < n_eff_vertices; i++) {
4851               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4852               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4853               PetscCall(VecResetArray(dummy_vec));
4854             }
4855           } else {
4856             for (i = 0; i < n_eff_vertices; i++) {
4857               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4858               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4859               PetscCall(VecResetArray(pcbddc->vec1_R));
4860             }
4861           }
4862           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4863         }
4864         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4865         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4866         /* need to correct the solution */
4867         if (need_benign_correction) {
4868           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4869           PetscScalar       *marr;
4870 
4871           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4872           if (lda_rhs != n_R) {
4873             for (i = 0; i < n_eff_vertices; i++) {
4874               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4875               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4876               PetscCall(VecResetArray(dummy_vec));
4877             }
4878           } else {
4879             for (i = 0; i < n_eff_vertices; i++) {
4880               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4881               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4882               PetscCall(VecResetArray(pcbddc->vec1_R));
4883             }
4884           }
4885           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4886         }
4887       } else {
4888         const PetscScalar *barr;
4889         PetscScalar       *marr;
4890 
4891         PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4892         PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4893         for (i = 0; i < n_eff_vertices; i++) {
4894           PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4895           PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4896           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4897           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4898           PetscCall(VecResetArray(pcbddc->vec1_R));
4899           PetscCall(VecResetArray(pcbddc->vec2_R));
4900         }
4901         PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4902         PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4903       }
4904       PetscCall(MatDestroy(&A_RV));
4905       PetscCall(MatDestroy(&Brhs));
4906       /* S_VV and S_CV */
4907       if (n_constraints) {
4908         Mat B;
4909 
4910         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4911         PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, A_RRmA_RV, B, INSERT_VALUES, SCATTER_FORWARD));
4912 
4913         /* S_CV = pcbddc->local_auxmat1 * B */
4914         if (multi_element) {
4915           Mat T;
4916 
4917           PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4918           PetscCall(MatDestroy(&B));
4919           B = T;
4920         }
4921         PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4922         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4923         PetscCall(MatProductSetFromOptions(S_CV));
4924         PetscCall(MatProductSymbolic(S_CV));
4925         PetscCall(MatProductNumeric(S_CV));
4926         PetscCall(MatProductClear(S_CV));
4927         PetscCall(MatDestroy(&B));
4928 
4929         /* B = local_auxmat2_R * S_CV */
4930         PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4931         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4932         PetscCall(MatProductSetFromOptions(B));
4933         PetscCall(MatProductSymbolic(B));
4934         PetscCall(MatProductNumeric(B));
4935 
4936         PetscCall(MatScale(S_CV, m_one));
4937         PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4938 
4939         if (multi_element) {
4940           Mat T;
4941 
4942           PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4943           PetscCall(MatDestroy(&A_RRmA_RV));
4944           A_RRmA_RV = T;
4945         }
4946         PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4947         PetscCall(MatDestroy(&B));
4948       } else if (multi_element) {
4949         Mat T;
4950 
4951         PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4952         PetscCall(MatDestroy(&A_RRmA_RV));
4953         A_RRmA_RV = T;
4954       }
4955 
4956       if (lda_rhs != n_R) {
4957         Mat T;
4958 
4959         PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4960         PetscCall(MatDestroy(&A_RRmA_RV));
4961         A_RRmA_RV = T;
4962       }
4963 
4964       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4965       if (need_benign_correction) { /* XXX SPARSE */
4966         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4967         PetscScalar       *sums;
4968         const PetscScalar *marr;
4969 
4970         PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
4971         PetscCall(PetscMalloc1(n_vertices, &sums));
4972         for (i = 0; i < reuse_solver->benign_n; i++) {
4973           const PetscScalar *vals;
4974           const PetscInt    *idxs, *idxs_zero;
4975           PetscInt           n, j, nz;
4976 
4977           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4978           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4979           for (j = 0; j < n_vertices; j++) {
4980             sums[j] = 0.;
4981             for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
4982           }
4983           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4984           for (j = 0; j < n; j++) {
4985             PetscScalar val = vals[j];
4986             for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
4987           }
4988           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4989           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4990         }
4991         PetscCall(PetscFree(sums));
4992         PetscCall(MatDestroy(&A_RV_bcorr));
4993         PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
4994       }
4995 
4996       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
4997       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4998       PetscCall(MatDestroy(&S_VV));
4999     }
5000 
5001     /* coarse basis functions */
5002     if (coarse_phi_multi) {
5003       Mat Vid;
5004 
5005       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
5006       PetscCall(MatShift_Basic(Vid, 1.0));
5007       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
5008       PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
5009       PetscCall(MatDestroy(&Vid));
5010     } else {
5011       if (A_RRmA_RV) {
5012         Mat B;
5013 
5014         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, 0, n_vertices, &B));
5015         PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, A_RRmA_RV, B, INSERT_VALUES, SCATTER_FORWARD));
5016         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B));
5017         if (pcbddc->switch_static || pcbddc->dbg_flag) {
5018           PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, 0, n_vertices, &B));
5019           PetscCall(MatDenseScatter_Private(pcbddc->R_to_D, A_RRmA_RV, B, INSERT_VALUES, SCATTER_FORWARD));
5020           PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B));
5021           if (pcbddc->benign_n) {
5022             for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5023             PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
5024             PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
5025           }
5026         }
5027       }
5028       for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
5029       PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
5030       PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
5031     }
5032     PetscCall(MatDestroy(&A_RRmA_RV));
5033   }
5034   PetscCall(MatDestroy(&A_RV));
5035   PetscCall(VecDestroy(&dummy_vec));
5036 
5037   if (n_constraints) {
5038     Mat B, B2;
5039 
5040     PetscCall(MatScale(S_CC, m_one));
5041     PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
5042     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
5043     PetscCall(MatProductSetFromOptions(B));
5044     PetscCall(MatProductSymbolic(B));
5045     PetscCall(MatProductNumeric(B));
5046 
5047     if (n_vertices) {
5048       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
5049         PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
5050       } else {
5051         if (lda_rhs != n_R) {
5052           Mat tB;
5053 
5054           PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
5055           PetscCall(MatDestroy(&B));
5056           B = tB;
5057         }
5058         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
5059       }
5060       PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
5061     }
5062 
5063     /* coarse basis functions */
5064     if (coarse_phi_multi) {
5065       PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
5066     } else {
5067       PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
5068       PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, B, B2, INSERT_VALUES, SCATTER_FORWARD));
5069       PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
5070       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5071         PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
5072         PetscCall(MatDenseScatter_Private(pcbddc->R_to_D, B, B2, INSERT_VALUES, SCATTER_FORWARD));
5073         if (pcbddc->benign_n) {
5074           for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5075         }
5076         PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
5077       }
5078     }
5079     PetscCall(MatDestroy(&B));
5080   }
5081 
5082   /* assemble sparse coarse basis functions */
5083   if (coarse_phi_multi) {
5084     Mat T;
5085 
5086     PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
5087     PetscCall(MatDestroy(&coarse_phi_multi));
5088     PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
5089     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D));
5090     PetscCall(MatDestroy(&T));
5091   }
5092   PetscCall(MatDestroy(&local_auxmat2_R));
5093   PetscCall(PetscFree(p0_lidx_I));
5094 
5095   /* coarse matrix entries relative to B_0 */
5096   if (pcbddc->benign_n) {
5097     Mat                B0_B, B0_BPHI;
5098     IS                 is_dummy;
5099     const PetscScalar *data;
5100     PetscInt           j;
5101 
5102     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5103     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5104     PetscCall(ISDestroy(&is_dummy));
5105     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5106     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5107     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5108     for (j = 0; j < pcbddc->benign_n; j++) {
5109       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5110       for (i = 0; i < pcbddc->local_primal_size; i++) {
5111         PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5112         PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5113       }
5114     }
5115     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5116     PetscCall(MatDestroy(&B0_B));
5117     PetscCall(MatDestroy(&B0_BPHI));
5118   }
5119 
5120   /* compute other basis functions for non-symmetric problems */
5121   if (!pcbddc->symmetric_primal) {
5122     Mat          B_V = NULL, B_C = NULL;
5123     PetscScalar *marray, *work;
5124 
5125     /* TODO multi_element MatDenseScatter */
5126     if (n_constraints) {
5127       Mat S_CCT, C_CRT;
5128 
5129       PetscCall(MatScale(S_CC, m_one));
5130       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5131       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5132       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5133       PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5134       PetscCall(MatDestroy(&S_CCT));
5135       if (n_vertices) {
5136         Mat S_VCT;
5137 
5138         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5139         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5140         PetscCall(MatDestroy(&S_VCT));
5141         PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5142       }
5143       PetscCall(MatDestroy(&C_CRT));
5144     } else {
5145       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5146     }
5147     if (n_vertices && n_R) {
5148       PetscScalar    *av, *marray;
5149       const PetscInt *xadj, *adjncy;
5150       PetscInt        n;
5151       PetscBool       flg_row;
5152 
5153       /* B_V = B_V - A_VR^T */
5154       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5155       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5156       PetscCall(MatSeqAIJGetArray(A_VR, &av));
5157       PetscCall(MatDenseGetArray(B_V, &marray));
5158       for (i = 0; i < n; i++) {
5159         PetscInt j;
5160         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5161       }
5162       PetscCall(MatDenseRestoreArray(B_V, &marray));
5163       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5164       PetscCall(MatDestroy(&A_VR));
5165     }
5166 
5167     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5168     PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5169     if (n_vertices) {
5170       PetscCall(MatDenseGetArray(B_V, &marray));
5171       for (i = 0; i < n_vertices; i++) {
5172         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5173         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5174         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5175         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5176         PetscCall(VecResetArray(pcbddc->vec1_R));
5177         PetscCall(VecResetArray(pcbddc->vec2_R));
5178       }
5179       PetscCall(MatDenseRestoreArray(B_V, &marray));
5180     }
5181     if (B_C) {
5182       PetscCall(MatDenseGetArray(B_C, &marray));
5183       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5184         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5185         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5186         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5187         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5188         PetscCall(VecResetArray(pcbddc->vec1_R));
5189         PetscCall(VecResetArray(pcbddc->vec2_R));
5190       }
5191       PetscCall(MatDenseRestoreArray(B_C, &marray));
5192     }
5193     /* coarse basis functions */
5194     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5195     if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5196     for (i = 0; i < pcbddc->local_primal_size; i++) {
5197       Vec v;
5198 
5199       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5200       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5201       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5202       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5203       if (i < n_vertices) {
5204         PetscScalar one = 1.0;
5205         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5206         PetscCall(VecAssemblyBegin(v));
5207         PetscCall(VecAssemblyEnd(v));
5208       }
5209       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5210 
5211       if (pcbddc->switch_static || pcbddc->dbg_flag) {
5212         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5213         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5214         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5215         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5216       }
5217       PetscCall(VecResetArray(pcbddc->vec1_R));
5218     }
5219     PetscCall(MatDestroy(&B_V));
5220     PetscCall(MatDestroy(&B_C));
5221     PetscCall(PetscFree(work));
5222   } else {
5223     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5224     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5225     PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5226     pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5227   }
5228   PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5229   PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5230 
5231   /* free memory */
5232   PetscCall(PetscFree(V_to_eff_V));
5233   PetscCall(PetscFree(C_to_eff_C));
5234   PetscCall(PetscFree(R_eff_V_J));
5235   PetscCall(PetscFree(R_eff_C_J));
5236   PetscCall(PetscFree(B_eff_V_J));
5237   PetscCall(PetscFree(B_eff_C_J));
5238   PetscCall(ISDestroy(&is_R));
5239   PetscCall(ISRestoreIndices(is_V, &idx_V));
5240   PetscCall(ISRestoreIndices(is_C, &idx_C));
5241   PetscCall(ISDestroy(&is_V));
5242   PetscCall(ISDestroy(&is_C));
5243   PetscCall(PetscFree(idx_V_B));
5244   PetscCall(MatDestroy(&S_CV));
5245   PetscCall(MatDestroy(&S_VC));
5246   PetscCall(MatDestroy(&S_CC));
5247   if (n_vertices) PetscCall(MatDestroy(&A_VR));
5248   if (n_constraints) PetscCall(MatDestroy(&C_CR));
5249   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5250 
5251   /* Checking coarse_sub_mat and coarse basis functions */
5252   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5253   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5254   if (pcbddc->dbg_flag) {
5255     Mat       AUXMAT, TM1, TM2, TM3, TM4;
5256     Mat       coarse_phi_D, coarse_phi_B;
5257     Mat       coarse_psi_D, coarse_psi_B;
5258     Mat       A_II, A_BB, A_IB, A_BI;
5259     Mat       C_B, CPHI;
5260     IS        is_dummy;
5261     Vec       mones;
5262     MatType   checkmattype = MATSEQAIJ;
5263     PetscReal real_value;
5264 
5265     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5266       Mat A;
5267       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5268       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5269       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5270       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5271       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5272       PetscCall(MatDestroy(&A));
5273     } else {
5274       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5275       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5276       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5277       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5278     }
5279     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5280     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5281     if (!pcbddc->symmetric_primal) {
5282       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5283       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5284     }
5285     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5286     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5287     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5288     if (!pcbddc->symmetric_primal) {
5289       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5290       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5291       PetscCall(MatDestroy(&AUXMAT));
5292       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5293       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5294       PetscCall(MatDestroy(&AUXMAT));
5295       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5296       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5297       PetscCall(MatDestroy(&AUXMAT));
5298       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5299       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5300       PetscCall(MatDestroy(&AUXMAT));
5301     } else {
5302       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5303       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5304       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5305       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5306       PetscCall(MatDestroy(&AUXMAT));
5307       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5308       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5309       PetscCall(MatDestroy(&AUXMAT));
5310     }
5311     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5312     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5313     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5314     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5315     if (pcbddc->benign_n) {
5316       Mat                B0_B, B0_BPHI;
5317       const PetscScalar *data2;
5318       PetscScalar       *data;
5319       PetscInt           j;
5320 
5321       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5322       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5323       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5324       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5325       PetscCall(MatDenseGetArray(TM1, &data));
5326       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5327       for (j = 0; j < pcbddc->benign_n; j++) {
5328         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5329         for (i = 0; i < pcbddc->local_primal_size; i++) {
5330           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5331           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5332         }
5333       }
5334       PetscCall(MatDenseRestoreArray(TM1, &data));
5335       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5336       PetscCall(MatDestroy(&B0_B));
5337       PetscCall(ISDestroy(&is_dummy));
5338       PetscCall(MatDestroy(&B0_BPHI));
5339     }
5340     PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5341     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5342     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5343     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5344 
5345     /* check constraints */
5346     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5347     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5348     if (!pcbddc->benign_n) { /* TODO: add benign case */
5349       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5350     } else {
5351       PetscScalar *data;
5352       Mat          tmat;
5353       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5354       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5355       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5356       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5357       PetscCall(MatDestroy(&tmat));
5358     }
5359     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5360     PetscCall(VecSet(mones, -1.0));
5361     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5362     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5363     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5364     if (!pcbddc->symmetric_primal) {
5365       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5366       PetscCall(VecSet(mones, -1.0));
5367       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5368       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5369       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5370     }
5371     PetscCall(MatDestroy(&C_B));
5372     PetscCall(MatDestroy(&CPHI));
5373     PetscCall(ISDestroy(&is_dummy));
5374     PetscCall(VecDestroy(&mones));
5375     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5376     PetscCall(MatDestroy(&A_II));
5377     PetscCall(MatDestroy(&A_BB));
5378     PetscCall(MatDestroy(&A_IB));
5379     PetscCall(MatDestroy(&A_BI));
5380     PetscCall(MatDestroy(&TM1));
5381     PetscCall(MatDestroy(&TM2));
5382     PetscCall(MatDestroy(&TM3));
5383     PetscCall(MatDestroy(&TM4));
5384     PetscCall(MatDestroy(&coarse_phi_D));
5385     PetscCall(MatDestroy(&coarse_phi_B));
5386     if (!pcbddc->symmetric_primal) {
5387       PetscCall(MatDestroy(&coarse_psi_D));
5388       PetscCall(MatDestroy(&coarse_psi_B));
5389     }
5390   }
5391 
5392 #if 0
5393   {
5394     PetscViewer viewer;
5395     char filename[256];
5396 
5397     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5398     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5399     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5400     PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5401     PetscCall(MatView(*coarse_submat,viewer));
5402     if (pcbddc->coarse_phi_B) {
5403       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5404       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5405     }
5406     if (pcbddc->coarse_phi_D) {
5407       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5408       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5409     }
5410     if (pcbddc->coarse_psi_B) {
5411       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5412       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5413     }
5414     if (pcbddc->coarse_psi_D) {
5415       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5416       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5417     }
5418     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5419     PetscCall(MatView(pcbddc->local_mat,viewer));
5420     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5421     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5422     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5423     PetscCall(ISView(pcis->is_I_local,viewer));
5424     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5425     PetscCall(ISView(pcis->is_B_local,viewer));
5426     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5427     PetscCall(ISView(pcbddc->is_R_local,viewer));
5428     PetscCall(PetscViewerDestroy(&viewer));
5429   }
5430 #endif
5431 
5432   /* device support */
5433   {
5434     PetscBool iscuda, iship, iskokkos;
5435     MatType   mtype = NULL;
5436 
5437     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5438     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5439     PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5440     if (iskokkos) {
5441       if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5442       else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5443     }
5444     if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5445     else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5446     else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5447     if (mtype) {
5448       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5449       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5450       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5451       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5452       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5453       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5454     }
5455   }
5456   PetscFunctionReturn(PETSC_SUCCESS);
5457 }
5458 
MatCreateSubMatrixUnsorted(Mat A,IS isrow,IS iscol,Mat * B)5459 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5460 {
5461   Mat      *work_mat;
5462   IS        isrow_s, iscol_s;
5463   PetscBool rsorted, csorted;
5464   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5465 
5466   PetscFunctionBegin;
5467   PetscCall(ISSorted(isrow, &rsorted));
5468   PetscCall(ISSorted(iscol, &csorted));
5469   PetscCall(ISGetLocalSize(isrow, &rsize));
5470   PetscCall(ISGetLocalSize(iscol, &csize));
5471 
5472   if (!rsorted) {
5473     const PetscInt *idxs;
5474     PetscInt       *idxs_sorted, i;
5475 
5476     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5477     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5478     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5479     PetscCall(ISGetIndices(isrow, &idxs));
5480     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5481     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5482     PetscCall(ISRestoreIndices(isrow, &idxs));
5483     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5484   } else {
5485     PetscCall(PetscObjectReference((PetscObject)isrow));
5486     isrow_s = isrow;
5487   }
5488 
5489   if (!csorted) {
5490     if (isrow == iscol) {
5491       PetscCall(PetscObjectReference((PetscObject)isrow_s));
5492       iscol_s = isrow_s;
5493     } else {
5494       const PetscInt *idxs;
5495       PetscInt       *idxs_sorted, i;
5496 
5497       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5498       PetscCall(PetscMalloc1(csize, &idxs_sorted));
5499       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5500       PetscCall(ISGetIndices(iscol, &idxs));
5501       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5502       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5503       PetscCall(ISRestoreIndices(iscol, &idxs));
5504       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5505     }
5506   } else {
5507     PetscCall(PetscObjectReference((PetscObject)iscol));
5508     iscol_s = iscol;
5509   }
5510 
5511   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5512 
5513   if (!rsorted || !csorted) {
5514     Mat new_mat;
5515     IS  is_perm_r, is_perm_c;
5516 
5517     if (!rsorted) {
5518       PetscInt *idxs_r, i;
5519       PetscCall(PetscMalloc1(rsize, &idxs_r));
5520       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5521       PetscCall(PetscFree(idxs_perm_r));
5522       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5523     } else {
5524       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5525     }
5526     PetscCall(ISSetPermutation(is_perm_r));
5527 
5528     if (!csorted) {
5529       if (isrow_s == iscol_s) {
5530         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5531         is_perm_c = is_perm_r;
5532       } else {
5533         PetscInt *idxs_c, i;
5534         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5535         PetscCall(PetscMalloc1(csize, &idxs_c));
5536         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5537         PetscCall(PetscFree(idxs_perm_c));
5538         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5539       }
5540     } else {
5541       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5542     }
5543     PetscCall(ISSetPermutation(is_perm_c));
5544 
5545     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5546     PetscCall(MatDestroy(&work_mat[0]));
5547     work_mat[0] = new_mat;
5548     PetscCall(ISDestroy(&is_perm_r));
5549     PetscCall(ISDestroy(&is_perm_c));
5550   }
5551 
5552   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5553   *B = work_mat[0];
5554   PetscCall(MatDestroyMatrices(1, &work_mat));
5555   PetscCall(ISDestroy(&isrow_s));
5556   PetscCall(ISDestroy(&iscol_s));
5557   PetscFunctionReturn(PETSC_SUCCESS);
5558 }
5559 
MatPtAPWithPrefix_Private(Mat A,Mat P,PetscReal fill,const char * prefix,Mat * C)5560 static PetscErrorCode MatPtAPWithPrefix_Private(Mat A, Mat P, PetscReal fill, const char *prefix, Mat *C)
5561 {
5562   PetscFunctionBegin;
5563   PetscCall(MatProductCreate(A, P, NULL, C));
5564   PetscCall(MatProductSetType(*C, MATPRODUCT_PtAP));
5565   PetscCall(MatProductSetAlgorithm(*C, "default"));
5566   PetscCall(MatProductSetFill(*C, fill));
5567   PetscCall(MatSetOptionsPrefix(*C, prefix));
5568   PetscCall(MatProductSetFromOptions(*C));
5569   PetscCall(MatProductSymbolic(*C));
5570   PetscCall(MatProductNumeric(*C));
5571   (*C)->symmetric = A->symmetric;
5572   (*C)->spd       = A->spd;
5573   PetscFunctionReturn(PETSC_SUCCESS);
5574 }
5575 
PCBDDCComputeLocalMatrix(PC pc,Mat ChangeOfBasisMatrix)5576 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5577 {
5578   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
5579   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
5580   Mat       new_mat, lA;
5581   IS        is_local, is_global;
5582   PetscInt  local_size;
5583   PetscBool isseqaij, issym, isset;
5584   char      ptapprefix[256];
5585 
5586   PetscFunctionBegin;
5587   PetscCall(MatDestroy(&pcbddc->local_mat));
5588   PetscCall(MatGetSize(matis->A, &local_size, NULL));
5589   if (pcbddc->mat_graph->multi_element) {
5590     Mat     *mats, *bdiags;
5591     IS      *gsubs;
5592     PetscInt nsubs = pcbddc->n_local_subs;
5593 
5594     PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5595 #if 1
5596     PetscCall(PetscMalloc1(nsubs, &gsubs));
5597     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5598     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5599     for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5600     PetscCall(PetscFree(gsubs));
5601 #else /* this does not work since MatCreateSubMatrices does not support repeated indices */
5602     Mat *tmats;
5603     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5604     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5605     PetscCall(ISDestroy(&is_local));
5606     PetscCall(MatSetOption(ChangeOfBasisMatrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
5607     PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, 1, &is_global, &is_global, MAT_INITIAL_MATRIX, &tmats));
5608     PetscCall(ISDestroy(&is_global));
5609     PetscCall(MatCreateSubMatrices(tmats[0], nsubs, pcbddc->local_subs, pcbddc->local_subs, MAT_INITIAL_MATRIX, &bdiags));
5610     PetscCall(MatDestroySubMatrices(1, &tmats));
5611 #endif
5612     for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5613     PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5614     PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5615     PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5616     PetscCall(PetscFree(mats));
5617   } else {
5618     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5619     PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5620     PetscCall(ISDestroy(&is_local));
5621     PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5622     PetscCall(ISDestroy(&is_global));
5623   }
5624   if (pcbddc->dbg_flag) {
5625     Vec       x, x_change;
5626     PetscReal error;
5627 
5628     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5629     PetscCall(VecSetRandom(x, NULL));
5630     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5631     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5632     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5633     PetscCall(MatMult(new_mat, matis->x, matis->y));
5634     if (!pcbddc->change_interior) {
5635       const PetscScalar *x, *y, *v;
5636       PetscReal          lerror = 0.;
5637       PetscInt           i;
5638 
5639       PetscCall(VecGetArrayRead(matis->x, &x));
5640       PetscCall(VecGetArrayRead(matis->y, &y));
5641       PetscCall(VecGetArrayRead(matis->counter, &v));
5642       for (i = 0; i < local_size; i++)
5643         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5644       PetscCall(VecRestoreArrayRead(matis->x, &x));
5645       PetscCall(VecRestoreArrayRead(matis->y, &y));
5646       PetscCall(VecRestoreArrayRead(matis->counter, &v));
5647       PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5648       if (error > PETSC_SMALL) {
5649         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5650           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5651         } else {
5652           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5653         }
5654       }
5655     }
5656     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5657     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5658     PetscCall(VecAXPY(x, -1.0, x_change));
5659     PetscCall(VecNorm(x, NORM_INFINITY, &error));
5660     if (error > PETSC_SMALL) {
5661       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5662         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5663       } else {
5664         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5665       }
5666     }
5667     PetscCall(VecDestroy(&x));
5668     PetscCall(VecDestroy(&x_change));
5669   }
5670 
5671   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5672   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5673 
5674   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5675   if (((PetscObject)pc)->prefix) PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "%spc_bddc_change_", ((PetscObject)pc)->prefix));
5676   else PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "pc_bddc_change_"));
5677   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5678   if (isseqaij) {
5679     PetscCall(MatDestroy(&pcbddc->local_mat));
5680     PetscCall(MatPtAPWithPrefix_Private(matis->A, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5681     if (lA) {
5682       Mat work;
5683       PetscCall(MatPtAPWithPrefix_Private(lA, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5684       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5685       PetscCall(MatDestroy(&work));
5686     }
5687   } else {
5688     Mat work_mat;
5689 
5690     PetscCall(MatDestroy(&pcbddc->local_mat));
5691     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5692     PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5693     PetscCall(MatDestroy(&work_mat));
5694     if (lA) {
5695       Mat work;
5696       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5697       PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5698       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5699       PetscCall(MatDestroy(&work));
5700     }
5701   }
5702   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5703   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5704   PetscCall(MatDestroy(&new_mat));
5705   PetscFunctionReturn(PETSC_SUCCESS);
5706 }
5707 
PCBDDCSetUpLocalScatters(PC pc)5708 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5709 {
5710   PC_IS          *pcis        = (PC_IS *)pc->data;
5711   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
5712   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
5713   PetscInt       *idx_R_local = NULL;
5714   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
5715   PetscInt        vbs, bs;
5716   PetscBT         bitmask = NULL;
5717 
5718   PetscFunctionBegin;
5719   /*
5720     No need to setup local scatters if
5721       - primal space is unchanged
5722         AND
5723       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5724         AND
5725       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5726   */
5727   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5728   /* destroy old objects */
5729   PetscCall(ISDestroy(&pcbddc->is_R_local));
5730   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5731   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5732   /* Set Non-overlapping dimensions */
5733   n_B        = pcis->n_B;
5734   n_D        = pcis->n - n_B;
5735   n_vertices = pcbddc->n_vertices;
5736 
5737   /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5738 
5739   /* create auxiliary bitmask and allocate workspace */
5740   if (!sub_schurs || !sub_schurs->reuse_solver) {
5741     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5742     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5743     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5744 
5745     for (i = 0, n_R = 0; i < pcis->n; i++) {
5746       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5747     }
5748   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5749     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5750 
5751     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5752     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5753   }
5754 
5755   /* Block code */
5756   vbs = 1;
5757   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5758   if (bs > 1 && !(n_vertices % bs)) {
5759     PetscBool is_blocked = PETSC_TRUE;
5760     PetscInt *vary;
5761     if (!sub_schurs || !sub_schurs->reuse_solver) {
5762       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5763       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5764       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5765       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5766       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5767       for (i = 0; i < pcis->n / bs; i++) {
5768         if (vary[i] != 0 && vary[i] != bs) {
5769           is_blocked = PETSC_FALSE;
5770           break;
5771         }
5772       }
5773       PetscCall(PetscFree(vary));
5774     } else {
5775       /* Verify directly the R set */
5776       for (i = 0; i < n_R / bs; i++) {
5777         PetscInt j, node = idx_R_local[bs * i];
5778         for (j = 1; j < bs; j++) {
5779           if (node != idx_R_local[bs * i + j] - j) {
5780             is_blocked = PETSC_FALSE;
5781             break;
5782           }
5783         }
5784       }
5785     }
5786     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5787       vbs = bs;
5788       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5789     }
5790   }
5791   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5792   if (sub_schurs && sub_schurs->reuse_solver) {
5793     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5794 
5795     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5796     PetscCall(ISDestroy(&reuse_solver->is_R));
5797     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5798     reuse_solver->is_R = pcbddc->is_R_local;
5799   } else {
5800     PetscCall(PetscFree(idx_R_local));
5801   }
5802 
5803   /* print some info if requested */
5804   if (pcbddc->dbg_flag) {
5805     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5806     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5807     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5808     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5809     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5810     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5811                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5812     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5813   }
5814 
5815   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5816   if (!sub_schurs || !sub_schurs->reuse_solver) {
5817     IS        is_aux1, is_aux2;
5818     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5819 
5820     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5821     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5822     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5823     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5824     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5825     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5826     for (i = 0, j = 0; i < n_R; i++) {
5827       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5828     }
5829     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5830     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5831     for (i = 0, j = 0; i < n_B; i++) {
5832       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5833     }
5834     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5835     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5836     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5837     PetscCall(ISDestroy(&is_aux1));
5838     PetscCall(ISDestroy(&is_aux2));
5839 
5840     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5841       PetscCall(PetscMalloc1(n_D, &aux_array1));
5842       for (i = 0, j = 0; i < n_R; i++) {
5843         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5844       }
5845       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5846       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5847       PetscCall(ISDestroy(&is_aux1));
5848     }
5849     PetscCall(PetscBTDestroy(&bitmask));
5850     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5851   } else {
5852     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5853     IS                 tis;
5854     PetscInt           schur_size;
5855 
5856     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5857     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5858     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5859     PetscCall(ISDestroy(&tis));
5860     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5861       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5862       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5863       PetscCall(ISDestroy(&tis));
5864     }
5865   }
5866   PetscFunctionReturn(PETSC_SUCCESS);
5867 }
5868 
MatNullSpacePropagateAny_Private(Mat A,IS is,Mat B)5869 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5870 {
5871   MatNullSpace NullSpace;
5872   Mat          dmat;
5873   const Vec   *nullvecs;
5874   Vec          v, v2, *nullvecs2;
5875   VecScatter   sct = NULL;
5876   PetscScalar *ddata;
5877   PetscInt     k, nnsp_size, bsiz, bsiz2, n, N, bs;
5878   PetscBool    nnsp_has_cnst;
5879 
5880   PetscFunctionBegin;
5881   if (!is && !B) { /* MATIS */
5882     Mat_IS *matis = (Mat_IS *)A->data;
5883 
5884     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5885     sct = matis->cctx;
5886     PetscCall(PetscObjectReference((PetscObject)sct));
5887   } else {
5888     PetscCall(MatGetNullSpace(B, &NullSpace));
5889     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5890     if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5891   }
5892   PetscCall(MatGetNullSpace(A, &NullSpace));
5893   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5894   if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5895 
5896   PetscCall(MatCreateVecs(A, &v, NULL));
5897   PetscCall(MatCreateVecs(B, &v2, NULL));
5898   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5899   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs));
5900   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5901   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5902   PetscCall(VecGetBlockSize(v2, &bs));
5903   PetscCall(VecGetSize(v2, &N));
5904   PetscCall(VecGetLocalSize(v2, &n));
5905   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5906   for (k = 0; k < nnsp_size; k++) {
5907     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5908     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5909     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5910   }
5911   if (nnsp_has_cnst) {
5912     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5913     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5914   }
5915   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5916   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5917 
5918   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5919   PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault));
5920   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5921   PetscCall(MatDestroy(&dmat));
5922 
5923   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5924   PetscCall(PetscFree(nullvecs2));
5925   PetscCall(MatSetNearNullSpace(B, NullSpace));
5926   PetscCall(MatNullSpaceDestroy(&NullSpace));
5927   PetscCall(VecDestroy(&v));
5928   PetscCall(VecDestroy(&v2));
5929   PetscCall(VecScatterDestroy(&sct));
5930   PetscFunctionReturn(PETSC_SUCCESS);
5931 }
5932 
PCBDDCSetUpLocalSolvers(PC pc,PetscBool dirichlet,PetscBool neumann)5933 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5934 {
5935   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5936   PC_IS       *pcis   = (PC_IS *)pc->data;
5937   PC           pc_temp;
5938   Mat          A_RR;
5939   MatNullSpace nnsp;
5940   MatReuse     reuse;
5941   PetscScalar  m_one = -1.0;
5942   PetscReal    value;
5943   PetscInt     n_D, n_R;
5944   PetscBool    issbaij, opts, isset, issym;
5945   PetscBool    f = PETSC_FALSE;
5946   char         dir_prefix[256], neu_prefix[256], str_level[16];
5947   size_t       len;
5948 
5949   PetscFunctionBegin;
5950   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5951   /* approximate solver, propagate NearNullSpace if needed */
5952   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5953     MatNullSpace gnnsp1, gnnsp2;
5954     PetscBool    lhas, ghas;
5955 
5956     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5957     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5958     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5959     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5960     PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5961     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5962   }
5963 
5964   /* compute prefixes */
5965   PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5966   PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5967   if (!pcbddc->current_level) {
5968     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5969     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5970     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5971     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5972   } else {
5973     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
5974     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5975     len -= 15;                                /* remove "pc_bddc_coarse_" */
5976     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5977     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5978     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5979     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5980     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5981     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5982     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5983     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5984     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5985   }
5986 
5987   /* DIRICHLET PROBLEM */
5988   if (dirichlet) {
5989     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5990     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5991       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5992       if (pcbddc->dbg_flag) {
5993         Mat A_IIn;
5994 
5995         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5996         PetscCall(MatDestroy(&pcis->A_II));
5997         pcis->A_II = A_IIn;
5998       }
5999     }
6000     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6001     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
6002 
6003     /* Matrix for Dirichlet problem is pcis->A_II */
6004     n_D  = pcis->n - pcis->n_B;
6005     opts = PETSC_FALSE;
6006     if (!pcbddc->ksp_D) { /* create object if not yet build */
6007       opts = PETSC_TRUE;
6008       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
6009       PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
6010       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
6011       /* default */
6012       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
6013       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
6014       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
6015       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6016       if (issbaij) {
6017         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6018       } else {
6019         PetscCall(PCSetType(pc_temp, PCLU));
6020       }
6021       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
6022     }
6023     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
6024     PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
6025     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
6026     /* Allow user's customization */
6027     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
6028     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
6029     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6030       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
6031     }
6032     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
6033     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6034     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6035     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6036       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6037       const PetscInt *idxs;
6038       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
6039 
6040       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
6041       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
6042       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6043       for (i = 0; i < nl; i++) {
6044         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6045       }
6046       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
6047       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6048       PetscCall(PetscFree(scoords));
6049     }
6050     if (sub_schurs && sub_schurs->reuse_solver) {
6051       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6052 
6053       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
6054     }
6055 
6056     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6057     if (!n_D) {
6058       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6059       PetscCall(PCSetType(pc_temp, PCNONE));
6060     }
6061     PetscCall(KSPSetUp(pcbddc->ksp_D));
6062     /* set ksp_D into pcis data */
6063     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
6064     PetscCall(KSPDestroy(&pcis->ksp_D));
6065     pcis->ksp_D = pcbddc->ksp_D;
6066   }
6067 
6068   /* NEUMANN PROBLEM */
6069   A_RR = NULL;
6070   if (neumann) {
6071     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6072     PetscInt        ibs, mbs;
6073     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
6074     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
6075 
6076     reuse_neumann_solver = PETSC_FALSE;
6077     if (sub_schurs && sub_schurs->reuse_solver) {
6078       IS iP;
6079 
6080       reuse_neumann_solver = PETSC_TRUE;
6081       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
6082       if (iP) reuse_neumann_solver = PETSC_FALSE;
6083     }
6084     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
6085     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
6086     if (pcbddc->ksp_R) { /* already created ksp */
6087       PetscInt nn_R;
6088       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
6089       PetscCall(PetscObjectReference((PetscObject)A_RR));
6090       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
6091       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
6092         PetscCall(KSPReset(pcbddc->ksp_R));
6093         PetscCall(MatDestroy(&A_RR));
6094         reuse = MAT_INITIAL_MATRIX;
6095       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
6096         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
6097           PetscCall(MatDestroy(&A_RR));
6098           reuse = MAT_INITIAL_MATRIX;
6099         } else { /* safe to reuse the matrix */
6100           reuse = MAT_REUSE_MATRIX;
6101         }
6102       }
6103       /* last check */
6104       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
6105         PetscCall(MatDestroy(&A_RR));
6106         reuse = MAT_INITIAL_MATRIX;
6107       }
6108     } else { /* first time, so we need to create the matrix */
6109       reuse = MAT_INITIAL_MATRIX;
6110     }
6111     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
6112        TODO: Get Rid of these conversions */
6113     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
6114     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
6115     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
6116     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
6117       if (matis->A == pcbddc->local_mat) {
6118         PetscCall(MatDestroy(&pcbddc->local_mat));
6119         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6120       } else {
6121         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6122       }
6123     } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
6124       if (matis->A == pcbddc->local_mat) {
6125         PetscCall(MatDestroy(&pcbddc->local_mat));
6126         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6127       } else {
6128         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6129       }
6130     }
6131     /* extract A_RR */
6132     if (reuse_neumann_solver) {
6133       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6134 
6135       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6136         PetscCall(MatDestroy(&A_RR));
6137         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6138           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6139         } else {
6140           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6141         }
6142       } else {
6143         PetscCall(MatDestroy(&A_RR));
6144         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6145         PetscCall(PetscObjectReference((PetscObject)A_RR));
6146       }
6147     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6148       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6149     }
6150     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6151     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6152     opts = PETSC_FALSE;
6153     if (!pcbddc->ksp_R) { /* create object if not present */
6154       opts = PETSC_TRUE;
6155       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6156       PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6157       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6158       /* default */
6159       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6160       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6161       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6162       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6163       if (issbaij) {
6164         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6165       } else {
6166         PetscCall(PCSetType(pc_temp, PCLU));
6167       }
6168       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6169     }
6170     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6171     PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6172     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6173     if (opts) { /* Allow user's customization once */
6174       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6175     }
6176     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6177     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6178       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6179     }
6180     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6181     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6182     PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6183     if (f && pcbddc->mat_graph->cloc && !nnsp) {
6184       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
6185       const PetscInt *idxs;
6186       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
6187 
6188       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6189       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6190       PetscCall(PetscMalloc1(nl * cdim, &scoords));
6191       for (i = 0; i < nl; i++) {
6192         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6193       }
6194       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6195       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6196       PetscCall(PetscFree(scoords));
6197     }
6198 
6199     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6200     if (!n_R) {
6201       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6202       PetscCall(PCSetType(pc_temp, PCNONE));
6203     }
6204     /* Reuse solver if it is present */
6205     if (reuse_neumann_solver) {
6206       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6207 
6208       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6209     }
6210     PetscCall(KSPSetUp(pcbddc->ksp_R));
6211   }
6212 
6213   if (pcbddc->dbg_flag) {
6214     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6215     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6216     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6217   }
6218   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6219 
6220   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6221   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6222   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6223   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6224   /* check Dirichlet and Neumann solvers */
6225   if (pcbddc->dbg_flag) {
6226     if (dirichlet) { /* Dirichlet */
6227       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6228       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6229       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6230       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6231       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6232       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6233       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6234       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6235     }
6236     if (neumann) { /* Neumann */
6237       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6238       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6239       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6240       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6241       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6242       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6243       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6244       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6245     }
6246   }
6247   /* free Neumann problem's matrix */
6248   PetscCall(MatDestroy(&A_RR));
6249   PetscFunctionReturn(PETSC_SUCCESS);
6250 }
6251 
PCBDDCSolveSubstructureCorrection(PC pc,Vec inout_B,Vec inout_D,PetscBool applytranspose)6252 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6253 {
6254   PC_BDDC        *pcbddc       = (PC_BDDC *)pc->data;
6255   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
6256   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6257 
6258   PetscFunctionBegin;
6259   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6260   if (!pcbddc->switch_static) {
6261     if (applytranspose && pcbddc->local_auxmat1) {
6262       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6263       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6264     }
6265     if (!reuse_solver) {
6266       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6267       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6268     } else {
6269       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6270 
6271       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6272       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6273     }
6274   } else {
6275     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6276     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6277     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6278     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6279     if (applytranspose && pcbddc->local_auxmat1) {
6280       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6281       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6282       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6283       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6284     }
6285   }
6286   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6287   if (!reuse_solver || pcbddc->switch_static) {
6288     if (applytranspose) {
6289       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6290     } else {
6291       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6292     }
6293     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6294   } else {
6295     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6296 
6297     if (applytranspose) {
6298       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6299     } else {
6300       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6301     }
6302   }
6303   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6304   PetscCall(VecSet(inout_B, 0.));
6305   if (!pcbddc->switch_static) {
6306     if (!reuse_solver) {
6307       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6308       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6309     } else {
6310       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6311 
6312       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6313       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6314     }
6315     if (!applytranspose && pcbddc->local_auxmat1) {
6316       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6317       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6318     }
6319   } else {
6320     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6321     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6322     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6323     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6324     if (!applytranspose && pcbddc->local_auxmat1) {
6325       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6326       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6327     }
6328     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6329     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6330     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6331     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6332   }
6333   PetscFunctionReturn(PETSC_SUCCESS);
6334 }
6335 
6336 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
PCBDDCApplyInterfacePreconditioner(PC pc,PetscBool applytranspose)6337 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6338 {
6339   PC_BDDC          *pcbddc = (PC_BDDC *)pc->data;
6340   PC_IS            *pcis   = (PC_IS *)pc->data;
6341   const PetscScalar zero   = 0.0;
6342 
6343   PetscFunctionBegin;
6344   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6345   if (!pcbddc->benign_apply_coarse_only) {
6346     if (applytranspose) {
6347       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6348       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6349     } else {
6350       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6351       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6352     }
6353   } else {
6354     PetscCall(VecSet(pcbddc->vec1_P, zero));
6355   }
6356 
6357   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6358   if (pcbddc->benign_n) {
6359     PetscScalar *array;
6360     PetscInt     j;
6361 
6362     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6363     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6364     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6365   }
6366 
6367   /* start communications from local primal nodes to rhs of coarse solver */
6368   PetscCall(VecSet(pcbddc->coarse_vec, zero));
6369   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6370   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6371 
6372   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6373   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6374   if (pcbddc->coarse_ksp) {
6375     Mat          coarse_mat;
6376     Vec          rhs, sol;
6377     MatNullSpace nullsp;
6378     PetscBool    isbddc = PETSC_FALSE;
6379 
6380     if (pcbddc->benign_have_null) {
6381       PC coarse_pc;
6382 
6383       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6384       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6385       /* we need to propagate to coarser levels the need for a possible benign correction */
6386       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6387         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)coarse_pc->data;
6388         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
6389         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6390       }
6391     }
6392     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6393     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6394     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6395     if (applytranspose) {
6396       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6397       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6398       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6399       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6400       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6401     } else {
6402       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6403       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6404         PC coarse_pc;
6405 
6406         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6407         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6408         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6409         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6410         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6411       } else {
6412         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6413         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6414         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6415       }
6416     }
6417     /* we don't need the benign correction at coarser levels anymore */
6418     if (pcbddc->benign_have_null && isbddc) {
6419       PC       coarse_pc;
6420       PC_BDDC *coarsepcbddc;
6421 
6422       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6423       coarsepcbddc                           = (PC_BDDC *)coarse_pc->data;
6424       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
6425       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6426     }
6427   }
6428   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6429 
6430   /* Local solution on R nodes */
6431   if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6432   /* communications from coarse sol to local primal nodes */
6433   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6434   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6435 
6436   /* Sum contributions from the two levels */
6437   if (!pcbddc->benign_apply_coarse_only) {
6438     if (applytranspose) {
6439       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6440       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6441     } else {
6442       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6443       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6444     }
6445     /* store p0 */
6446     if (pcbddc->benign_n) {
6447       PetscScalar *array;
6448       PetscInt     j;
6449 
6450       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6451       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6452       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6453     }
6454   } else { /* expand the coarse solution */
6455     if (applytranspose) {
6456       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6457     } else {
6458       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6459     }
6460   }
6461   PetscFunctionReturn(PETSC_SUCCESS);
6462 }
6463 
PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode,ScatterMode smode)6464 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6465 {
6466   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6467   Vec                from, to;
6468   const PetscScalar *array;
6469 
6470   PetscFunctionBegin;
6471   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6472     from = pcbddc->coarse_vec;
6473     to   = pcbddc->vec1_P;
6474     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6475       Vec tvec;
6476 
6477       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6478       PetscCall(VecResetArray(tvec));
6479       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6480       PetscCall(VecGetArrayRead(tvec, &array));
6481       PetscCall(VecPlaceArray(from, array));
6482       PetscCall(VecRestoreArrayRead(tvec, &array));
6483     }
6484   } else { /* from local to global -> put data in coarse right-hand side */
6485     from = pcbddc->vec1_P;
6486     to   = pcbddc->coarse_vec;
6487   }
6488   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6489   PetscFunctionReturn(PETSC_SUCCESS);
6490 }
6491 
PCBDDCScatterCoarseDataEnd(PC pc,InsertMode imode,ScatterMode smode)6492 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6493 {
6494   PC_BDDC           *pcbddc = (PC_BDDC *)pc->data;
6495   Vec                from, to;
6496   const PetscScalar *array;
6497 
6498   PetscFunctionBegin;
6499   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6500     from = pcbddc->coarse_vec;
6501     to   = pcbddc->vec1_P;
6502   } else { /* from local to global -> put data in coarse right-hand side */
6503     from = pcbddc->vec1_P;
6504     to   = pcbddc->coarse_vec;
6505   }
6506   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6507   if (smode == SCATTER_FORWARD) {
6508     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6509       Vec tvec;
6510 
6511       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6512       PetscCall(VecGetArrayRead(to, &array));
6513       PetscCall(VecPlaceArray(tvec, array));
6514       PetscCall(VecRestoreArrayRead(to, &array));
6515     }
6516   } else {
6517     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6518       PetscCall(VecResetArray(from));
6519     }
6520   }
6521   PetscFunctionReturn(PETSC_SUCCESS);
6522 }
6523 
PCBDDCConstraintsSetUp(PC pc)6524 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6525 {
6526   PC_IS   *pcis   = (PC_IS *)pc->data;
6527   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6528   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
6529   /* one and zero */
6530   PetscScalar one = 1.0, zero = 0.0;
6531   /* space to store constraints and their local indices */
6532   PetscScalar *constraints_data;
6533   PetscInt    *constraints_idxs, *constraints_idxs_B;
6534   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
6535   PetscInt    *constraints_n;
6536   /* iterators */
6537   PetscInt i, j, k, total_counts, total_counts_cc, cum;
6538   /* BLAS integers */
6539   PetscBLASInt lwork, lierr;
6540   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6541   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6542   /* reuse */
6543   PetscInt  olocal_primal_size, olocal_primal_size_cc;
6544   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6545   /* change of basis */
6546   PetscBool qr_needed;
6547   PetscBT   change_basis, qr_needed_idx;
6548   /* auxiliary stuff */
6549   PetscInt *nnz, *is_indices;
6550   PetscInt  ncc;
6551   /* some quantities */
6552   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
6553   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6554   PetscReal tol; /* tolerance for retaining eigenmodes */
6555 
6556   PetscFunctionBegin;
6557   tol = PetscSqrtReal(PETSC_SMALL);
6558   /* Destroy Mat objects computed previously */
6559   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6560   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6561   PetscCall(MatDestroy(&pcbddc->switch_static_change));
6562   /* save info on constraints from previous setup (if any) */
6563   olocal_primal_size    = pcbddc->local_primal_size;
6564   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6565   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6566   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6567   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6568   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6569   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6570 
6571   if (!pcbddc->adaptive_selection) {
6572     IS           ISForVertices, *ISForFaces, *ISForEdges;
6573     MatNullSpace nearnullsp;
6574     const Vec   *nearnullvecs;
6575     Vec         *localnearnullsp;
6576     PetscScalar *array;
6577     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6578     PetscBool    nnsp_has_cnst;
6579     /* LAPACK working arrays for SVD or POD */
6580     PetscBool    skip_lapack, boolforchange;
6581     PetscScalar *work;
6582     PetscReal   *singular_vals;
6583 #if defined(PETSC_USE_COMPLEX)
6584     PetscReal *rwork;
6585 #endif
6586     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6587     PetscBLASInt dummy_int    = 1;
6588     PetscScalar  dummy_scalar = 1.;
6589     PetscBool    use_pod      = PetscDefined(MISSING_LAPACK_GESVD) || PetscDefined(HAVE_MKL_LIBS) ? PETSC_TRUE : PETSC_FALSE; /* MKL SVD with same input gives different results on different processes! */
6590 
6591     /* Get index sets for faces, edges and vertices from graph */
6592     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6593     o_nf       = n_ISForFaces;
6594     o_ne       = n_ISForEdges;
6595     n_vertices = 0;
6596     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6597     /* print some info */
6598     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6599       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6600       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6601       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6602       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6603       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6604       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6605       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6606       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6607       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6608     }
6609 
6610     if (!pcbddc->use_vertices) n_vertices = 0;
6611     if (!pcbddc->use_edges) n_ISForEdges = 0;
6612     if (!pcbddc->use_faces) n_ISForFaces = 0;
6613 
6614     /* check if near null space is attached to global mat */
6615     if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6616     else nearnullsp = NULL;
6617 
6618     if (nearnullsp) {
6619       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6620       /* remove any stored info */
6621       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6622       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6623       /* store information for BDDC solver reuse */
6624       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6625       pcbddc->onearnullspace = nearnullsp;
6626       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6627       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6628     } else { /* if near null space is not provided BDDC uses constants by default */
6629       nnsp_size     = 0;
6630       nnsp_has_cnst = PETSC_TRUE;
6631     }
6632     /* get max number of constraints on a single cc */
6633     max_constraints = nnsp_size;
6634     if (nnsp_has_cnst) max_constraints++;
6635 
6636     /*
6637          Evaluate maximum storage size needed by the procedure
6638          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6639          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6640          There can be multiple constraints per connected component
6641                                                                                                                                                            */
6642     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6643     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6644 
6645     total_counts = n_ISForFaces + n_ISForEdges;
6646     total_counts *= max_constraints;
6647     total_counts += n_vertices;
6648     PetscCall(PetscBTCreate(total_counts, &change_basis));
6649 
6650     total_counts           = 0;
6651     max_size_of_constraint = 0;
6652     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6653       IS used_is;
6654       if (i < n_ISForEdges) {
6655         used_is = ISForEdges[i];
6656       } else {
6657         used_is = ISForFaces[i - n_ISForEdges];
6658       }
6659       PetscCall(ISGetSize(used_is, &j));
6660       total_counts += j;
6661       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6662     }
6663     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6664 
6665     /* get local part of global near null space vectors */
6666     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6667     for (k = 0; k < nnsp_size; k++) {
6668       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6669       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6670       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6671     }
6672 
6673     /* whether or not to skip lapack calls */
6674     skip_lapack = PETSC_TRUE;
6675     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6676 
6677     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6678     if (!skip_lapack) {
6679       PetscScalar temp_work;
6680 
6681       if (use_pod) {
6682         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6683         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6684         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6685         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6686 #if defined(PETSC_USE_COMPLEX)
6687         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6688 #endif
6689         /* now we evaluate the optimal workspace using query with lwork=-1 */
6690         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6691         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6692         lwork = -1;
6693         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6694 #if !defined(PETSC_USE_COMPLEX)
6695         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6696 #else
6697         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6698 #endif
6699         PetscCall(PetscFPTrapPop());
6700         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6701       } else {
6702 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6703         /* SVD */
6704         PetscInt max_n, min_n;
6705         max_n = max_size_of_constraint;
6706         min_n = max_constraints;
6707         if (max_size_of_constraint < max_constraints) {
6708           min_n = max_size_of_constraint;
6709           max_n = max_constraints;
6710         }
6711         PetscCall(PetscMalloc1(min_n, &singular_vals));
6712   #if defined(PETSC_USE_COMPLEX)
6713         PetscCall(PetscMalloc1(5 * min_n, &rwork));
6714   #endif
6715         /* now we evaluate the optimal workspace using query with lwork=-1 */
6716         lwork = -1;
6717         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6718         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6719         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6720         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6721   #if !defined(PETSC_USE_COMPLEX)
6722         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6723   #else
6724         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6725   #endif
6726         PetscCall(PetscFPTrapPop());
6727         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6728 #else
6729         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6730 #endif /* on missing GESVD */
6731       }
6732       /* Allocate optimal workspace */
6733       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6734       PetscCall(PetscMalloc1(lwork, &work));
6735     }
6736     /* Now we can loop on constraining sets */
6737     total_counts            = 0;
6738     constraints_idxs_ptr[0] = 0;
6739     constraints_data_ptr[0] = 0;
6740     /* vertices */
6741     if (n_vertices) {
6742       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6743       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6744       for (i = 0; i < n_vertices; i++) {
6745         constraints_n[total_counts]            = 1;
6746         constraints_data[total_counts]         = 1.0;
6747         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6748         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6749         total_counts++;
6750       }
6751       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6752     }
6753 
6754     /* edges and faces */
6755     total_counts_cc = total_counts;
6756     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6757       IS        used_is;
6758       PetscBool idxs_copied = PETSC_FALSE;
6759 
6760       if (ncc < n_ISForEdges) {
6761         used_is       = ISForEdges[ncc];
6762         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6763       } else {
6764         used_is       = ISForFaces[ncc - n_ISForEdges];
6765         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6766       }
6767       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6768 
6769       PetscCall(ISGetSize(used_is, &size_of_constraint));
6770       if (!size_of_constraint) continue;
6771       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6772       if (nnsp_has_cnst) {
6773         PetscScalar quad_value;
6774 
6775         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6776         idxs_copied = PETSC_TRUE;
6777 
6778         if (!pcbddc->use_nnsp_true) {
6779           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6780         } else {
6781           quad_value = 1.0;
6782         }
6783         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6784         temp_constraints++;
6785         total_counts++;
6786       }
6787       for (k = 0; k < nnsp_size; k++) {
6788         PetscReal    real_value;
6789         PetscScalar *ptr_to_data;
6790 
6791         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6792         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6793         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6794         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6795         /* check if array is null on the connected component */
6796         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6797         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6798         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6799           temp_constraints++;
6800           total_counts++;
6801           if (!idxs_copied) {
6802             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6803             idxs_copied = PETSC_TRUE;
6804           }
6805         }
6806       }
6807       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6808       valid_constraints = temp_constraints;
6809       if (!pcbddc->use_nnsp_true && temp_constraints) {
6810         if (temp_constraints == 1) { /* just normalize the constraint */
6811           PetscScalar norm, *ptr_to_data;
6812 
6813           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6814           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6815           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6816           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6817           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6818         } else { /* perform SVD */
6819           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6820 
6821           if (use_pod) {
6822             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6823                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6824                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6825                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6826                   from that computed using LAPACKgesvd
6827                -> This is due to a different computation of eigenvectors in LAPACKheev
6828                -> The quality of the POD-computed basis will be the same */
6829             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6830             /* Store upper triangular part of correlation matrix */
6831             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6832             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6833             for (j = 0; j < temp_constraints; j++) {
6834               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6835             }
6836             /* compute eigenvalues and eigenvectors of correlation matrix */
6837             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6838             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6839 #if !defined(PETSC_USE_COMPLEX)
6840             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6841 #else
6842             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6843 #endif
6844             PetscCall(PetscFPTrapPop());
6845             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6846             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6847             j = 0;
6848             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6849             total_counts      = total_counts - j;
6850             valid_constraints = temp_constraints - j;
6851             /* scale and copy POD basis into used quadrature memory */
6852             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6853             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6854             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6855             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6856             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6857             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6858             if (j < temp_constraints) {
6859               PetscInt ii;
6860               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6861               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6862               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6863               PetscCall(PetscFPTrapPop());
6864               for (k = 0; k < temp_constraints - j; k++) {
6865                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6866               }
6867             }
6868           } else {
6869 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6870             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6871             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6872             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6873             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6874   #if !defined(PETSC_USE_COMPLEX)
6875             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6876   #else
6877             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6878   #endif
6879             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6880             PetscCall(PetscFPTrapPop());
6881             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6882             k = temp_constraints;
6883             if (k > size_of_constraint) k = size_of_constraint;
6884             j = 0;
6885             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6886             valid_constraints = k - j;
6887             total_counts      = total_counts - temp_constraints + valid_constraints;
6888 #else
6889             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6890 #endif /* on missing GESVD */
6891           }
6892         }
6893       }
6894       /* update pointers information */
6895       if (valid_constraints) {
6896         constraints_n[total_counts_cc]            = valid_constraints;
6897         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6898         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6899         /* set change_of_basis flag */
6900         if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6901         total_counts_cc++;
6902       }
6903     }
6904     /* free workspace */
6905     if (!skip_lapack) {
6906       PetscCall(PetscFree(work));
6907 #if defined(PETSC_USE_COMPLEX)
6908       PetscCall(PetscFree(rwork));
6909 #endif
6910       PetscCall(PetscFree(singular_vals));
6911       PetscCall(PetscFree(correlation_mat));
6912       PetscCall(PetscFree(temp_basis));
6913     }
6914     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6915     PetscCall(PetscFree(localnearnullsp));
6916     /* free index sets of faces, edges and vertices */
6917     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6918   } else {
6919     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6920 
6921     total_counts = 0;
6922     n_vertices   = 0;
6923     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6924     max_constraints = 0;
6925     total_counts_cc = 0;
6926     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6927       total_counts += pcbddc->adaptive_constraints_n[i];
6928       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6929       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6930     }
6931     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6932     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6933     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6934     constraints_data     = pcbddc->adaptive_constraints_data;
6935     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6936     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6937     total_counts_cc = 0;
6938     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6939       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6940     }
6941 
6942     max_size_of_constraint = 0;
6943     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6944     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6945     /* Change of basis */
6946     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6947     if (pcbddc->use_change_of_basis) {
6948       for (i = 0; i < sub_schurs->n_subs; i++) {
6949         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6950       }
6951     }
6952   }
6953   pcbddc->local_primal_size = total_counts;
6954   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6955 
6956   /* map constraints_idxs in boundary numbering */
6957   if (pcbddc->use_change_of_basis) {
6958     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6959     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6960   }
6961 
6962   /* Create constraint matrix */
6963   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6964   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6965   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6966 
6967   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6968   /* determine if a QR strategy is needed for change of basis */
6969   qr_needed = pcbddc->use_qr_single;
6970   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6971   total_primal_vertices        = 0;
6972   pcbddc->local_primal_size_cc = 0;
6973   for (i = 0; i < total_counts_cc; i++) {
6974     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6975     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6976       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6977       pcbddc->local_primal_size_cc += 1;
6978     } else if (PetscBTLookup(change_basis, i)) {
6979       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6980       pcbddc->local_primal_size_cc += constraints_n[i];
6981       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6982         PetscCall(PetscBTSet(qr_needed_idx, i));
6983         qr_needed = PETSC_TRUE;
6984       }
6985     } else {
6986       pcbddc->local_primal_size_cc += 1;
6987     }
6988   }
6989   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6990   pcbddc->n_vertices = total_primal_vertices;
6991   /* permute indices in order to have a sorted set of vertices */
6992   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6993   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6994   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6995   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6996 
6997   /* nonzero structure of constraint matrix */
6998   /* and get reference dof for local constraints */
6999   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
7000   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
7001 
7002   j            = total_primal_vertices;
7003   total_counts = total_primal_vertices;
7004   cum          = total_primal_vertices;
7005   for (i = n_vertices; i < total_counts_cc; i++) {
7006     if (!PetscBTLookup(change_basis, i)) {
7007       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
7008       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
7009       cum++;
7010       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7011       for (k = 0; k < constraints_n[i]; k++) {
7012         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
7013         nnz[j + k]                                        = size_of_constraint;
7014       }
7015       j += constraints_n[i];
7016     }
7017   }
7018   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
7019   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7020   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
7021   PetscCall(PetscFree(nnz));
7022 
7023   /* set values in constraint matrix */
7024   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
7025   total_counts = total_primal_vertices;
7026   for (i = n_vertices; i < total_counts_cc; i++) {
7027     if (!PetscBTLookup(change_basis, i)) {
7028       PetscInt *cols;
7029 
7030       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7031       cols               = constraints_idxs + constraints_idxs_ptr[i];
7032       for (k = 0; k < constraints_n[i]; k++) {
7033         PetscInt     row = total_counts + k;
7034         PetscScalar *vals;
7035 
7036         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
7037         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
7038       }
7039       total_counts += constraints_n[i];
7040     }
7041   }
7042   /* assembling */
7043   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
7044   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
7045   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
7046 
7047   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
7048   if (pcbddc->use_change_of_basis) {
7049     /* dual and primal dofs on a single cc */
7050     PetscInt dual_dofs, primal_dofs;
7051     /* working stuff for GEQRF */
7052     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
7053     PetscBLASInt lqr_work;
7054     /* working stuff for UNGQR */
7055     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
7056     PetscBLASInt lgqr_work;
7057     /* working stuff for TRTRS */
7058     PetscScalar *trs_rhs = NULL;
7059     PetscBLASInt Blas_NRHS;
7060     /* pointers for values insertion into change of basis matrix */
7061     PetscInt    *start_rows, *start_cols;
7062     PetscScalar *start_vals;
7063     /* working stuff for values insertion */
7064     PetscBT   is_primal;
7065     PetscInt *aux_primal_numbering_B;
7066     /* matrix sizes */
7067     PetscInt global_size, local_size;
7068     /* temporary change of basis */
7069     Mat localChangeOfBasisMatrix;
7070     /* extra space for debugging */
7071     PetscScalar *dbg_work = NULL;
7072 
7073     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
7074     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
7075     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
7076     /* nonzeros for local mat */
7077     PetscCall(PetscMalloc1(pcis->n, &nnz));
7078     if (!pcbddc->benign_change || pcbddc->fake_change) {
7079       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
7080     } else {
7081       const PetscInt *ii;
7082       PetscInt        n;
7083       PetscBool       flg_row;
7084       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7085       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
7086       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7087     }
7088     for (i = n_vertices; i < total_counts_cc; i++) {
7089       if (PetscBTLookup(change_basis, i)) {
7090         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7091         if (PetscBTLookup(qr_needed_idx, i)) {
7092           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
7093         } else {
7094           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
7095           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
7096         }
7097       }
7098     }
7099     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
7100     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7101     PetscCall(PetscFree(nnz));
7102     /* Set interior change in the matrix */
7103     if (!pcbddc->benign_change || pcbddc->fake_change) {
7104       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
7105     } else {
7106       const PetscInt *ii, *jj;
7107       PetscScalar    *aa;
7108       PetscInt        n;
7109       PetscBool       flg_row;
7110       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7111       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
7112       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
7113       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
7114       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7115     }
7116 
7117     if (pcbddc->dbg_flag) {
7118       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
7119       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
7120     }
7121 
7122     /* Now we loop on the constraints which need a change of basis */
7123     /*
7124        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7125        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7126 
7127        Basic blocks of change of basis matrix T computed:
7128 
7129           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7130 
7131             | 1        0   ...        0         s_1/S |
7132             | 0        1   ...        0         s_2/S |
7133             |              ...                        |
7134             | 0        ...            1     s_{n-1}/S |
7135             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
7136 
7137             with S = \sum_{i=1}^n s_i^2
7138             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7139                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7140 
7141           - QR decomposition of constraints otherwise
7142     */
7143     if (qr_needed && max_size_of_constraint) {
7144       /* space to store Q */
7145       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7146       /* array to store scaling factors for reflectors */
7147       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7148       /* first we issue queries for optimal work */
7149       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7150       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7151       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7152       lqr_work = -1;
7153       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7154       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7155       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7156       PetscCall(PetscMalloc1(lqr_work, &qr_work));
7157       lgqr_work = -1;
7158       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7159       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7160       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7161       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7162       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7163       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7164       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7165       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7166       PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7167       /* array to store rhs and solution of triangular solver */
7168       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7169       /* allocating workspace for check */
7170       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7171     }
7172     /* array to store whether a node is primal or not */
7173     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7174     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7175     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7176     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7177     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7178     PetscCall(PetscFree(aux_primal_numbering_B));
7179 
7180     /* loop on constraints and see whether or not they need a change of basis and compute it */
7181     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7182       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7183       if (PetscBTLookup(change_basis, total_counts)) {
7184         /* get constraint info */
7185         primal_dofs = constraints_n[total_counts];
7186         dual_dofs   = size_of_constraint - primal_dofs;
7187 
7188         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7189 
7190         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7191 
7192           /* copy quadrature constraints for change of basis check */
7193           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7194           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7195           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7196 
7197           /* compute QR decomposition of constraints */
7198           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7199           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7200           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7201           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7202           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7203           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7204           PetscCall(PetscFPTrapPop());
7205 
7206           /* explicitly compute R^-T */
7207           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7208           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7209           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7210           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7211           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7212           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7213           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7214           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7215           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr);
7216           PetscCall(PetscFPTrapPop());
7217 
7218           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7219           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7220           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7221           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7222           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7223           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7224           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7225           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7226           PetscCall(PetscFPTrapPop());
7227 
7228           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7229              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7230              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7231           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7232           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7233           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7234           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7235           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7236           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7237           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7238           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7239           PetscCall(PetscFPTrapPop());
7240           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7241 
7242           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7243           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7244           /* insert cols for primal dofs */
7245           for (j = 0; j < primal_dofs; j++) {
7246             start_vals = &qr_basis[j * size_of_constraint];
7247             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7248             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7249           }
7250           /* insert cols for dual dofs */
7251           for (j = 0, k = 0; j < dual_dofs; k++) {
7252             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7253               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7254               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7255               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7256               j++;
7257             }
7258           }
7259 
7260           /* check change of basis */
7261           if (pcbddc->dbg_flag) {
7262             PetscInt  ii, jj;
7263             PetscBool valid_qr = PETSC_TRUE;
7264             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7265             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7266             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7267             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7268             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7269             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7270             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7271             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7272             PetscCall(PetscFPTrapPop());
7273             for (jj = 0; jj < size_of_constraint; jj++) {
7274               for (ii = 0; ii < primal_dofs; ii++) {
7275                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7276                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7277               }
7278             }
7279             if (!valid_qr) {
7280               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7281               for (jj = 0; jj < size_of_constraint; jj++) {
7282                 for (ii = 0; ii < primal_dofs; ii++) {
7283                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7284                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7285                   }
7286                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7287                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7288                   }
7289                 }
7290               }
7291             } else {
7292               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7293             }
7294           }
7295         } else { /* simple transformation block */
7296           PetscInt    row, col;
7297           PetscScalar val, norm;
7298 
7299           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7300           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7301           for (j = 0; j < size_of_constraint; j++) {
7302             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7303             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7304             if (!PetscBTLookup(is_primal, row_B)) {
7305               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7306               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7307               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7308             } else {
7309               for (k = 0; k < size_of_constraint; k++) {
7310                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7311                 if (row != col) {
7312                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7313                 } else {
7314                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7315                 }
7316                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7317               }
7318             }
7319           }
7320           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7321         }
7322       } else {
7323         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7324       }
7325     }
7326 
7327     /* free workspace */
7328     if (qr_needed) {
7329       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7330       PetscCall(PetscFree(trs_rhs));
7331       PetscCall(PetscFree(qr_tau));
7332       PetscCall(PetscFree(qr_work));
7333       PetscCall(PetscFree(gqr_work));
7334       PetscCall(PetscFree(qr_basis));
7335     }
7336     PetscCall(PetscBTDestroy(&is_primal));
7337     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7338     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7339 
7340     /* assembling of global change of variable */
7341     if (!pcbddc->fake_change) {
7342       Mat tmat;
7343 
7344       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7345       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7346       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7347       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7348       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7349       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7350       PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7351       PetscCall(MatDestroy(&tmat));
7352       PetscCall(VecSet(pcis->vec1_global, 0.0));
7353       PetscCall(VecSet(pcis->vec1_N, 1.0));
7354       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7355       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7356       PetscCall(VecReciprocal(pcis->vec1_global));
7357       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7358 
7359       /* check */
7360       if (pcbddc->dbg_flag) {
7361         PetscReal error;
7362         Vec       x, x_change;
7363 
7364         PetscCall(VecDuplicate(pcis->vec1_global, &x));
7365         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7366         PetscCall(VecSetRandom(x, NULL));
7367         PetscCall(VecCopy(x, pcis->vec1_global));
7368         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7369         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7370         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7371         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7372         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7373         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7374         PetscCall(VecAXPY(x, -1.0, x_change));
7375         PetscCall(VecNorm(x, NORM_INFINITY, &error));
7376         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7377         PetscCall(VecDestroy(&x));
7378         PetscCall(VecDestroy(&x_change));
7379       }
7380       /* adapt sub_schurs computed (if any) */
7381       if (pcbddc->use_deluxe_scaling) {
7382         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7383 
7384         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7385         if (sub_schurs && sub_schurs->S_Ej_all) {
7386           Mat S_new, tmat;
7387           IS  is_all_N, is_V_Sall = NULL;
7388 
7389           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7390           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7391           if (pcbddc->deluxe_zerorows) {
7392             ISLocalToGlobalMapping NtoSall;
7393             IS                     is_V;
7394             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7395             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7396             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7397             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7398             PetscCall(ISDestroy(&is_V));
7399           }
7400           PetscCall(ISDestroy(&is_all_N));
7401           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7402           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7403           PetscCall(PetscObjectReference((PetscObject)S_new));
7404           if (pcbddc->deluxe_zerorows) {
7405             const PetscScalar *array;
7406             const PetscInt    *idxs_V, *idxs_all;
7407             PetscInt           i, n_V;
7408 
7409             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7410             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7411             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7412             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7413             PetscCall(VecGetArrayRead(pcis->D, &array));
7414             for (i = 0; i < n_V; i++) {
7415               PetscScalar val;
7416               PetscInt    idx;
7417 
7418               idx = idxs_V[i];
7419               val = array[idxs_all[idxs_V[i]]];
7420               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7421             }
7422             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7423             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7424             PetscCall(VecRestoreArrayRead(pcis->D, &array));
7425             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7426             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7427           }
7428           sub_schurs->S_Ej_all = S_new;
7429           PetscCall(MatDestroy(&S_new));
7430           if (sub_schurs->sum_S_Ej_all) {
7431             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7432             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7433             PetscCall(PetscObjectReference((PetscObject)S_new));
7434             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7435             sub_schurs->sum_S_Ej_all = S_new;
7436             PetscCall(MatDestroy(&S_new));
7437           }
7438           PetscCall(ISDestroy(&is_V_Sall));
7439           PetscCall(MatDestroy(&tmat));
7440         }
7441         /* destroy any change of basis context in sub_schurs */
7442         if (sub_schurs && sub_schurs->change) {
7443           PetscInt i;
7444 
7445           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7446           PetscCall(PetscFree(sub_schurs->change));
7447         }
7448       }
7449       if (pcbddc->switch_static) { /* need to save the local change */
7450         pcbddc->switch_static_change = localChangeOfBasisMatrix;
7451       } else {
7452         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7453       }
7454       /* determine if any process has changed the pressures locally */
7455       pcbddc->change_interior = pcbddc->benign_have_null;
7456     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7457       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7458       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7459       pcbddc->use_qr_single    = qr_needed;
7460     }
7461   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7462     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7463       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7464       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7465     } else {
7466       Mat benign_global = NULL;
7467       if (pcbddc->benign_have_null) {
7468         Mat M;
7469 
7470         pcbddc->change_interior = PETSC_TRUE;
7471         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7472         PetscCall(VecReciprocal(pcis->vec1_N));
7473         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7474         if (pcbddc->benign_change) {
7475           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7476           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7477         } else {
7478           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7479           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7480         }
7481         PetscCall(MatISSetLocalMat(benign_global, M));
7482         PetscCall(MatDestroy(&M));
7483         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7484         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7485       }
7486       if (pcbddc->user_ChangeOfBasisMatrix) {
7487         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7488         PetscCall(MatDestroy(&benign_global));
7489       } else if (pcbddc->benign_have_null) {
7490         pcbddc->ChangeOfBasisMatrix = benign_global;
7491       }
7492     }
7493     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7494       IS              is_global;
7495       const PetscInt *gidxs;
7496 
7497       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7498       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7499       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7500       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7501       PetscCall(ISDestroy(&is_global));
7502     }
7503   }
7504   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7505 
7506   if (!pcbddc->fake_change) {
7507     /* add pressure dofs to set of primal nodes for numbering purposes */
7508     for (i = 0; i < pcbddc->benign_n; i++) {
7509       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
7510       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7511       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
7512       pcbddc->local_primal_size_cc++;
7513       pcbddc->local_primal_size++;
7514     }
7515 
7516     /* check if a new primal space has been introduced (also take into account benign trick) */
7517     pcbddc->new_primal_space_local = PETSC_TRUE;
7518     if (olocal_primal_size == pcbddc->local_primal_size) {
7519       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7520       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7521       if (!pcbddc->new_primal_space_local) {
7522         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7523         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7524       }
7525     }
7526     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7527     PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7528   }
7529   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7530 
7531   /* flush dbg viewer */
7532   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7533 
7534   /* free workspace */
7535   PetscCall(PetscBTDestroy(&qr_needed_idx));
7536   PetscCall(PetscBTDestroy(&change_basis));
7537   if (!pcbddc->adaptive_selection) {
7538     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7539     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7540   } else {
7541     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7542     PetscCall(PetscFree(constraints_n));
7543     PetscCall(PetscFree(constraints_idxs_B));
7544   }
7545   PetscFunctionReturn(PETSC_SUCCESS);
7546 }
7547 
PCBDDCAnalyzeInterface(PC pc)7548 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7549 {
7550   ISLocalToGlobalMapping map;
7551   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7552   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
7553   PetscInt               i, N;
7554   PetscBool              rcsr = PETSC_FALSE;
7555 
7556   PetscFunctionBegin;
7557   if (pcbddc->recompute_topography) {
7558     pcbddc->graphanalyzed = PETSC_FALSE;
7559     /* Reset previously computed graph */
7560     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7561     /* Init local Graph struct */
7562     PetscCall(MatGetSize(pc->pmat, &N, NULL));
7563     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7564     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7565 
7566     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7567     /* Check validity of the csr graph passed in by the user */
7568     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7569                pcbddc->mat_graph->nvtxs);
7570 
7571     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7572     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7573       PetscInt *xadj, *adjncy;
7574       PetscInt  nvtxs;
7575       PetscBool flg_row;
7576       Mat       A;
7577 
7578       PetscCall(PetscObjectReference((PetscObject)matis->A));
7579       A = matis->A;
7580       for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7581         Mat AtA;
7582 
7583         PetscCall(MatProductCreate(A, A, NULL, &AtA));
7584         PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7585         PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7586         PetscCall(MatProductSetFromOptions(AtA));
7587         PetscCall(MatProductSymbolic(AtA));
7588         PetscCall(MatProductClear(AtA));
7589         /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7590         AtA->assembled = PETSC_TRUE;
7591         PetscCall(MatDestroy(&A));
7592         A = AtA;
7593       }
7594       PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7595       if (flg_row) {
7596         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7597         pcbddc->computed_rowadj = PETSC_TRUE;
7598         PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7599         rcsr = PETSC_TRUE;
7600       }
7601       PetscCall(MatDestroy(&A));
7602     }
7603     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7604 
7605     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7606       PetscReal   *lcoords;
7607       PetscInt     n;
7608       MPI_Datatype dimrealtype;
7609       PetscMPIInt  cdimi;
7610 
7611       /* TODO: support for blocked */
7612       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7613       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7614       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7615       PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi));
7616       PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype));
7617       PetscCallMPI(MPI_Type_commit(&dimrealtype));
7618       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7619       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7620       PetscCallMPI(MPI_Type_free(&dimrealtype));
7621       PetscCall(PetscFree(pcbddc->mat_graph->coords));
7622 
7623       pcbddc->mat_graph->coords = lcoords;
7624       pcbddc->mat_graph->cloc   = PETSC_TRUE;
7625       pcbddc->mat_graph->cnloc  = n;
7626     }
7627     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7628                pcbddc->mat_graph->nvtxs);
7629     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7630 
7631     /* attach info on disconnected subdomains if present */
7632     if (pcbddc->n_local_subs) {
7633       PetscInt *local_subs, n, totn;
7634 
7635       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7636       PetscCall(PetscMalloc1(n, &local_subs));
7637       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7638       for (i = 0; i < pcbddc->n_local_subs; i++) {
7639         const PetscInt *idxs;
7640         PetscInt        nl, j;
7641 
7642         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7643         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7644         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7645         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7646       }
7647       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7648       pcbddc->mat_graph->n_local_subs = totn + 1;
7649       pcbddc->mat_graph->local_subs   = local_subs;
7650     }
7651 
7652     /* Setup of Graph */
7653     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7654   }
7655 
7656   if (!pcbddc->graphanalyzed) {
7657     /* Graph's connected components analysis */
7658     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7659     pcbddc->graphanalyzed   = PETSC_TRUE;
7660     pcbddc->corner_selected = pcbddc->corner_selection;
7661   }
7662   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7663   PetscFunctionReturn(PETSC_SUCCESS);
7664 }
7665 
PCBDDCOrthonormalizeVecs(PetscInt * nio,Vec vecs[])7666 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7667 {
7668   PetscInt     i, j, n;
7669   PetscScalar *alphas;
7670   PetscReal    norm, *onorms;
7671 
7672   PetscFunctionBegin;
7673   n = *nio;
7674   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7675   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7676   PetscCall(VecNormalize(vecs[0], &norm));
7677   if (norm < PETSC_SMALL) {
7678     onorms[0] = 0.0;
7679     PetscCall(VecSet(vecs[0], 0.0));
7680   } else {
7681     onorms[0] = norm;
7682   }
7683 
7684   for (i = 1; i < n; i++) {
7685     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7686     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7687     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7688     PetscCall(VecNormalize(vecs[i], &norm));
7689     if (norm < PETSC_SMALL) {
7690       onorms[i] = 0.0;
7691       PetscCall(VecSet(vecs[i], 0.0));
7692     } else {
7693       onorms[i] = norm;
7694     }
7695   }
7696   /* push nonzero vectors at the beginning */
7697   for (i = 0; i < n; i++) {
7698     if (onorms[i] == 0.0) {
7699       for (j = i + 1; j < n; j++) {
7700         if (onorms[j] != 0.0) {
7701           PetscCall(VecCopy(vecs[j], vecs[i]));
7702           onorms[i] = onorms[j];
7703           onorms[j] = 0.0;
7704           break;
7705         }
7706       }
7707     }
7708   }
7709   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7710   PetscCall(PetscFree2(alphas, onorms));
7711   PetscFunctionReturn(PETSC_SUCCESS);
7712 }
7713 
PCBDDCMatISGetSubassemblingPattern(Mat mat,PetscInt * n_subdomains,PetscInt redprocs,IS * is_sends,PetscBool * have_void)7714 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7715 {
7716   ISLocalToGlobalMapping mapping;
7717   Mat                    A;
7718   PetscInt               n_neighs, *neighs, *n_shared, **shared;
7719   PetscMPIInt            size, rank, color;
7720   PetscInt              *xadj, *adjncy;
7721   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7722   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
7723   PetscInt               void_procs, *procs_candidates = NULL;
7724   PetscInt               xadj_count, *count;
7725   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
7726   PetscSubcomm           psubcomm;
7727   MPI_Comm               subcomm;
7728 
7729   PetscFunctionBegin;
7730   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7731   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7732   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7733   PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7734   PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7735   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7736 
7737   if (have_void) *have_void = PETSC_FALSE;
7738   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7739   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7740   PetscCall(MatISGetLocalMat(mat, &A));
7741   PetscCall(MatGetLocalSize(A, &n, NULL));
7742   im_active = !!n;
7743   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7744   void_procs = size - active_procs;
7745   /* get ranks of non-active processes in mat communicator */
7746   if (void_procs) {
7747     PetscInt ncand;
7748 
7749     if (have_void) *have_void = PETSC_TRUE;
7750     PetscCall(PetscMalloc1(size, &procs_candidates));
7751     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7752     for (i = 0, ncand = 0; i < size; i++) {
7753       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7754     }
7755     /* force n_subdomains to be not greater that the number of non-active processes */
7756     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7757   }
7758 
7759   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7760      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7761   PetscCall(MatGetSize(mat, &N, NULL));
7762   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7763     PetscInt  issize, isidx, dest;
7764     PetscBool default_sub;
7765 
7766     if (*n_subdomains == 1) dest = 0;
7767     else dest = rank;
7768     if (im_active) {
7769       issize = 1;
7770       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7771         isidx = procs_candidates[dest];
7772       } else {
7773         isidx = dest;
7774       }
7775     } else {
7776       issize = 0;
7777       isidx  = rank;
7778     }
7779     if (*n_subdomains != 1) *n_subdomains = active_procs;
7780     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7781     default_sub = (PetscBool)(isidx == rank);
7782     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
7783     if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling"));
7784     PetscCall(PetscFree(procs_candidates));
7785     PetscFunctionReturn(PETSC_SUCCESS);
7786   }
7787   PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7788   PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7789   threshold = PetscMax(threshold, 2);
7790 
7791   /* Get info on mapping */
7792   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7793   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7794 
7795   /* build local CSR graph of subdomains' connectivity */
7796   PetscCall(PetscMalloc1(2, &xadj));
7797   xadj[0] = 0;
7798   xadj[1] = PetscMax(n_neighs - 1, 0);
7799   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7800   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7801   PetscCall(PetscCalloc1(n, &count));
7802   for (i = 1; i < n_neighs; i++)
7803     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7804 
7805   xadj_count = 0;
7806   for (i = 1; i < n_neighs; i++) {
7807     for (j = 0; j < n_shared[i]; j++) {
7808       if (count[shared[i][j]] < threshold) {
7809         adjncy[xadj_count]     = neighs[i];
7810         adjncy_wgt[xadj_count] = n_shared[i];
7811         xadj_count++;
7812         break;
7813       }
7814     }
7815   }
7816   xadj[1] = xadj_count;
7817   PetscCall(PetscFree(count));
7818   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7819   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7820 
7821   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7822 
7823   /* Restrict work on active processes only */
7824   PetscCall(PetscMPIIntCast(im_active, &color));
7825   if (void_procs) {
7826     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7827     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7828     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7829     subcomm = PetscSubcommChild(psubcomm);
7830   } else {
7831     psubcomm = NULL;
7832     subcomm  = PetscObjectComm((PetscObject)mat);
7833   }
7834 
7835   v_wgt = NULL;
7836   if (!color) {
7837     PetscCall(PetscFree(xadj));
7838     PetscCall(PetscFree(adjncy));
7839     PetscCall(PetscFree(adjncy_wgt));
7840   } else {
7841     Mat             subdomain_adj;
7842     IS              new_ranks, new_ranks_contig;
7843     MatPartitioning partitioner;
7844     PetscInt        rstart, rend;
7845     PetscMPIInt     irstart = 0, irend = 0;
7846     PetscInt       *is_indices, *oldranks;
7847     PetscMPIInt     size;
7848     PetscBool       aggregate;
7849 
7850     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7851     if (void_procs) {
7852       PetscInt prank = rank;
7853       PetscCall(PetscMalloc1(size, &oldranks));
7854       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7855       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7856       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7857     } else {
7858       oldranks = NULL;
7859     }
7860     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7861     if (aggregate) { /* TODO: all this part could be made more efficient */
7862       PetscInt     lrows, row, ncols, *cols;
7863       PetscMPIInt  nrank;
7864       PetscScalar *vals;
7865 
7866       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7867       lrows = 0;
7868       if (nrank < redprocs) {
7869         lrows = size / redprocs;
7870         if (nrank < size % redprocs) lrows++;
7871       }
7872       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7873       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7874       PetscCall(PetscMPIIntCast(rstart, &irstart));
7875       PetscCall(PetscMPIIntCast(rend, &irend));
7876       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7877       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7878       row   = nrank;
7879       ncols = xadj[1] - xadj[0];
7880       cols  = adjncy;
7881       PetscCall(PetscMalloc1(ncols, &vals));
7882       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7883       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7884       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7885       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7886       PetscCall(PetscFree(xadj));
7887       PetscCall(PetscFree(adjncy));
7888       PetscCall(PetscFree(adjncy_wgt));
7889       PetscCall(PetscFree(vals));
7890       if (use_vwgt) {
7891         Vec                v;
7892         const PetscScalar *array;
7893         PetscInt           nl;
7894 
7895         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7896         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7897         PetscCall(VecAssemblyBegin(v));
7898         PetscCall(VecAssemblyEnd(v));
7899         PetscCall(VecGetLocalSize(v, &nl));
7900         PetscCall(VecGetArrayRead(v, &array));
7901         PetscCall(PetscMalloc1(nl, &v_wgt));
7902         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7903         PetscCall(VecRestoreArrayRead(v, &array));
7904         PetscCall(VecDestroy(&v));
7905       }
7906     } else {
7907       PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7908       if (use_vwgt) {
7909         PetscCall(PetscMalloc1(1, &v_wgt));
7910         v_wgt[0] = n;
7911       }
7912     }
7913     /* PetscCall(MatView(subdomain_adj,0)); */
7914 
7915     /* Partition */
7916     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7917 #if defined(PETSC_HAVE_PTSCOTCH)
7918     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7919 #elif defined(PETSC_HAVE_PARMETIS)
7920     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7921 #else
7922     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7923 #endif
7924     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7925     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7926     *n_subdomains = PetscMin(size, *n_subdomains);
7927     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7928     PetscCall(MatPartitioningSetFromOptions(partitioner));
7929     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7930     /* PetscCall(MatPartitioningView(partitioner,0)); */
7931 
7932     /* renumber new_ranks to avoid "holes" in new set of processors */
7933     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7934     PetscCall(ISDestroy(&new_ranks));
7935     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7936     if (!aggregate) {
7937       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7938         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7939         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7940       } else if (oldranks) {
7941         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7942       } else {
7943         ranks_send_to_idx[0] = is_indices[0];
7944       }
7945     } else {
7946       PetscInt     idx = 0;
7947       PetscMPIInt  tag;
7948       MPI_Request *reqs;
7949 
7950       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7951       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7952       for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7953       PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7954       PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7955       PetscCall(PetscFree(reqs));
7956       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7957         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7958         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7959       } else if (oldranks) {
7960         ranks_send_to_idx[0] = oldranks[idx];
7961       } else {
7962         ranks_send_to_idx[0] = idx;
7963       }
7964     }
7965     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7966     /* clean up */
7967     PetscCall(PetscFree(oldranks));
7968     PetscCall(ISDestroy(&new_ranks_contig));
7969     PetscCall(MatDestroy(&subdomain_adj));
7970     PetscCall(MatPartitioningDestroy(&partitioner));
7971   }
7972   PetscCall(PetscSubcommDestroy(&psubcomm));
7973   PetscCall(PetscFree(procs_candidates));
7974 
7975   /* assemble parallel IS for sends */
7976   i = 1;
7977   if (!color) i = 0;
7978   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7979   PetscFunctionReturn(PETSC_SUCCESS);
7980 }
7981 
7982 typedef enum {
7983   MATDENSE_PRIVATE = 0,
7984   MATAIJ_PRIVATE,
7985   MATBAIJ_PRIVATE,
7986   MATSBAIJ_PRIVATE
7987 } MatTypePrivate;
7988 
PCBDDCMatISSubassemble(Mat mat,IS is_sends,PetscInt n_subdomains,PetscBool restrict_comm,PetscBool restrict_full,PetscBool reuse,Mat * mat_n,PetscInt nis,IS isarray[],PetscInt nvecs,Vec nnsp_vec[])7989 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7990 {
7991   Mat                    local_mat;
7992   IS                     is_sends_internal;
7993   PetscInt               rows, cols, new_local_rows;
7994   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7995   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7996   ISLocalToGlobalMapping l2gmap;
7997   PetscInt              *l2gmap_indices;
7998   const PetscInt        *is_indices;
7999   MatType                new_local_type;
8000   /* buffers */
8001   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
8002   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
8003   PetscInt          *recv_buffer_idxs_local;
8004   PetscScalar       *ptr_vals, *recv_buffer_vals;
8005   const PetscScalar *send_buffer_vals;
8006   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
8007   /* MPI */
8008   MPI_Comm     comm, comm_n;
8009   PetscSubcomm subcomm;
8010   PetscMPIInt  n_sends, n_recvs, size;
8011   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
8012   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
8013   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
8014   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
8015   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
8016 
8017   PetscFunctionBegin;
8018   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
8019   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
8020   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
8021   PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
8022   PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
8023   PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
8024   PetscValidLogicalCollectiveBool(mat, reuse, 6);
8025   PetscValidLogicalCollectiveInt(mat, nis, 8);
8026   PetscValidLogicalCollectiveInt(mat, nvecs, 10);
8027   if (nvecs) {
8028     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
8029     PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
8030   }
8031   /* further checks */
8032   PetscCall(MatISGetLocalMat(mat, &local_mat));
8033   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
8034   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
8035 
8036   PetscCall(MatGetSize(local_mat, &rows, &cols));
8037   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
8038   if (reuse && *mat_n) {
8039     PetscInt mrows, mcols, mnrows, mncols;
8040     PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
8041     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
8042     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
8043     PetscCall(MatGetSize(mat, &mrows, &mcols));
8044     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
8045     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
8046     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
8047   }
8048   PetscCall(MatGetBlockSize(local_mat, &bs));
8049   PetscValidLogicalCollectiveInt(mat, bs, 1);
8050 
8051   /* prepare IS for sending if not provided */
8052   if (!is_sends) {
8053     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
8054     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
8055   } else {
8056     PetscCall(PetscObjectReference((PetscObject)is_sends));
8057     is_sends_internal = is_sends;
8058   }
8059 
8060   /* get comm */
8061   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
8062 
8063   /* compute number of sends */
8064   PetscCall(ISGetLocalSize(is_sends_internal, &i));
8065   PetscCall(PetscMPIIntCast(i, &n_sends));
8066 
8067   /* compute number of receives */
8068   PetscCallMPI(MPI_Comm_size(comm, &size));
8069   PetscCall(PetscMalloc1(size, &iflags));
8070   PetscCall(PetscArrayzero(iflags, size));
8071   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
8072   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
8073   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
8074   PetscCall(PetscFree(iflags));
8075 
8076   /* restrict comm if requested */
8077   subcomm     = NULL;
8078   destroy_mat = PETSC_FALSE;
8079   if (restrict_comm) {
8080     PetscMPIInt color, subcommsize;
8081 
8082     color = 0;
8083     if (restrict_full) {
8084       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
8085     } else {
8086       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
8087     }
8088     PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
8089     subcommsize = size - subcommsize;
8090     /* check if reuse has been requested */
8091     if (reuse) {
8092       if (*mat_n) {
8093         PetscMPIInt subcommsize2;
8094         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
8095         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
8096         comm_n = PetscObjectComm((PetscObject)*mat_n);
8097       } else {
8098         comm_n = PETSC_COMM_SELF;
8099       }
8100     } else { /* MAT_INITIAL_MATRIX */
8101       PetscMPIInt rank;
8102 
8103       PetscCallMPI(MPI_Comm_rank(comm, &rank));
8104       PetscCall(PetscSubcommCreate(comm, &subcomm));
8105       PetscCall(PetscSubcommSetNumber(subcomm, 2));
8106       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
8107       comm_n = PetscSubcommChild(subcomm);
8108     }
8109     /* flag to destroy *mat_n if not significative */
8110     if (color) destroy_mat = PETSC_TRUE;
8111   } else {
8112     comm_n = comm;
8113   }
8114 
8115   /* prepare send/receive buffers */
8116   PetscCall(PetscMalloc1(size, &ilengths_idxs));
8117   PetscCall(PetscArrayzero(ilengths_idxs, size));
8118   PetscCall(PetscMalloc1(size, &ilengths_vals));
8119   PetscCall(PetscArrayzero(ilengths_vals, size));
8120   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8121 
8122   /* Get data from local matrices */
8123   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8124   /* TODO: See below some guidelines on how to prepare the local buffers */
8125   /*
8126        send_buffer_vals should contain the raw values of the local matrix
8127        send_buffer_idxs should contain:
8128        - MatType_PRIVATE type
8129        - PetscInt        size_of_l2gmap
8130        - PetscInt        global_row_indices[size_of_l2gmap]
8131        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8132     */
8133   {
8134     ISLocalToGlobalMapping mapping;
8135 
8136     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8137     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8138     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8139     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8140     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8141     send_buffer_idxs[1] = i;
8142     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8143     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8144     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8145     PetscCall(PetscMPIIntCast(i, &len));
8146     for (i = 0; i < n_sends; i++) {
8147       ilengths_vals[is_indices[i]] = len * len;
8148       ilengths_idxs[is_indices[i]] = len + 2;
8149     }
8150   }
8151   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8152   /* additional is (if any) */
8153   if (nis) {
8154     PetscMPIInt psum;
8155     PetscInt    j;
8156     for (j = 0, psum = 0; j < nis; j++) {
8157       PetscInt plen;
8158       PetscCall(ISGetLocalSize(isarray[j], &plen));
8159       PetscCall(PetscMPIIntCast(plen, &len));
8160       psum += len + 1; /* indices + length */
8161     }
8162     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8163     for (j = 0, psum = 0; j < nis; j++) {
8164       PetscInt        plen;
8165       const PetscInt *is_array_idxs;
8166       PetscCall(ISGetLocalSize(isarray[j], &plen));
8167       send_buffer_idxs_is[psum] = plen;
8168       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8169       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8170       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8171       psum += plen + 1; /* indices + length */
8172     }
8173     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8174     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8175   }
8176   PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8177 
8178   buf_size_idxs    = 0;
8179   buf_size_vals    = 0;
8180   buf_size_idxs_is = 0;
8181   buf_size_vecs    = 0;
8182   for (i = 0; i < n_recvs; i++) {
8183     buf_size_idxs += olengths_idxs[i];
8184     buf_size_vals += olengths_vals[i];
8185     if (nis) buf_size_idxs_is += olengths_idxs_is[i];
8186     if (nvecs) buf_size_vecs += olengths_idxs[i];
8187   }
8188   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8189   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8190   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8191   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8192 
8193   /* get new tags for clean communications */
8194   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8195   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8196   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8197   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8198 
8199   /* allocate for requests */
8200   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8201   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8202   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8203   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8204   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8205   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8206   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8207   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8208 
8209   /* communications */
8210   ptr_idxs    = recv_buffer_idxs;
8211   ptr_vals    = recv_buffer_vals;
8212   ptr_idxs_is = recv_buffer_idxs_is;
8213   ptr_vecs    = recv_buffer_vecs;
8214   for (i = 0; i < n_recvs; i++) {
8215     PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i]));
8216     PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i]));
8217     ptr_idxs += olengths_idxs[i];
8218     ptr_vals += olengths_vals[i];
8219     if (nis) {
8220       PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i]));
8221       ptr_idxs_is += olengths_idxs_is[i];
8222     }
8223     if (nvecs) {
8224       PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i]));
8225       ptr_vecs += olengths_idxs[i] - 2;
8226     }
8227   }
8228   for (i = 0; i < n_sends; i++) {
8229     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8230     PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8231     PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8232     if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8233     if (nvecs) {
8234       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8235       PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8236     }
8237   }
8238   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8239   PetscCall(ISDestroy(&is_sends_internal));
8240 
8241   /* assemble new l2g map */
8242   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8243   ptr_idxs       = recv_buffer_idxs;
8244   new_local_rows = 0;
8245   for (i = 0; i < n_recvs; i++) {
8246     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8247     ptr_idxs += olengths_idxs[i];
8248   }
8249   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8250   ptr_idxs       = recv_buffer_idxs;
8251   new_local_rows = 0;
8252   for (i = 0; i < n_recvs; i++) {
8253     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8254     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8255     ptr_idxs += olengths_idxs[i];
8256   }
8257   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8258   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8259   PetscCall(PetscFree(l2gmap_indices));
8260 
8261   /* infer new local matrix type from received local matrices type */
8262   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8263   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8264   if (n_recvs) {
8265     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8266     ptr_idxs                              = recv_buffer_idxs;
8267     for (i = 0; i < n_recvs; i++) {
8268       if ((PetscInt)new_local_type_private != *ptr_idxs) {
8269         new_local_type_private = MATAIJ_PRIVATE;
8270         break;
8271       }
8272       ptr_idxs += olengths_idxs[i];
8273     }
8274     switch (new_local_type_private) {
8275     case MATDENSE_PRIVATE:
8276       new_local_type = MATSEQAIJ;
8277       bs             = 1;
8278       break;
8279     case MATAIJ_PRIVATE:
8280       new_local_type = MATSEQAIJ;
8281       bs             = 1;
8282       break;
8283     case MATBAIJ_PRIVATE:
8284       new_local_type = MATSEQBAIJ;
8285       break;
8286     case MATSBAIJ_PRIVATE:
8287       new_local_type = MATSEQSBAIJ;
8288       break;
8289     default:
8290       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8291     }
8292   } else { /* by default, new_local_type is seqaij */
8293     new_local_type = MATSEQAIJ;
8294     bs             = 1;
8295   }
8296 
8297   /* create MATIS object if needed */
8298   if (!reuse) {
8299     PetscCall(MatGetSize(mat, &rows, &cols));
8300     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8301   } else {
8302     /* it also destroys the local matrices */
8303     if (*mat_n) {
8304       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8305     } else { /* this is a fake object */
8306       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8307     }
8308   }
8309   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8310   PetscCall(MatSetType(local_mat, new_local_type));
8311 
8312   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8313 
8314   /* Global to local map of received indices */
8315   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8316   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8317   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8318 
8319   /* restore attributes -> type of incoming data and its size */
8320   buf_size_idxs = 0;
8321   for (i = 0; i < n_recvs; i++) {
8322     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
8323     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8324     buf_size_idxs += olengths_idxs[i];
8325   }
8326   PetscCall(PetscFree(recv_buffer_idxs));
8327 
8328   /* set preallocation */
8329   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8330   if (!newisdense) {
8331     PetscInt *new_local_nnz = NULL;
8332 
8333     ptr_idxs = recv_buffer_idxs_local;
8334     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8335     for (i = 0; i < n_recvs; i++) {
8336       PetscInt j;
8337       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8338         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8339       } else {
8340         /* TODO */
8341       }
8342       ptr_idxs += olengths_idxs[i];
8343     }
8344     if (new_local_nnz) {
8345       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8346       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8347       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8348       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8349       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8350       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8351     } else {
8352       PetscCall(MatSetUp(local_mat));
8353     }
8354     PetscCall(PetscFree(new_local_nnz));
8355   } else {
8356     PetscCall(MatSetUp(local_mat));
8357   }
8358 
8359   /* set values */
8360   ptr_vals = recv_buffer_vals;
8361   ptr_idxs = recv_buffer_idxs_local;
8362   for (i = 0; i < n_recvs; i++) {
8363     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8364       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8365       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8366       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8367       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8368       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8369     } else {
8370       /* TODO */
8371     }
8372     ptr_idxs += olengths_idxs[i];
8373     ptr_vals += olengths_vals[i];
8374   }
8375   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8376   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8377   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8378   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8379   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8380   PetscCall(PetscFree(recv_buffer_vals));
8381 
8382 #if 0
8383   if (!restrict_comm) { /* check */
8384     Vec       lvec,rvec;
8385     PetscReal infty_error;
8386 
8387     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8388     PetscCall(VecSetRandom(rvec,NULL));
8389     PetscCall(MatMult(mat,rvec,lvec));
8390     PetscCall(VecScale(lvec,-1.0));
8391     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8392     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8393     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8394     PetscCall(VecDestroy(&rvec));
8395     PetscCall(VecDestroy(&lvec));
8396   }
8397 #endif
8398 
8399   /* assemble new additional is (if any) */
8400   if (nis) {
8401     PetscInt **temp_idxs, *count_is, j, psum;
8402 
8403     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8404     PetscCall(PetscCalloc1(nis, &count_is));
8405     ptr_idxs = recv_buffer_idxs_is;
8406     psum     = 0;
8407     for (i = 0; i < n_recvs; i++) {
8408       for (j = 0; j < nis; j++) {
8409         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8410         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
8411         psum += plen;
8412         ptr_idxs += plen + 1; /* shift pointer to received data */
8413       }
8414     }
8415     PetscCall(PetscMalloc1(nis, &temp_idxs));
8416     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8417     for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8418     PetscCall(PetscArrayzero(count_is, nis));
8419     ptr_idxs = recv_buffer_idxs_is;
8420     for (i = 0; i < n_recvs; i++) {
8421       for (j = 0; j < nis; j++) {
8422         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8423         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8424         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
8425         ptr_idxs += plen + 1; /* shift pointer to received data */
8426       }
8427     }
8428     for (i = 0; i < nis; i++) {
8429       PetscCall(ISDestroy(&isarray[i]));
8430       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8431       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8432     }
8433     PetscCall(PetscFree(count_is));
8434     PetscCall(PetscFree(temp_idxs[0]));
8435     PetscCall(PetscFree(temp_idxs));
8436   }
8437   /* free workspace */
8438   PetscCall(PetscFree(recv_buffer_idxs_is));
8439   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8440   PetscCall(PetscFree(send_buffer_idxs));
8441   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8442   if (isdense) {
8443     PetscCall(MatISGetLocalMat(mat, &local_mat));
8444     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8445     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8446   } else {
8447     /* PetscCall(PetscFree(send_buffer_vals)); */
8448   }
8449   if (nis) {
8450     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8451     PetscCall(PetscFree(send_buffer_idxs_is));
8452   }
8453 
8454   if (nvecs) {
8455     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8456     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8457     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8458     PetscCall(VecDestroy(&nnsp_vec[0]));
8459     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8460     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8461     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8462     /* set values */
8463     ptr_vals = recv_buffer_vecs;
8464     ptr_idxs = recv_buffer_idxs_local;
8465     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8466     for (i = 0; i < n_recvs; i++) {
8467       PetscInt j;
8468       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8469       ptr_idxs += olengths_idxs[i];
8470       ptr_vals += olengths_idxs[i] - 2;
8471     }
8472     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8473     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8474     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8475   }
8476 
8477   PetscCall(PetscFree(recv_buffer_vecs));
8478   PetscCall(PetscFree(recv_buffer_idxs_local));
8479   PetscCall(PetscFree(recv_req_idxs));
8480   PetscCall(PetscFree(recv_req_vals));
8481   PetscCall(PetscFree(recv_req_vecs));
8482   PetscCall(PetscFree(recv_req_idxs_is));
8483   PetscCall(PetscFree(send_req_idxs));
8484   PetscCall(PetscFree(send_req_vals));
8485   PetscCall(PetscFree(send_req_vecs));
8486   PetscCall(PetscFree(send_req_idxs_is));
8487   PetscCall(PetscFree(ilengths_vals));
8488   PetscCall(PetscFree(ilengths_idxs));
8489   PetscCall(PetscFree(olengths_vals));
8490   PetscCall(PetscFree(olengths_idxs));
8491   PetscCall(PetscFree(onodes));
8492   if (nis) {
8493     PetscCall(PetscFree(ilengths_idxs_is));
8494     PetscCall(PetscFree(olengths_idxs_is));
8495     PetscCall(PetscFree(onodes_is));
8496   }
8497   PetscCall(PetscSubcommDestroy(&subcomm));
8498   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8499     PetscCall(MatDestroy(mat_n));
8500     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8501     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8502       PetscCall(VecDestroy(&nnsp_vec[0]));
8503     }
8504     *mat_n = NULL;
8505   }
8506   PetscFunctionReturn(PETSC_SUCCESS);
8507 }
8508 
8509 /* temporary hack into ksp private data structure */
8510 #include <petsc/private/kspimpl.h>
8511 
PCBDDCSetUpCoarseSolver(PC pc,Mat coarse_submat)8512 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8513 {
8514   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
8515   PC_IS                 *pcis   = (PC_IS *)pc->data;
8516   PCBDDCGraph            graph  = pcbddc->mat_graph;
8517   Mat                    coarse_mat, coarse_mat_is;
8518   Mat                    coarsedivudotp = NULL;
8519   Mat                    coarseG, t_coarse_mat_is;
8520   MatNullSpace           CoarseNullSpace = NULL;
8521   ISLocalToGlobalMapping coarse_islg;
8522   IS                     coarse_is, *isarray, corners;
8523   PetscInt               i, im_active = -1, active_procs = -1;
8524   PetscInt               nis, nisdofs, nisneu, nisvert;
8525   PetscInt               coarse_eqs_per_proc, coarsening_ratio;
8526   PC                     pc_temp;
8527   PCType                 coarse_pc_type;
8528   KSPType                coarse_ksp_type;
8529   PetscBool              multilevel_requested, multilevel_allowed;
8530   PetscBool              coarse_reuse, multi_element = graph->multi_element;
8531   PetscInt               ncoarse, nedcfield;
8532   PetscBool              compute_vecs = PETSC_FALSE;
8533   PetscScalar           *array;
8534   MatReuse               coarse_mat_reuse;
8535   PetscBool              restr, full_restr, have_void;
8536   PetscMPIInt            size;
8537 
8538   PetscFunctionBegin;
8539   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8540   /* Assign global numbering to coarse dofs */
8541   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8542     PetscInt ocoarse_size;
8543     compute_vecs = PETSC_TRUE;
8544 
8545     pcbddc->new_primal_space = PETSC_TRUE;
8546     ocoarse_size             = pcbddc->coarse_size;
8547     PetscCall(PetscFree(pcbddc->global_primal_indices));
8548     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8549     /* see if we can avoid some work */
8550     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8551       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8552       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8553         PetscCall(KSPReset(pcbddc->coarse_ksp));
8554         coarse_reuse = PETSC_FALSE;
8555       } else { /* we can safely reuse already computed coarse matrix */
8556         coarse_reuse = PETSC_TRUE;
8557       }
8558     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8559       coarse_reuse = PETSC_FALSE;
8560     }
8561     /* reset any subassembling information */
8562     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8563   } else { /* primal space is unchanged, so we can reuse coarse matrix */
8564     coarse_reuse = PETSC_TRUE;
8565   }
8566   if (coarse_reuse && pcbddc->coarse_ksp) {
8567     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8568     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8569     coarse_mat_reuse = MAT_REUSE_MATRIX;
8570   } else {
8571     coarse_mat       = NULL;
8572     coarse_mat_reuse = MAT_INITIAL_MATRIX;
8573   }
8574 
8575   /* creates temporary l2gmap and IS for coarse indexes */
8576   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8577   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8578 
8579   /* creates temporary MATIS object for coarse matrix */
8580   PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8581   PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8582   PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8583   PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element));
8584   PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8585   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8586   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8587   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8588   PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8589 
8590   /* count "active" (i.e. with positive local size) and "void" processes */
8591   im_active = !!pcis->n;
8592   PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8593 
8594   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8595   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8596   /* full_restr : just use the receivers from the subassembling pattern */
8597   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8598   coarse_mat_is        = NULL;
8599   multilevel_allowed   = PETSC_FALSE;
8600   multilevel_requested = PETSC_FALSE;
8601   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8602   if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8603   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8604   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8605   coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8606   if (multilevel_requested) {
8607     ncoarse    = active_procs / coarsening_ratio;
8608     restr      = PETSC_FALSE;
8609     full_restr = PETSC_FALSE;
8610   } else {
8611     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8612     restr      = PETSC_TRUE;
8613     full_restr = PETSC_TRUE;
8614   }
8615   if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8616   ncoarse = PetscMax(1, ncoarse);
8617   if (!pcbddc->coarse_subassembling) {
8618     if (coarsening_ratio > 1) {
8619       if (multilevel_requested) {
8620         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8621       } else {
8622         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8623       }
8624     } else {
8625       PetscMPIInt rank;
8626 
8627       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8628       have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE;
8629       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8630       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling"));
8631     }
8632   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8633     PetscInt psum;
8634     if (pcbddc->coarse_ksp) psum = 1;
8635     else psum = 0;
8636     PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8637     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8638   }
8639   /* determine if we can go multilevel */
8640   if (multilevel_requested) {
8641     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8642     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
8643   }
8644   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8645 
8646   /* dump subassembling pattern */
8647   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8648   /* compute dofs splitting and neumann boundaries for coarse dofs */
8649   nedcfield = -1;
8650   corners   = NULL;
8651   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8652     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
8653     const PetscInt        *idxs;
8654     ISLocalToGlobalMapping tmap;
8655 
8656     /* create map between primal indices (in local representative ordering) and local primal numbering */
8657     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8658     /* allocate space for temporary storage */
8659     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8660     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8661     /* allocate for IS array */
8662     nisdofs = pcbddc->n_ISForDofsLocal;
8663     if (pcbddc->nedclocal) {
8664       if (pcbddc->nedfield > -1) {
8665         nedcfield = pcbddc->nedfield;
8666       } else {
8667         nedcfield = 0;
8668         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8669         nisdofs = 1;
8670       }
8671     }
8672     nisneu  = !!pcbddc->NeumannBoundariesLocal;
8673     nisvert = 0; /* nisvert is not used */
8674     nis     = nisdofs + nisneu + nisvert;
8675     PetscCall(PetscMalloc1(nis, &isarray));
8676     /* dofs splitting */
8677     for (i = 0; i < nisdofs; i++) {
8678       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8679       if (nedcfield != i) {
8680         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8681         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8682         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8683         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8684       } else {
8685         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8686         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8687         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8688         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8689         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8690       }
8691       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8692       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8693       /* PetscCall(ISView(isarray[i],0)); */
8694     }
8695     /* neumann boundaries */
8696     if (pcbddc->NeumannBoundariesLocal) {
8697       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8698       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8699       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8700       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8701       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8702       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8703       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8704       /* PetscCall(ISView(isarray[nisdofs],0)); */
8705     }
8706     /* coordinates */
8707     if (pcbddc->corner_selected) {
8708       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8709       PetscCall(ISGetLocalSize(corners, &tsize));
8710       PetscCall(ISGetIndices(corners, &idxs));
8711       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8712       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8713       PetscCall(ISRestoreIndices(corners, &idxs));
8714       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8715       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8716       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8717     }
8718     PetscCall(PetscFree(tidxs));
8719     PetscCall(PetscFree(tidxs2));
8720     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8721   } else {
8722     nis     = 0;
8723     nisdofs = 0;
8724     nisneu  = 0;
8725     nisvert = 0;
8726     isarray = NULL;
8727   }
8728   /* destroy no longer needed map */
8729   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8730 
8731   /* subassemble */
8732   if (multilevel_allowed) {
8733     Vec       vp[1];
8734     PetscInt  nvecs = 0;
8735     PetscBool reuse;
8736 
8737     vp[0] = NULL;
8738     /* XXX HDIV also */
8739     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8740       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8741       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8742       PetscCall(VecSetType(vp[0], VECSTANDARD));
8743       nvecs = 1;
8744 
8745       if (pcbddc->divudotp) {
8746         Mat      B, loc_divudotp;
8747         Vec      v, p;
8748         IS       dummy;
8749         PetscInt np;
8750 
8751         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8752         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8753         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8754         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8755         PetscCall(MatCreateVecs(B, &v, &p));
8756         PetscCall(VecSet(p, 1.));
8757         PetscCall(MatMultTranspose(B, p, v));
8758         PetscCall(VecDestroy(&p));
8759         PetscCall(MatDestroy(&B));
8760         PetscCall(VecGetArray(vp[0], &array));
8761         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8762         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8763         PetscCall(VecResetArray(pcbddc->vec1_P));
8764         PetscCall(VecRestoreArray(vp[0], &array));
8765         PetscCall(ISDestroy(&dummy));
8766         PetscCall(VecDestroy(&v));
8767       }
8768     }
8769     if (coarse_mat) reuse = PETSC_TRUE;
8770     else reuse = PETSC_FALSE;
8771     if (multi_element) {
8772       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8773       coarse_mat_is = t_coarse_mat_is;
8774     } else {
8775       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8776       if (reuse) {
8777         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8778       } else {
8779         PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8780       }
8781       if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8782         PetscScalar       *arraym;
8783         const PetscScalar *arrayv;
8784         PetscInt           nl;
8785         PetscCall(VecGetLocalSize(vp[0], &nl));
8786         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8787         PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8788         PetscCall(VecGetArrayRead(vp[0], &arrayv));
8789         PetscCall(PetscArraycpy(arraym, arrayv, nl));
8790         PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8791         PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8792         PetscCall(VecDestroy(&vp[0]));
8793       } else {
8794         PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8795       }
8796     }
8797   } else {
8798     PetscBool default_sub;
8799 
8800     PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub));
8801     if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8802     else {
8803       PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8804       coarse_mat_is = t_coarse_mat_is;
8805     }
8806   }
8807   if (coarse_mat_is || coarse_mat) {
8808     if (!multilevel_allowed) {
8809       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8810     } else {
8811       /* if this matrix is present, it means we are not reusing the coarse matrix */
8812       if (coarse_mat_is) {
8813         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8814         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8815         coarse_mat = coarse_mat_is;
8816       }
8817     }
8818   }
8819   PetscCall(MatDestroy(&t_coarse_mat_is));
8820   PetscCall(MatDestroy(&coarse_mat_is));
8821 
8822   /* create local to global scatters for coarse problem */
8823   if (compute_vecs) {
8824     PetscInt lrows;
8825     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8826     if (coarse_mat) {
8827       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8828     } else {
8829       lrows = 0;
8830     }
8831     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8832     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8833     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8834     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8835     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8836   }
8837   PetscCall(ISDestroy(&coarse_is));
8838 
8839   /* set defaults for coarse KSP and PC */
8840   if (multilevel_allowed) {
8841     coarse_ksp_type = KSPRICHARDSON;
8842     coarse_pc_type  = PCBDDC;
8843   } else {
8844     coarse_ksp_type = KSPPREONLY;
8845     coarse_pc_type  = PCREDUNDANT;
8846   }
8847 
8848   /* print some info if requested */
8849   if (pcbddc->dbg_flag) {
8850     if (!multilevel_allowed) {
8851       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8852       if (multilevel_requested) {
8853         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8854       } else if (pcbddc->max_levels) {
8855         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8856       }
8857       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8858     }
8859   }
8860 
8861   /* communicate coarse discrete gradient */
8862   coarseG = NULL;
8863   if (pcbddc->nedcG && multilevel_allowed) {
8864     MPI_Comm ccomm;
8865     if (coarse_mat) {
8866       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8867     } else {
8868       ccomm = MPI_COMM_NULL;
8869     }
8870     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8871   }
8872 
8873   /* create the coarse KSP object only once with defaults */
8874   if (coarse_mat) {
8875     PetscBool   isredundant, isbddc, force, valid;
8876     PetscViewer dbg_viewer = NULL;
8877     PetscBool   isset, issym, isher, isspd;
8878 
8879     if (pcbddc->dbg_flag) {
8880       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8881       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8882     }
8883     if (!pcbddc->coarse_ksp) {
8884       char   prefix[256], str_level[16];
8885       size_t len;
8886 
8887       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8888       PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8889       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8890       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8891       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8892       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8893       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8894       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8895       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8896       /* TODO is this logic correct? should check for coarse_mat type */
8897       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8898       /* prefix */
8899       PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8900       PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8901       if (!pcbddc->current_level) {
8902         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8903         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8904       } else {
8905         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8906         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8907         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8908         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8909         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8910         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
8911         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8912       }
8913       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8914       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8915       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8916       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8917       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8918       /* allow user customization */
8919       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8920       /* get some info after set from options */
8921       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8922       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8923       force = PETSC_FALSE;
8924       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8925       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8926       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8927       if (multilevel_allowed && !force && !valid) {
8928         isbddc = PETSC_TRUE;
8929         PetscCall(PCSetType(pc_temp, PCBDDC));
8930         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8931         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8932         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8933         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8934           PetscObjectOptionsBegin((PetscObject)pc_temp);
8935           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8936           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8937           PetscOptionsEnd();
8938           pc_temp->setfromoptionscalled++;
8939         }
8940       }
8941     }
8942     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8943     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8944     if (nisdofs) {
8945       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8946       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8947     }
8948     if (nisneu) {
8949       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8950       PetscCall(ISDestroy(&isarray[nisdofs]));
8951     }
8952     if (nisvert) {
8953       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8954       PetscCall(ISDestroy(&isarray[nis - 1]));
8955     }
8956     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8957 
8958     /* get some info after set from options */
8959     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8960 
8961     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8962     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8963     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8964     force = PETSC_FALSE;
8965     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8966     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8967     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8968     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8969     if (isredundant) {
8970       KSP inner_ksp;
8971       PC  inner_pc;
8972 
8973       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8974       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8975     }
8976 
8977     /* parameters which miss an API */
8978     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8979     if (isbddc) {
8980       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8981 
8982       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8983       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8984       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8985       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8986       if (pcbddc_coarse->benign_saddle_point) {
8987         Mat                    coarsedivudotp_is;
8988         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8989         IS                     row, col;
8990         const PetscInt        *gidxs;
8991         PetscInt               n, st, M, N;
8992 
8993         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8994         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8995         st = st - n;
8996         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8997         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8998         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8999         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
9000         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
9001         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
9002         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
9003         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
9004         PetscCall(ISGetSize(row, &M));
9005         PetscCall(MatGetSize(coarse_mat, &N, NULL));
9006         PetscCall(ISDestroy(&row));
9007         PetscCall(ISDestroy(&col));
9008         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
9009         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
9010         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
9011         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
9012         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
9013         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
9014         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
9015         PetscCall(MatDestroy(&coarsedivudotp));
9016         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
9017         PetscCall(MatDestroy(&coarsedivudotp_is));
9018         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
9019         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
9020       }
9021     }
9022 
9023     /* propagate symmetry info of coarse matrix */
9024     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
9025     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
9026     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
9027     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
9028     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
9029     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
9030     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
9031 
9032     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
9033     /* set operators */
9034     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
9035     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
9036     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
9037     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
9038   }
9039   PetscCall(MatDestroy(&coarseG));
9040   PetscCall(PetscFree(isarray));
9041 #if 0
9042   {
9043     PetscViewer viewer;
9044     char filename[256];
9045     PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
9046     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
9047     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
9048     PetscCall(MatView(coarse_mat,viewer));
9049     PetscCall(PetscViewerPopFormat(viewer));
9050     PetscCall(PetscViewerDestroy(&viewer));
9051   }
9052 #endif
9053 
9054   if (corners) {
9055     Vec             gv;
9056     IS              is;
9057     const PetscInt *idxs;
9058     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
9059     PetscScalar    *coords;
9060 
9061     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
9062     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
9063     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
9064     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
9065     PetscCall(VecSetBlockSize(gv, cdim));
9066     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
9067     PetscCall(VecSetType(gv, VECSTANDARD));
9068     PetscCall(VecSetFromOptions(gv));
9069     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
9070 
9071     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
9072     PetscCall(ISGetLocalSize(is, &n));
9073     PetscCall(ISGetIndices(is, &idxs));
9074     PetscCall(PetscMalloc1(n * cdim, &coords));
9075     for (i = 0; i < n; i++) {
9076       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
9077     }
9078     PetscCall(ISRestoreIndices(is, &idxs));
9079     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
9080 
9081     PetscCall(ISGetLocalSize(corners, &n));
9082     PetscCall(ISGetIndices(corners, &idxs));
9083     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
9084     PetscCall(ISRestoreIndices(corners, &idxs));
9085     PetscCall(PetscFree(coords));
9086     PetscCall(VecAssemblyBegin(gv));
9087     PetscCall(VecAssemblyEnd(gv));
9088     PetscCall(VecGetArray(gv, &coords));
9089     if (pcbddc->coarse_ksp) {
9090       PC        coarse_pc;
9091       PetscBool isbddc;
9092 
9093       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
9094       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
9095       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
9096         PetscReal *realcoords;
9097 
9098         PetscCall(VecGetLocalSize(gv, &n));
9099 #if defined(PETSC_USE_COMPLEX)
9100         PetscCall(PetscMalloc1(n, &realcoords));
9101         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
9102 #else
9103         realcoords = coords;
9104 #endif
9105         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
9106 #if defined(PETSC_USE_COMPLEX)
9107         PetscCall(PetscFree(realcoords));
9108 #endif
9109       }
9110     }
9111     PetscCall(VecRestoreArray(gv, &coords));
9112     PetscCall(VecDestroy(&gv));
9113   }
9114   PetscCall(ISDestroy(&corners));
9115 
9116   if (pcbddc->coarse_ksp) {
9117     Vec crhs, csol;
9118 
9119     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9120     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9121     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9122     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9123   }
9124   PetscCall(MatDestroy(&coarsedivudotp));
9125 
9126   /* compute null space for coarse solver if the benign trick has been requested */
9127   if (pcbddc->benign_null) {
9128     PetscCall(VecSet(pcbddc->vec1_P, 0.));
9129     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9130     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9131     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9132     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9133     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9134     if (coarse_mat) {
9135       Vec          nullv;
9136       PetscScalar *array, *array2;
9137       PetscInt     nl;
9138 
9139       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9140       PetscCall(VecGetLocalSize(nullv, &nl));
9141       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9142       PetscCall(VecGetArray(nullv, &array2));
9143       PetscCall(PetscArraycpy(array2, array, nl));
9144       PetscCall(VecRestoreArray(nullv, &array2));
9145       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9146       PetscCall(VecNormalize(nullv, NULL));
9147       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9148       PetscCall(VecDestroy(&nullv));
9149     }
9150   }
9151   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9152 
9153   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9154   if (pcbddc->coarse_ksp) {
9155     PetscBool ispreonly;
9156 
9157     if (CoarseNullSpace) {
9158       PetscBool isnull;
9159 
9160       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9161       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9162       /* TODO: add local nullspaces (if any) */
9163     }
9164     /* setup coarse ksp */
9165     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9166     /* Check coarse problem if in debug mode or if solving with an iterative method */
9167     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9168     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9169       KSP         check_ksp;
9170       KSPType     check_ksp_type;
9171       PC          check_pc;
9172       Vec         check_vec, coarse_vec;
9173       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9174       PetscInt    its;
9175       PetscBool   compute_eigs;
9176       PetscReal  *eigs_r, *eigs_c;
9177       PetscInt    neigs;
9178       const char *prefix;
9179 
9180       /* Create ksp object suitable for estimation of extreme eigenvalues */
9181       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9182       PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9183       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9184       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9185       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9186       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9187       /* prevent from setup unneeded object */
9188       PetscCall(KSPGetPC(check_ksp, &check_pc));
9189       PetscCall(PCSetType(check_pc, PCNONE));
9190       if (ispreonly) {
9191         check_ksp_type = KSPPREONLY;
9192         compute_eigs   = PETSC_FALSE;
9193       } else {
9194         check_ksp_type = KSPGMRES;
9195         compute_eigs   = PETSC_TRUE;
9196       }
9197       PetscCall(KSPSetType(check_ksp, check_ksp_type));
9198       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9199       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9200       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9201       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9202       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9203       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9204       PetscCall(KSPSetFromOptions(check_ksp));
9205       PetscCall(KSPSetUp(check_ksp));
9206       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9207       PetscCall(KSPSetPC(check_ksp, check_pc));
9208       /* create random vec */
9209       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9210       PetscCall(VecSetRandom(check_vec, NULL));
9211       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9212       /* solve coarse problem */
9213       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9214       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9215       /* set eigenvalue estimation if preonly has not been requested */
9216       if (compute_eigs) {
9217         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9218         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9219         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9220         if (neigs) {
9221           lambda_max = eigs_r[neigs - 1];
9222           lambda_min = eigs_r[0];
9223           if (pcbddc->use_coarse_estimates) {
9224             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9225               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9226               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9227             }
9228           }
9229         }
9230       }
9231 
9232       /* check coarse problem residual error */
9233       if (pcbddc->dbg_flag) {
9234         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9235         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9236         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9237         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9238         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9239         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9240         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9241         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9242         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9243         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
9244         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9245         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9246         if (compute_eigs) {
9247           PetscReal          lambda_max_s, lambda_min_s;
9248           KSPConvergedReason reason;
9249           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9250           PetscCall(KSPGetIterationNumber(check_ksp, &its));
9251           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9252           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9253           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9254           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9255         }
9256         PetscCall(PetscViewerFlush(dbg_viewer));
9257         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9258       }
9259       PetscCall(VecDestroy(&check_vec));
9260       PetscCall(VecDestroy(&coarse_vec));
9261       PetscCall(KSPDestroy(&check_ksp));
9262       if (compute_eigs) {
9263         PetscCall(PetscFree(eigs_r));
9264         PetscCall(PetscFree(eigs_c));
9265       }
9266     }
9267   }
9268   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9269   /* print additional info */
9270   if (pcbddc->dbg_flag) {
9271     /* waits until all processes reaches this point */
9272     PetscCall(PetscBarrier((PetscObject)pc));
9273     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9274     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9275   }
9276 
9277   /* free memory */
9278   PetscCall(MatDestroy(&coarse_mat));
9279   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9280   PetscFunctionReturn(PETSC_SUCCESS);
9281 }
9282 
PCBDDCComputePrimalNumbering(PC pc,PetscInt * coarse_size_n,PetscInt ** local_primal_indices_n)9283 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9284 {
9285   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
9286   PC_IS          *pcis   = (PC_IS *)pc->data;
9287   IS              subset, subset_mult, subset_n;
9288   PetscInt        local_size, coarse_size = 0;
9289   PetscInt       *local_primal_indices = NULL;
9290   const PetscInt *t_local_primal_indices;
9291 
9292   PetscFunctionBegin;
9293   /* Compute global number of coarse dofs */
9294   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9295   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9296   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9297   PetscCall(ISDestroy(&subset_n));
9298   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9299   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9300   PetscCall(ISDestroy(&subset));
9301   PetscCall(ISDestroy(&subset_mult));
9302   PetscCall(ISGetLocalSize(subset_n, &local_size));
9303   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9304   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9305   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9306   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9307   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9308   PetscCall(ISDestroy(&subset_n));
9309 
9310   if (pcbddc->dbg_flag) {
9311     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9312     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9313     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9314     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9315   }
9316 
9317   /* get back data */
9318   *coarse_size_n          = coarse_size;
9319   *local_primal_indices_n = local_primal_indices;
9320   PetscFunctionReturn(PETSC_SUCCESS);
9321 }
9322 
PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork,Vec lwork,IS globalis,IS * localis)9323 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9324 {
9325   IS           localis_t;
9326   PetscInt     i, lsize, *idxs, n;
9327   PetscScalar *vals;
9328 
9329   PetscFunctionBegin;
9330   /* get indices in local ordering exploiting local to global map */
9331   PetscCall(ISGetLocalSize(globalis, &lsize));
9332   PetscCall(PetscMalloc1(lsize, &vals));
9333   for (i = 0; i < lsize; i++) vals[i] = 1.0;
9334   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9335   PetscCall(VecSet(gwork, 0.0));
9336   PetscCall(VecSet(lwork, 0.0));
9337   if (idxs) { /* multilevel guard */
9338     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9339     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9340   }
9341   PetscCall(VecAssemblyBegin(gwork));
9342   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9343   PetscCall(PetscFree(vals));
9344   PetscCall(VecAssemblyEnd(gwork));
9345   /* now compute set in local ordering */
9346   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9347   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9348   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9349   PetscCall(VecGetSize(lwork, &n));
9350   for (i = 0, lsize = 0; i < n; i++) {
9351     if (PetscRealPart(vals[i]) > 0.5) lsize++;
9352   }
9353   PetscCall(PetscMalloc1(lsize, &idxs));
9354   for (i = 0, lsize = 0; i < n; i++) {
9355     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9356   }
9357   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9358   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9359   *localis = localis_t;
9360   PetscFunctionReturn(PETSC_SUCCESS);
9361 }
9362 
PCBDDCComputeFakeChange(PC pc,PetscBool constraints,PCBDDCGraph graph,PCBDDCSubSchurs schurs,Mat * change,IS * change_primal,IS * change_primal_mult,PetscBool * change_with_qr)9363 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9364 {
9365   PC_IS   *pcis   = (PC_IS *)pc->data;
9366   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9367   PC_IS   *pcisf;
9368   PC_BDDC *pcbddcf;
9369   PC       pcf;
9370 
9371   PetscFunctionBegin;
9372   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9373   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9374   PetscCall(PCSetType(pcf, PCBDDC));
9375 
9376   pcisf   = (PC_IS *)pcf->data;
9377   pcbddcf = (PC_BDDC *)pcf->data;
9378 
9379   pcisf->is_B_local = pcis->is_B_local;
9380   pcisf->vec1_N     = pcis->vec1_N;
9381   pcisf->BtoNmap    = pcis->BtoNmap;
9382   pcisf->n          = pcis->n;
9383   pcisf->n_B        = pcis->n_B;
9384 
9385   PetscCall(PetscFree(pcbddcf->mat_graph));
9386   PetscCall(PetscFree(pcbddcf->sub_schurs));
9387   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
9388   pcbddcf->sub_schurs            = schurs;
9389   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
9390   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9391   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9392   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
9393   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
9394   pcbddcf->use_faces             = PETSC_TRUE;
9395   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
9396   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
9397   pcbddcf->use_qr_single         = (PetscBool)!constraints;
9398   pcbddcf->fake_change           = PETSC_TRUE;
9399   pcbddcf->dbg_flag              = pcbddc->dbg_flag;
9400 
9401   PetscCall(PCBDDCAdaptiveSelection(pcf));
9402   PetscCall(PCBDDCConstraintsSetUp(pcf));
9403 
9404   *change = pcbddcf->ConstraintMatrix;
9405   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9406   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9407   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9408 
9409   if (schurs) pcbddcf->sub_schurs = NULL;
9410   pcbddcf->ConstraintMatrix = NULL;
9411   pcbddcf->mat_graph        = NULL;
9412   pcisf->is_B_local         = NULL;
9413   pcisf->vec1_N             = NULL;
9414   pcisf->BtoNmap            = NULL;
9415   PetscCall(PCDestroy(&pcf));
9416   PetscFunctionReturn(PETSC_SUCCESS);
9417 }
9418 
PCBDDCSetUpSubSchurs(PC pc)9419 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9420 {
9421   PC_IS          *pcis       = (PC_IS *)pc->data;
9422   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
9423   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9424   Mat             S_j;
9425   PetscInt       *used_xadj, *used_adjncy;
9426   PetscBool       free_used_adj;
9427 
9428   PetscFunctionBegin;
9429   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9430   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9431   free_used_adj = PETSC_FALSE;
9432   if (pcbddc->sub_schurs_layers == -1) {
9433     used_xadj   = NULL;
9434     used_adjncy = NULL;
9435   } else {
9436     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9437       used_xadj   = pcbddc->mat_graph->xadj;
9438       used_adjncy = pcbddc->mat_graph->adjncy;
9439     } else if (pcbddc->computed_rowadj) {
9440       used_xadj   = pcbddc->mat_graph->xadj;
9441       used_adjncy = pcbddc->mat_graph->adjncy;
9442     } else {
9443       PetscBool       flg_row = PETSC_FALSE;
9444       const PetscInt *xadj, *adjncy;
9445       PetscInt        nvtxs;
9446 
9447       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9448       if (flg_row) {
9449         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9450         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9451         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9452         free_used_adj = PETSC_TRUE;
9453       } else {
9454         pcbddc->sub_schurs_layers = -1;
9455         used_xadj                 = NULL;
9456         used_adjncy               = NULL;
9457       }
9458       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9459     }
9460   }
9461 
9462   /* setup sub_schurs data */
9463   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9464   if (!sub_schurs->schur_explicit) {
9465     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9466     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9467     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9468   } else {
9469     Mat       change        = NULL;
9470     Vec       scaling       = NULL;
9471     IS        change_primal = NULL, iP;
9472     PetscInt  benign_n;
9473     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
9474     PetscBool need_change       = PETSC_FALSE;
9475     PetscBool discrete_harmonic = PETSC_FALSE;
9476 
9477     if (!pcbddc->use_vertices && reuse_solvers) {
9478       PetscInt n_vertices;
9479 
9480       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9481       reuse_solvers = (PetscBool)!n_vertices;
9482     }
9483     if (!pcbddc->benign_change_explicit) {
9484       benign_n = pcbddc->benign_n;
9485     } else {
9486       benign_n = 0;
9487     }
9488     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9489        We need a global reduction to avoid possible deadlocks.
9490        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9491     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9492       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9493       PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9494       need_change = (PetscBool)(!need_change);
9495     }
9496     /* If the user defines additional constraints, we import them here */
9497     if (need_change) {
9498       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9499       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9500     }
9501     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9502 
9503     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9504     if (iP) {
9505       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9506       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9507       PetscOptionsEnd();
9508     }
9509     if (discrete_harmonic) {
9510       Mat A;
9511       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9512       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9513       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9514       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9515                                      pcbddc->benign_zerodiag_subs, change, change_primal));
9516       PetscCall(MatDestroy(&A));
9517     } else {
9518       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9519                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9520     }
9521     PetscCall(MatDestroy(&change));
9522     PetscCall(ISDestroy(&change_primal));
9523   }
9524   PetscCall(MatDestroy(&S_j));
9525 
9526   /* free adjacency */
9527   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9528   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9529   PetscFunctionReturn(PETSC_SUCCESS);
9530 }
9531 
PCBDDCInitSubSchurs(PC pc)9532 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9533 {
9534   PC_IS      *pcis   = (PC_IS *)pc->data;
9535   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9536   PCBDDCGraph graph;
9537 
9538   PetscFunctionBegin;
9539   /* attach interface graph for determining subsets */
9540   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9541     IS       verticesIS, verticescomm;
9542     PetscInt vsize, *idxs;
9543 
9544     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9545     PetscCall(ISGetSize(verticesIS, &vsize));
9546     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9547     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9548     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9549     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9550     PetscCall(PCBDDCGraphCreate(&graph));
9551     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9552     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9553     PetscCall(ISDestroy(&verticescomm));
9554     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9555   } else {
9556     graph = pcbddc->mat_graph;
9557   }
9558   /* print some info */
9559   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9560     IS       vertices;
9561     PetscInt nv, nedges, nfaces;
9562     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9563     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9564     PetscCall(ISGetSize(vertices, &nv));
9565     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9566     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9567     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9568     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9569     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9570     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9571     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9572     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9573   }
9574 
9575   /* sub_schurs init */
9576   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9577   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9578 
9579   /* free graph struct */
9580   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9581   PetscFunctionReturn(PETSC_SUCCESS);
9582 }
9583 
PCBDDCViewGlobalIS(PC pc,IS is,PetscViewer viewer)9584 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9585 {
9586   Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
9587   PetscInt        n     = pc->pmat->rmap->n, ln, ni, st;
9588   const PetscInt *idxs;
9589   IS              gis;
9590 
9591   PetscFunctionBegin;
9592   if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9593   PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9594   PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9595   PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9596   PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9597   PetscCall(ISGetLocalSize(is, &ni));
9598   PetscCall(ISGetIndices(is, &idxs));
9599   for (PetscInt i = 0; i < ni; i++) {
9600     if (idxs[i] < 0 || idxs[i] >= ln) continue;
9601     matis->sf_leafdata[idxs[i]] = 1;
9602   }
9603   PetscCall(ISRestoreIndices(is, &idxs));
9604   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9605   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9606   ln = 0;
9607   for (PetscInt i = 0; i < n; i++) {
9608     if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9609   }
9610   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9611   PetscCall(ISView(gis, viewer));
9612   PetscCall(ISDestroy(&gis));
9613   PetscFunctionReturn(PETSC_SUCCESS);
9614 }
9615 
PCBDDCLoadOrViewCustomization(PC pc,PetscBool load,const char * outfile)9616 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9617 {
9618   PetscInt    header[11];
9619   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
9620   PetscViewer viewer;
9621   MPI_Comm    comm = PetscObjectComm((PetscObject)pc);
9622 
9623   PetscFunctionBegin;
9624   PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9625   if (load) {
9626     IS  is;
9627     Mat A;
9628 
9629     PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9630     PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9631     PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9632     PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9633     PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9634     PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9635     PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9636     PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9637     PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9638     PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9639     PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9640     if (header[0]) {
9641       PetscCall(ISCreate(comm, &is));
9642       PetscCall(ISLoad(is, viewer));
9643       PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9644       PetscCall(ISDestroy(&is));
9645     }
9646     if (header[1]) {
9647       PetscCall(ISCreate(comm, &is));
9648       PetscCall(ISLoad(is, viewer));
9649       PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9650       PetscCall(ISDestroy(&is));
9651     }
9652     if (header[2]) {
9653       IS *isarray;
9654 
9655       PetscCall(PetscMalloc1(header[2], &isarray));
9656       for (PetscInt i = 0; i < header[2]; i++) {
9657         PetscCall(ISCreate(comm, &isarray[i]));
9658         PetscCall(ISLoad(isarray[i], viewer));
9659       }
9660       PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9661       for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9662       PetscCall(PetscFree(isarray));
9663     }
9664     if (header[3]) {
9665       PetscCall(ISCreate(comm, &is));
9666       PetscCall(ISLoad(is, viewer));
9667       PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9668       PetscCall(ISDestroy(&is));
9669     }
9670     if (header[4]) {
9671       PetscCall(MatCreate(comm, &A));
9672       PetscCall(MatSetType(A, MATAIJ));
9673       PetscCall(MatLoad(A, viewer));
9674       PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9675       PetscCall(MatDestroy(&A));
9676     }
9677     if (header[9]) {
9678       PetscCall(MatCreate(comm, &A));
9679       PetscCall(MatSetType(A, MATIS));
9680       PetscCall(MatLoad(A, viewer));
9681       PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9682       PetscCall(MatDestroy(&A));
9683     }
9684   } else {
9685     header[0]  = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9686     header[1]  = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9687     header[2]  = pcbddc->n_ISForDofsLocal;
9688     header[3]  = (PetscInt)!!pcbddc->user_primal_vertices_local;
9689     header[4]  = (PetscInt)!!pcbddc->discretegradient;
9690     header[5]  = pcbddc->nedorder;
9691     header[6]  = pcbddc->nedfield;
9692     header[7]  = (PetscInt)pcbddc->nedglobal;
9693     header[8]  = (PetscInt)pcbddc->conforming;
9694     header[9]  = (PetscInt)!!pcbddc->divudotp;
9695     header[10] = (PetscInt)pcbddc->divudotp_trans;
9696     if (header[4]) header[3] = 0;
9697 
9698     PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9699     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9700     PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9701     for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9702     if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9703     if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9704     if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9705   }
9706   PetscCall(PetscViewerDestroy(&viewer));
9707   PetscFunctionReturn(PETSC_SUCCESS);
9708 }
9709 
9710 #include <../src/mat/impls/aij/mpi/mpiaij.h>
MatMPIAIJRestrict(Mat A,MPI_Comm ccomm,Mat * B)9711 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9712 {
9713   Mat         At;
9714   IS          rows;
9715   PetscInt    rst, ren;
9716   PetscLayout rmap;
9717 
9718   PetscFunctionBegin;
9719   rst = ren = 0;
9720   if (ccomm != MPI_COMM_NULL) {
9721     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9722     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9723     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9724     PetscCall(PetscLayoutSetUp(rmap));
9725     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9726   }
9727   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9728   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9729   PetscCall(ISDestroy(&rows));
9730 
9731   if (ccomm != MPI_COMM_NULL) {
9732     Mat_MPIAIJ *a, *b;
9733     IS          from, to;
9734     Vec         gvec;
9735     PetscInt    lsize;
9736 
9737     PetscCall(MatCreate(ccomm, B));
9738     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9739     PetscCall(MatSetType(*B, MATAIJ));
9740     PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9741     PetscCall(PetscLayoutSetUp((*B)->cmap));
9742     a = (Mat_MPIAIJ *)At->data;
9743     b = (Mat_MPIAIJ *)(*B)->data;
9744     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9745     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9746     PetscCall(PetscObjectReference((PetscObject)a->A));
9747     PetscCall(PetscObjectReference((PetscObject)a->B));
9748     b->A = a->A;
9749     b->B = a->B;
9750 
9751     b->donotstash   = a->donotstash;
9752     b->roworiented  = a->roworiented;
9753     b->rowindices   = NULL;
9754     b->rowvalues    = NULL;
9755     b->getrowactive = PETSC_FALSE;
9756 
9757     (*B)->rmap         = rmap;
9758     (*B)->factortype   = A->factortype;
9759     (*B)->assembled    = PETSC_TRUE;
9760     (*B)->insertmode   = NOT_SET_VALUES;
9761     (*B)->preallocated = PETSC_TRUE;
9762 
9763     if (a->colmap) {
9764 #if defined(PETSC_USE_CTABLE)
9765       PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9766 #else
9767       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9768       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9769 #endif
9770     } else b->colmap = NULL;
9771     if (a->garray) {
9772       PetscInt len;
9773       len = a->B->cmap->n;
9774       PetscCall(PetscMalloc1(len + 1, &b->garray));
9775       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9776     } else b->garray = NULL;
9777 
9778     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9779     b->lvec = a->lvec;
9780 
9781     /* cannot use VecScatterCopy */
9782     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9783     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9784     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9785     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9786     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9787     PetscCall(ISDestroy(&from));
9788     PetscCall(ISDestroy(&to));
9789     PetscCall(VecDestroy(&gvec));
9790   }
9791   PetscCall(MatDestroy(&At));
9792   PetscFunctionReturn(PETSC_SUCCESS);
9793 }
9794 
9795 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
MatAIJExtractRows(Mat A,IS rows,Mat * sA)9796 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9797 {
9798   PetscBool isaij;
9799   MPI_Comm  comm;
9800 
9801   PetscFunctionBegin;
9802   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9803   PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9804   PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9805   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9806   if (isaij) { /* SeqAIJ supports repeated rows */
9807     PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9808   } else {
9809     Mat                A_loc;
9810     Mat_SeqAIJ        *da;
9811     PetscSF            sf;
9812     PetscInt           ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9813     PetscScalar       *daa;
9814     const PetscInt    *idxs;
9815     const PetscSFNode *iremotes;
9816     PetscSFNode       *remotes;
9817 
9818     /* SF for incoming rows */
9819     PetscCall(PetscSFCreate(comm, &sf));
9820     PetscCall(ISGetLocalSize(rows, &ni));
9821     PetscCall(ISGetIndices(rows, &idxs));
9822     PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9823     PetscCall(ISRestoreIndices(rows, &idxs));
9824 
9825     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9826     da = (Mat_SeqAIJ *)A_loc->data;
9827     PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9828     for (PetscInt i = 0; i < m; i++) {
9829       rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9830       rdata[2 * i + 1] = da->i[i];
9831     }
9832     PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9833     PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9834     PetscCall(PetscMalloc1(ni + 1, &di));
9835     di[0] = 0;
9836     for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9837     PetscCall(PetscMalloc1(di[ni], &dj));
9838     PetscCall(PetscMalloc1(di[ni], &daa));
9839     PetscCall(PetscMalloc1(di[ni], &remotes));
9840 
9841     PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9842 
9843     /* SF graph for nonzeros */
9844     c = 0;
9845     for (PetscInt i = 0; i < ni; i++) {
9846       const PetscInt rank  = iremotes[i].rank;
9847       const PetscInt rsize = ldata[2 * i];
9848       for (PetscInt j = 0; j < rsize; j++) {
9849         remotes[c].rank  = rank;
9850         remotes[c].index = ldata[2 * i + 1] + j;
9851         c++;
9852       }
9853     }
9854     PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9855     PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9856     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9857     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9858     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9859     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9860 
9861     PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9862     PetscCall(MatDestroy(&A_loc));
9863     PetscCall(PetscSFDestroy(&sf));
9864     PetscCall(PetscFree(di));
9865     PetscCall(PetscFree(dj));
9866     PetscCall(PetscFree(daa));
9867     PetscCall(PetscFree(remotes));
9868     PetscCall(PetscFree2(ldata, rdata));
9869   }
9870   PetscFunctionReturn(PETSC_SUCCESS);
9871 }
9872