xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <../src/mat/impls/dense/seq/dense.h>
5 #include <petscdmplex.h>
6 #include <petscblaslapack.h>
7 #include <petsc/private/sfimpl.h>
8 #include <petsc/private/dmpleximpl.h>
9 #include <petscdmda.h>
10 
11 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
12 
13 /* if range is true,  it returns B s.t. span{B} = range(A)
14    if range is false, it returns B s.t. range(B) _|_ range(A) */
15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16 {
17   PetscScalar *uwork, *data, *U, ds = 0.;
18   PetscReal   *sing;
19   PetscBLASInt bM, bN, lwork, lierr, di = 1;
20   PetscInt     ulw, i, nr, nc, n;
21 #if defined(PETSC_USE_COMPLEX)
22   PetscReal *rwork2;
23 #endif
24 
25   PetscFunctionBegin;
26   PetscCall(MatGetSize(A, &nr, &nc));
27   if (!nr || !nc) PetscFunctionReturn(0);
28 
29   /* workspace */
30   if (!work) {
31     ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
32     PetscCall(PetscMalloc1(ulw, &uwork));
33   } else {
34     ulw   = lw;
35     uwork = work;
36   }
37   n = PetscMin(nr, nc);
38   if (!rwork) {
39     PetscCall(PetscMalloc1(n, &sing));
40   } else {
41     sing = rwork;
42   }
43 
44   /* SVD */
45   PetscCall(PetscMalloc1(nr * nr, &U));
46   PetscCall(PetscBLASIntCast(nr, &bM));
47   PetscCall(PetscBLASIntCast(nc, &bN));
48   PetscCall(PetscBLASIntCast(ulw, &lwork));
49   PetscCall(MatDenseGetArray(A, &data));
50   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
51 #if !defined(PETSC_USE_COMPLEX)
52   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
53 #else
54   PetscCall(PetscMalloc1(5 * n, &rwork2));
55   PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
56   PetscCall(PetscFree(rwork2));
57 #endif
58   PetscCall(PetscFPTrapPop());
59   PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
60   PetscCall(MatDenseRestoreArray(A, &data));
61   for (i = 0; i < n; i++)
62     if (sing[i] < PETSC_SMALL) break;
63   if (!rwork) PetscCall(PetscFree(sing));
64   if (!work) PetscCall(PetscFree(uwork));
65   /* create B */
66   if (!range) {
67     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
68     PetscCall(MatDenseGetArray(*B, &data));
69     PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
70   } else {
71     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
72     PetscCall(MatDenseGetArray(*B, &data));
73     PetscCall(PetscArraycpy(data, U, i * nr));
74   }
75   PetscCall(MatDenseRestoreArray(*B, &data));
76   PetscCall(PetscFree(U));
77   PetscFunctionReturn(0);
78 }
79 
80 /* TODO REMOVE */
81 #if defined(PRINT_GDET)
82 static int inc = 0;
83 static int lev = 0;
84 #endif
85 
86 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
87 {
88   Mat          GE, GEd;
89   PetscInt     rsize, csize, esize;
90   PetscScalar *ptr;
91 
92   PetscFunctionBegin;
93   PetscCall(ISGetSize(edge, &esize));
94   if (!esize) PetscFunctionReturn(0);
95   PetscCall(ISGetSize(extrow, &rsize));
96   PetscCall(ISGetSize(extcol, &csize));
97 
98   /* gradients */
99   ptr = work + 5 * esize;
100   PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
101   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
102   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
103   PetscCall(MatDestroy(&GE));
104 
105   /* constants */
106   ptr += rsize * csize;
107   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
108   PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
109   PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
110   PetscCall(MatDestroy(&GE));
111   PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
112   PetscCall(MatDestroy(&GEd));
113 
114   if (corners) {
115     Mat                GEc;
116     const PetscScalar *vals;
117     PetscScalar        v;
118 
119     PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
120     PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
121     PetscCall(MatDenseGetArrayRead(GEd, &vals));
122     /* v    = PetscAbsScalar(vals[0]) */;
123     v        = 1.;
124     cvals[0] = vals[0] / v;
125     cvals[1] = vals[1] / v;
126     PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
127     PetscCall(MatScale(*GKins, 1. / v));
128 #if defined(PRINT_GDET)
129     {
130       PetscViewer viewer;
131       char        filename[256];
132       sprintf(filename, "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++);
133       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
134       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
135       PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
136       PetscCall(MatView(GEc, viewer));
137       PetscCall(PetscObjectSetName((PetscObject)(*GKins), "GK"));
138       PetscCall(MatView(*GKins, viewer));
139       PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
140       PetscCall(MatView(GEd, viewer));
141       PetscCall(PetscViewerDestroy(&viewer));
142     }
143 #endif
144     PetscCall(MatDestroy(&GEd));
145     PetscCall(MatDestroy(&GEc));
146   }
147 
148   PetscFunctionReturn(0);
149 }
150 
151 PetscErrorCode PCBDDCNedelecSupport(PC pc)
152 {
153   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
154   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
155   Mat                    G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
156   Vec                    tvec;
157   PetscSF                sfv;
158   ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
159   MPI_Comm               comm;
160   IS                     lned, primals, allprimals, nedfieldlocal;
161   IS                    *eedges, *extrows, *extcols, *alleedges;
162   PetscBT                btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
163   PetscScalar           *vals, *work;
164   PetscReal             *rwork;
165   const PetscInt        *idxs, *ii, *jj, *iit, *jjt;
166   PetscInt               ne, nv, Lv, order, n, field;
167   PetscInt               n_neigh, *neigh, *n_shared, **shared;
168   PetscInt               i, j, extmem, cum, maxsize, nee;
169   PetscInt              *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170   PetscInt              *sfvleaves, *sfvroots;
171   PetscInt              *corners, *cedges;
172   PetscInt              *ecount, **eneighs, *vcount, **vneighs;
173   PetscInt              *emarks;
174   PetscBool              print, eerr, done, lrc[2], conforming, global, singular, setprimal;
175 
176   PetscFunctionBegin;
177   /* If the discrete gradient is defined for a subset of dofs and global is true,
178      it assumes G is given in global ordering for all the dofs.
179      Otherwise, the ordering is global for the Nedelec field */
180   order      = pcbddc->nedorder;
181   conforming = pcbddc->conforming;
182   field      = pcbddc->nedfield;
183   global     = pcbddc->nedglobal;
184   setprimal  = PETSC_FALSE;
185   print      = PETSC_FALSE;
186   singular   = PETSC_FALSE;
187 
188   /* Command line customization */
189   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
190   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
191   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_singular", "Infer nullspace from discrete gradient", NULL, singular, &singular, NULL));
192   PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
193   /* print debug info TODO: to be removed */
194   PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
195   PetscOptionsEnd();
196 
197   /* Return if there are no edges in the decomposition and the problem is not singular */
198   PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
199   PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
200   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
201   if (!singular) {
202     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
203     lrc[0] = PETSC_FALSE;
204     for (i = 0; i < n; i++) {
205       if (PetscRealPart(vals[i]) > 2.) {
206         lrc[0] = PETSC_TRUE;
207         break;
208       }
209     }
210     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
211     PetscCall(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPIU_BOOL, MPI_LOR, comm));
212     if (!lrc[1]) PetscFunctionReturn(0);
213   }
214 
215   /* Get Nedelec field */
216   PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
217   if (pcbddc->n_ISForDofsLocal && field >= 0) {
218     PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
219     nedfieldlocal = pcbddc->ISForDofsLocal[field];
220     PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
221   } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
222     ne            = n;
223     nedfieldlocal = NULL;
224     global        = PETSC_TRUE;
225   } else if (field == PETSC_DECIDE) {
226     PetscInt rst, ren, *idx;
227 
228     PetscCall(PetscArrayzero(matis->sf_leafdata, n));
229     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
230     PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
231     for (i = rst; i < ren; i++) {
232       PetscInt nc;
233 
234       PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
235       if (nc > 1) matis->sf_rootdata[i - rst] = 1;
236       PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
237     }
238     PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
239     PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
240     PetscCall(PetscMalloc1(n, &idx));
241     for (i = 0, ne = 0; i < n; i++)
242       if (matis->sf_leafdata[i]) idx[ne++] = i;
243     PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
244   } else {
245     SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
246   }
247 
248   /* Sanity checks */
249   PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
250   PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
251   PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
252 
253   /* Just set primal dofs and return */
254   if (setprimal) {
255     IS        enedfieldlocal;
256     PetscInt *eidxs;
257 
258     PetscCall(PetscMalloc1(ne, &eidxs));
259     PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
260     if (nedfieldlocal) {
261       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
262       for (i = 0, cum = 0; i < ne; i++) {
263         if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
264       }
265       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
266     } else {
267       for (i = 0, cum = 0; i < ne; i++) {
268         if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
269       }
270     }
271     PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
272     PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
273     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
274     PetscCall(PetscFree(eidxs));
275     PetscCall(ISDestroy(&nedfieldlocal));
276     PetscCall(ISDestroy(&enedfieldlocal));
277     PetscFunctionReturn(0);
278   }
279 
280   /* Compute some l2g maps */
281   if (nedfieldlocal) {
282     IS is;
283 
284     /* need to map from the local Nedelec field to local numbering */
285     PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
286     /* need to map from the local Nedelec field to global numbering for the whole dofs*/
287     PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
288     PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
289     /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
290     if (global) {
291       PetscCall(PetscObjectReference((PetscObject)al2g));
292       el2g = al2g;
293     } else {
294       IS gis;
295 
296       PetscCall(ISRenumber(is, NULL, NULL, &gis));
297       PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
298       PetscCall(ISDestroy(&gis));
299     }
300     PetscCall(ISDestroy(&is));
301   } else {
302     /* restore default */
303     pcbddc->nedfield = -1;
304     /* one ref for the destruction of al2g, one for el2g */
305     PetscCall(PetscObjectReference((PetscObject)al2g));
306     PetscCall(PetscObjectReference((PetscObject)al2g));
307     el2g = al2g;
308     fl2g = NULL;
309   }
310 
311   /* Start communication to drop connections for interior edges (for cc analysis only) */
312   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
313   PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
314   if (nedfieldlocal) {
315     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
316     for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
317     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
318   } else {
319     for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
320   }
321   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
322   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
323 
324   if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
325     PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
326     PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
327     if (global) {
328       PetscInt rst;
329 
330       PetscCall(MatGetOwnershipRange(G, &rst, NULL));
331       for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
332         if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
333       }
334       PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
335       PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
336     } else {
337       PetscInt *tbz;
338 
339       PetscCall(PetscMalloc1(ne, &tbz));
340       PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341       PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
342       PetscCall(ISGetIndices(nedfieldlocal, &idxs));
343       for (i = 0, cum = 0; i < ne; i++)
344         if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
345       PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
346       PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
347       PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
348       PetscCall(PetscFree(tbz));
349     }
350   } else { /* we need the entire G to infer the nullspace */
351     PetscCall(PetscObjectReference((PetscObject)pcbddc->discretegradient));
352     G = pcbddc->discretegradient;
353   }
354 
355   /* Extract subdomain relevant rows of G */
356   PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
357   PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
358   PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall));
359   PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
360   PetscCall(ISDestroy(&lned));
361   PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
362   PetscCall(MatDestroy(&lGall));
363   PetscCall(MatISGetLocalMat(lGis, &lG));
364 
365   /* SF for nodal dofs communications */
366   PetscCall(MatGetLocalSize(G, NULL, &Lv));
367   PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
368   PetscCall(PetscObjectReference((PetscObject)vl2g));
369   PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
370   PetscCall(PetscSFCreate(comm, &sfv));
371   PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
372   PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
373   PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
374   i = singular ? 2 : 1;
375   PetscCall(PetscMalloc2(i * nv, &sfvleaves, i * Lv, &sfvroots));
376 
377   /* Destroy temporary G created in MATIS format and modified G */
378   PetscCall(PetscObjectReference((PetscObject)lG));
379   PetscCall(MatDestroy(&lGis));
380   PetscCall(MatDestroy(&G));
381 
382   if (print) {
383     PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
384     PetscCall(MatView(lG, NULL));
385   }
386 
387   /* Save lG for values insertion in change of basis */
388   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
389 
390   /* Analyze the edge-nodes connections (duplicate lG) */
391   PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
392   PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
393   PetscCall(PetscBTCreate(nv, &btv));
394   PetscCall(PetscBTCreate(ne, &bte));
395   PetscCall(PetscBTCreate(ne, &btb));
396   PetscCall(PetscBTCreate(ne, &btbd));
397   PetscCall(PetscBTCreate(nv, &btvcand));
398   /* need to import the boundary specification to ensure the
399      proper detection of coarse edges' endpoints */
400   if (pcbddc->DirichletBoundariesLocal) {
401     IS is;
402 
403     if (fl2g) {
404       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
405     } else {
406       is = pcbddc->DirichletBoundariesLocal;
407     }
408     PetscCall(ISGetLocalSize(is, &cum));
409     PetscCall(ISGetIndices(is, &idxs));
410     for (i = 0; i < cum; i++) {
411       if (idxs[i] >= 0) {
412         PetscCall(PetscBTSet(btb, idxs[i]));
413         PetscCall(PetscBTSet(btbd, idxs[i]));
414       }
415     }
416     PetscCall(ISRestoreIndices(is, &idxs));
417     if (fl2g) PetscCall(ISDestroy(&is));
418   }
419   if (pcbddc->NeumannBoundariesLocal) {
420     IS is;
421 
422     if (fl2g) {
423       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
424     } else {
425       is = pcbddc->NeumannBoundariesLocal;
426     }
427     PetscCall(ISGetLocalSize(is, &cum));
428     PetscCall(ISGetIndices(is, &idxs));
429     for (i = 0; i < cum; i++) {
430       if (idxs[i] >= 0) PetscCall(PetscBTSet(btb, idxs[i]));
431     }
432     PetscCall(ISRestoreIndices(is, &idxs));
433     if (fl2g) PetscCall(ISDestroy(&is));
434   }
435 
436   /* Count neighs per dof */
437   PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, &eneighs));
438   PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, &vneighs));
439 
440   /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
441      for proper detection of coarse edges' endpoints */
442   PetscCall(PetscBTCreate(ne, &btee));
443   for (i = 0; i < ne; i++) {
444     if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
445   }
446   PetscCall(PetscMalloc1(ne, &marks));
447   if (!conforming) {
448     PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
449     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
450   }
451   PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
452   PetscCall(MatSeqAIJGetArray(lGe, &vals));
453   cum = 0;
454   for (i = 0; i < ne; i++) {
455     /* eliminate rows corresponding to edge dofs belonging to coarse faces */
456     if (!PetscBTLookup(btee, i)) {
457       marks[cum++] = i;
458       continue;
459     }
460     /* set badly connected edge dofs as primal */
461     if (!conforming) {
462       if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
463         marks[cum++] = i;
464         PetscCall(PetscBTSet(bte, i));
465         for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
466       } else {
467         /* every edge dofs should be connected trough a certain number of nodal dofs
468            to other edge dofs belonging to coarse edges
469            - at most 2 endpoints
470            - order-1 interior nodal dofs
471            - no undefined nodal dofs (nconn < order)
472         */
473         PetscInt ends = 0, ints = 0, undef = 0;
474         for (j = ii[i]; j < ii[i + 1]; j++) {
475           PetscInt v     = jj[j], k;
476           PetscInt nconn = iit[v + 1] - iit[v];
477           for (k = iit[v]; k < iit[v + 1]; k++)
478             if (!PetscBTLookup(btee, jjt[k])) nconn--;
479           if (nconn > order) ends++;
480           else if (nconn == order) ints++;
481           else undef++;
482         }
483         if (undef || ends > 2 || ints != order - 1) {
484           marks[cum++] = i;
485           PetscCall(PetscBTSet(bte, i));
486           for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
487         }
488       }
489     }
490     /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
491     if (!order && ii[i + 1] != ii[i]) {
492       PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
493       for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
494     }
495   }
496   PetscCall(PetscBTDestroy(&btee));
497   PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
498   PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
499   if (!conforming) {
500     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
501     PetscCall(MatDestroy(&lGt));
502   }
503   PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
504 
505   /* identify splitpoints and corner candidates */
506   PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
507   if (print) {
508     PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
509     PetscCall(MatView(lGe, NULL));
510     PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
511     PetscCall(MatView(lGt, NULL));
512   }
513   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
514   PetscCall(MatSeqAIJGetArray(lGt, &vals));
515   for (i = 0; i < nv; i++) {
516     PetscInt  ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
517     PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
518     if (!order) { /* variable order */
519       PetscReal vorder = 0.;
520 
521       for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
522       test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
523       PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
524       ord = 1;
525     }
526     PetscAssert(test % ord == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of edge dofs %" PetscInt_FMT " connected with nodal dof %" PetscInt_FMT " with order %" PetscInt_FMT, test, i, ord);
527     for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
528       if (PetscBTLookup(btbd, jj[j])) {
529         bdir = PETSC_TRUE;
530         break;
531       }
532       if (vc != ecount[jj[j]]) {
533         sneighs = PETSC_FALSE;
534       } else {
535         PetscInt k, *vn = vneighs[i], *en = eneighs[jj[j]];
536         for (k = 0; k < vc; k++) {
537           if (vn[k] != en[k]) {
538             sneighs = PETSC_FALSE;
539             break;
540           }
541         }
542       }
543     }
544     if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
545       if (print) PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]);
546       PetscCall(PetscBTSet(btv, i));
547     } else if (test == ord) {
548       if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
549         if (print) PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i);
550         PetscCall(PetscBTSet(btv, i));
551       } else {
552         if (print) PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i);
553         PetscCall(PetscBTSet(btvcand, i));
554       }
555     }
556   }
557   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
558   PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
559   PetscCall(PetscBTDestroy(&btbd));
560 
561   /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
562   if (order != 1) {
563     if (print) PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n");
564     PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
565     for (i = 0; i < nv; i++) {
566       if (PetscBTLookup(btvcand, i)) {
567         PetscBool found = PETSC_FALSE;
568         for (j = ii[i]; j < ii[i + 1] && !found; j++) {
569           PetscInt k, e = jj[j];
570           if (PetscBTLookup(bte, e)) continue;
571           for (k = iit[e]; k < iit[e + 1]; k++) {
572             PetscInt v = jjt[k];
573             if (v != i && PetscBTLookup(btvcand, v)) {
574               found = PETSC_TRUE;
575               break;
576             }
577           }
578         }
579         if (!found) {
580           if (print) PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " CLEARED\n", i);
581           PetscCall(PetscBTClear(btvcand, i));
582         } else {
583           if (print) PetscPrintf(PETSC_COMM_SELF, "  CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i);
584         }
585       }
586     }
587     PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
588   }
589   PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
590   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
591   PetscCall(MatDestroy(&lGe));
592 
593   /* Get the local G^T explicitly */
594   PetscCall(MatDestroy(&lGt));
595   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
596   PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
597 
598   /* Mark interior nodal dofs */
599   PetscCall(ISLocalToGlobalMappingGetInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared));
600   PetscCall(PetscBTCreate(nv, &btvi));
601   for (i = 1; i < n_neigh; i++) {
602     for (j = 0; j < n_shared[i]; j++) PetscCall(PetscBTSet(btvi, shared[i][j]));
603   }
604   PetscCall(ISLocalToGlobalMappingRestoreInfo(vl2g, &n_neigh, &neigh, &n_shared, &shared));
605 
606   /* communicate corners and splitpoints */
607   PetscCall(PetscMalloc1(nv, &vmarks));
608   PetscCall(PetscArrayzero(sfvleaves, nv));
609   PetscCall(PetscArrayzero(sfvroots, Lv));
610   for (i = 0; i < nv; i++)
611     if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
612 
613   if (print) {
614     IS tbz;
615 
616     cum = 0;
617     for (i = 0; i < nv; i++)
618       if (sfvleaves[i]) vmarks[cum++] = i;
619 
620     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
621     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
622     PetscCall(ISView(tbz, NULL));
623     PetscCall(ISDestroy(&tbz));
624   }
625 
626   PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
627   PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
628   PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
629   PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
630 
631   /* Zero rows of lGt corresponding to identified corners
632      and interior nodal dofs */
633   cum = 0;
634   for (i = 0; i < nv; i++) {
635     if (sfvleaves[i]) {
636       vmarks[cum++] = i;
637       PetscCall(PetscBTSet(btv, i));
638     }
639     if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
640   }
641   PetscCall(PetscBTDestroy(&btvi));
642   if (print) {
643     IS tbz;
644 
645     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
646     PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
647     PetscCall(ISView(tbz, NULL));
648     PetscCall(ISDestroy(&tbz));
649   }
650   PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
651   PetscCall(PetscFree(vmarks));
652   PetscCall(PetscSFDestroy(&sfv));
653   PetscCall(PetscFree2(sfvleaves, sfvroots));
654 
655   /* Recompute G */
656   PetscCall(MatDestroy(&lG));
657   PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
658   if (print) {
659     PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
660     PetscCall(MatView(lG, NULL));
661     PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
662     PetscCall(MatView(lGt, NULL));
663   }
664 
665   /* Get primal dofs (if any) */
666   cum = 0;
667   for (i = 0; i < ne; i++) {
668     if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
669   }
670   if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
671   PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
672   if (print) {
673     PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
674     PetscCall(ISView(primals, NULL));
675   }
676   PetscCall(PetscBTDestroy(&bte));
677   /* TODO: what if the user passed in some of them ?  */
678   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
679   PetscCall(ISDestroy(&primals));
680 
681   /* Compute edge connectivity */
682   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
683 
684   /* Symbolic conn = lG*lGt */
685   PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
686   PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
687   PetscCall(MatProductSetAlgorithm(conn, "default"));
688   PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
689   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
690   PetscCall(MatProductSetFromOptions(conn));
691   PetscCall(MatProductSymbolic(conn));
692 
693   PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
694   if (fl2g) {
695     PetscBT   btf;
696     PetscInt *iia, *jja, *iiu, *jju;
697     PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
698 
699     /* create CSR for all local dofs */
700     PetscCall(PetscMalloc1(n + 1, &iia));
701     if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
702       PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
703       iiu = pcbddc->mat_graph->xadj;
704       jju = pcbddc->mat_graph->adjncy;
705     } else if (pcbddc->use_local_adj) {
706       rest = PETSC_TRUE;
707       PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
708     } else {
709       free = PETSC_TRUE;
710       PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
711       iiu[0] = 0;
712       for (i = 0; i < n; i++) {
713         iiu[i + 1] = i + 1;
714         jju[i]     = -1;
715       }
716     }
717 
718     /* import sizes of CSR */
719     iia[0] = 0;
720     for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
721 
722     /* overwrite entries corresponding to the Nedelec field */
723     PetscCall(PetscBTCreate(n, &btf));
724     PetscCall(ISGetIndices(nedfieldlocal, &idxs));
725     for (i = 0; i < ne; i++) {
726       PetscCall(PetscBTSet(btf, idxs[i]));
727       iia[idxs[i] + 1] = ii[i + 1] - ii[i];
728     }
729 
730     /* iia in CSR */
731     for (i = 0; i < n; i++) iia[i + 1] += iia[i];
732 
733     /* jja in CSR */
734     PetscCall(PetscMalloc1(iia[n], &jja));
735     for (i = 0; i < n; i++)
736       if (!PetscBTLookup(btf, i))
737         for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
738 
739     /* map edge dofs connectivity */
740     if (jj) {
741       PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
742       for (i = 0; i < ne; i++) {
743         PetscInt e = idxs[i];
744         for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
745       }
746     }
747     PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
748     PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_OWN_POINTER));
749     if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
750     if (free) PetscCall(PetscFree2(iiu, jju));
751     PetscCall(PetscBTDestroy(&btf));
752   } else {
753     PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_USE_POINTER));
754   }
755 
756   /* Analyze interface for edge dofs */
757   PetscCall(PCBDDCAnalyzeInterface(pc));
758   pcbddc->mat_graph->twodim = PETSC_FALSE;
759 
760   /* Get coarse edges in the edge space */
761   PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
762   PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
763 
764   if (fl2g) {
765     PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
766     PetscCall(PetscMalloc1(nee, &eedges));
767     for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
768   } else {
769     eedges  = alleedges;
770     primals = allprimals;
771   }
772 
773   /* Mark fine edge dofs with their coarse edge id */
774   PetscCall(PetscArrayzero(marks, ne));
775   PetscCall(ISGetLocalSize(primals, &cum));
776   PetscCall(ISGetIndices(primals, &idxs));
777   for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
778   PetscCall(ISRestoreIndices(primals, &idxs));
779   if (print) {
780     PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
781     PetscCall(ISView(primals, NULL));
782   }
783 
784   maxsize = 0;
785   for (i = 0; i < nee; i++) {
786     PetscInt size, mark = i + 1;
787 
788     PetscCall(ISGetLocalSize(eedges[i], &size));
789     PetscCall(ISGetIndices(eedges[i], &idxs));
790     for (j = 0; j < size; j++) marks[idxs[j]] = mark;
791     PetscCall(ISRestoreIndices(eedges[i], &idxs));
792     maxsize = PetscMax(maxsize, size);
793   }
794 
795   /* Find coarse edge endpoints */
796   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
797   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
798   for (i = 0; i < nee; i++) {
799     PetscInt mark = i + 1, size;
800 
801     PetscCall(ISGetLocalSize(eedges[i], &size));
802     if (!size && nedfieldlocal) continue;
803     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
804     PetscCall(ISGetIndices(eedges[i], &idxs));
805     if (print) {
806       PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
807       PetscCall(ISView(eedges[i], NULL));
808     }
809     for (j = 0; j < size; j++) {
810       PetscInt k, ee = idxs[j];
811       if (print) PetscPrintf(PETSC_COMM_SELF, "  idx %" PetscInt_FMT "\n", ee);
812       for (k = ii[ee]; k < ii[ee + 1]; k++) {
813         if (print) PetscPrintf(PETSC_COMM_SELF, "    inspect %" PetscInt_FMT "\n", jj[k]);
814         if (PetscBTLookup(btv, jj[k])) {
815           if (print) PetscPrintf(PETSC_COMM_SELF, "      corner found (already set) %" PetscInt_FMT "\n", jj[k]);
816         } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
817           PetscInt  k2;
818           PetscBool corner = PETSC_FALSE;
819           for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
820             if (print) PetscPrintf(PETSC_COMM_SELF, "        INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2]));
821             /* it's a corner if either is connected with an edge dof belonging to a different cc or
822                if the edge dof lie on the natural part of the boundary */
823             if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
824               corner = PETSC_TRUE;
825               break;
826             }
827           }
828           if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
829             if (print) PetscPrintf(PETSC_COMM_SELF, "        corner found %" PetscInt_FMT "\n", jj[k]);
830             PetscCall(PetscBTSet(btv, jj[k]));
831           } else {
832             if (print) PetscPrintf(PETSC_COMM_SELF, "        no corners found\n");
833           }
834         }
835       }
836     }
837     PetscCall(ISRestoreIndices(eedges[i], &idxs));
838   }
839   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
840   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
841   PetscCall(PetscBTDestroy(&btb));
842 
843   /* Reset marked primal dofs */
844   PetscCall(ISGetLocalSize(primals, &cum));
845   PetscCall(ISGetIndices(primals, &idxs));
846   for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
847   PetscCall(ISRestoreIndices(primals, &idxs));
848 
849   /* Now use the initial lG */
850   PetscCall(MatDestroy(&lG));
851   PetscCall(MatDestroy(&lGt));
852   lG = lGinit;
853   PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
854 
855   /* Compute extended cols indices */
856   PetscCall(PetscBTCreate(nv, &btvc));
857   PetscCall(PetscBTCreate(nee, &bter));
858   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
859   PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
860   i *= maxsize;
861   PetscCall(PetscCalloc1(nee, &extcols));
862   PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
863   eerr = PETSC_FALSE;
864   for (i = 0; i < nee; i++) {
865     PetscInt size, found = 0;
866 
867     cum = 0;
868     PetscCall(ISGetLocalSize(eedges[i], &size));
869     if (!size && nedfieldlocal) continue;
870     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
871     PetscCall(ISGetIndices(eedges[i], &idxs));
872     PetscCall(PetscBTMemzero(nv, btvc));
873     for (j = 0; j < size; j++) {
874       PetscInt k, ee = idxs[j];
875       for (k = ii[ee]; k < ii[ee + 1]; k++) {
876         PetscInt vv = jj[k];
877         if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
878         else if (!PetscBTLookupSet(btvc, vv)) found++;
879       }
880     }
881     PetscCall(ISRestoreIndices(eedges[i], &idxs));
882     PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
883     PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
884     PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
885     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
886     /* it may happen that endpoints are not defined at this point
887        if it is the case, mark this edge for a second pass */
888     if (cum != size - 1 || found != 2) {
889       PetscCall(PetscBTSet(bter, i));
890       if (print) {
891         PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
892         PetscCall(ISView(eedges[i], NULL));
893         PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
894         PetscCall(ISView(extcols[i], NULL));
895       }
896       eerr = PETSC_TRUE;
897     }
898   }
899   /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
900   PetscCall(MPIU_Allreduce(&eerr, &done, 1, MPIU_BOOL, MPI_LOR, comm));
901   if (done) {
902     PetscInt *newprimals;
903 
904     PetscCall(PetscMalloc1(ne, &newprimals));
905     PetscCall(ISGetLocalSize(primals, &cum));
906     PetscCall(ISGetIndices(primals, &idxs));
907     PetscCall(PetscArraycpy(newprimals, idxs, cum));
908     PetscCall(ISRestoreIndices(primals, &idxs));
909     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
910     if (print) PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]);
911     for (i = 0; i < nee; i++) {
912       PetscBool has_candidates = PETSC_FALSE;
913       if (PetscBTLookup(bter, i)) {
914         PetscInt size, mark = i + 1;
915 
916         PetscCall(ISGetLocalSize(eedges[i], &size));
917         PetscCall(ISGetIndices(eedges[i], &idxs));
918         /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
919         for (j = 0; j < size; j++) {
920           PetscInt k, ee = idxs[j];
921           if (print) PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]);
922           for (k = ii[ee]; k < ii[ee + 1]; k++) {
923             /* set all candidates located on the edge as corners */
924             if (PetscBTLookup(btvcand, jj[k])) {
925               PetscInt k2, vv = jj[k];
926               has_candidates = PETSC_TRUE;
927               if (print) PetscPrintf(PETSC_COMM_SELF, "  Candidate set to vertex %" PetscInt_FMT "\n", vv);
928               PetscCall(PetscBTSet(btv, vv));
929               /* set all edge dofs connected to candidate as primals */
930               for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
931                 if (marks[jjt[k2]] == mark) {
932                   PetscInt k3, ee2 = jjt[k2];
933                   if (print) PetscPrintf(PETSC_COMM_SELF, "    Connected edge dof set to primal %" PetscInt_FMT "\n", ee2);
934                   newprimals[cum++] = ee2;
935                   /* finally set the new corners */
936                   for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
937                     if (print) PetscPrintf(PETSC_COMM_SELF, "      Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]);
938                     PetscCall(PetscBTSet(btv, jj[k3]));
939                   }
940                 }
941               }
942             } else {
943               if (print) PetscPrintf(PETSC_COMM_SELF, "  Not a candidate vertex %" PetscInt_FMT "\n", jj[k]);
944             }
945           }
946         }
947         if (!has_candidates) { /* circular edge */
948           PetscInt k, ee = idxs[0], *tmarks;
949 
950           PetscCall(PetscCalloc1(ne, &tmarks));
951           if (print) PetscPrintf(PETSC_COMM_SELF, "  Circular edge %" PetscInt_FMT "\n", i);
952           for (k = ii[ee]; k < ii[ee + 1]; k++) {
953             PetscInt k2;
954             if (print) PetscPrintf(PETSC_COMM_SELF, "    Set to corner %" PetscInt_FMT "\n", jj[k]);
955             PetscCall(PetscBTSet(btv, jj[k]));
956             for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
957           }
958           for (j = 0; j < size; j++) {
959             if (tmarks[idxs[j]] > 1) {
960               if (print) PetscPrintf(PETSC_COMM_SELF, "  Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]);
961               newprimals[cum++] = idxs[j];
962             }
963           }
964           PetscCall(PetscFree(tmarks));
965         }
966         PetscCall(ISRestoreIndices(eedges[i], &idxs));
967       }
968       PetscCall(ISDestroy(&extcols[i]));
969     }
970     PetscCall(PetscFree(extcols));
971     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
972     PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
973     if (fl2g) {
974       PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
975       PetscCall(ISDestroy(&primals));
976       for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
977       PetscCall(PetscFree(eedges));
978     }
979     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
980     PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
981     PetscCall(PetscFree(newprimals));
982     PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
983     PetscCall(ISDestroy(&primals));
984     PetscCall(PCBDDCAnalyzeInterface(pc));
985     pcbddc->mat_graph->twodim = PETSC_FALSE;
986     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
987     if (fl2g) {
988       PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
989       PetscCall(PetscMalloc1(nee, &eedges));
990       for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
991     } else {
992       eedges  = alleedges;
993       primals = allprimals;
994     }
995     PetscCall(PetscCalloc1(nee, &extcols));
996 
997     /* Mark again */
998     PetscCall(PetscArrayzero(marks, ne));
999     for (i = 0; i < nee; i++) {
1000       PetscInt size, mark = i + 1;
1001 
1002       PetscCall(ISGetLocalSize(eedges[i], &size));
1003       PetscCall(ISGetIndices(eedges[i], &idxs));
1004       for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1005       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1006     }
1007     if (print) {
1008       PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1009       PetscCall(ISView(primals, NULL));
1010     }
1011 
1012     /* Recompute extended cols */
1013     eerr = PETSC_FALSE;
1014     for (i = 0; i < nee; i++) {
1015       PetscInt size;
1016 
1017       cum = 0;
1018       PetscCall(ISGetLocalSize(eedges[i], &size));
1019       if (!size && nedfieldlocal) continue;
1020       PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1021       PetscCall(ISGetIndices(eedges[i], &idxs));
1022       for (j = 0; j < size; j++) {
1023         PetscInt k, ee = idxs[j];
1024         for (k = ii[ee]; k < ii[ee + 1]; k++)
1025           if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1026       }
1027       PetscCall(ISRestoreIndices(eedges[i], &idxs));
1028       PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1029       PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1030       PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1031       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1032       if (cum != size - 1) {
1033         if (print) {
1034           PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1035           PetscCall(ISView(eedges[i], NULL));
1036           PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1037           PetscCall(ISView(extcols[i], NULL));
1038         }
1039         eerr = PETSC_TRUE;
1040       }
1041     }
1042   }
1043   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1044   PetscCall(PetscFree2(extrow, gidxs));
1045   PetscCall(PetscBTDestroy(&bter));
1046   if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1047   /* an error should not occur at this point */
1048   PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1049 
1050   /* Check the number of endpoints */
1051   PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1052   PetscCall(PetscMalloc1(2 * nee, &corners));
1053   PetscCall(PetscMalloc1(nee, &cedges));
1054   for (i = 0; i < nee; i++) {
1055     PetscInt size, found = 0, gc[2];
1056 
1057     /* init with defaults */
1058     cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1059     PetscCall(ISGetLocalSize(eedges[i], &size));
1060     if (!size && nedfieldlocal) continue;
1061     PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1062     PetscCall(ISGetIndices(eedges[i], &idxs));
1063     PetscCall(PetscBTMemzero(nv, btvc));
1064     for (j = 0; j < size; j++) {
1065       PetscInt k, ee = idxs[j];
1066       for (k = ii[ee]; k < ii[ee + 1]; k++) {
1067         PetscInt vv = jj[k];
1068         if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1069           PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more then two corners for edge %" PetscInt_FMT, i);
1070           corners[i * 2 + found++] = vv;
1071         }
1072       }
1073     }
1074     if (found != 2) {
1075       PetscInt e;
1076       if (fl2g) {
1077         PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1078       } else {
1079         e = idxs[0];
1080       }
1081       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1082     }
1083 
1084     /* get primal dof index on this coarse edge */
1085     PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1086     if (gc[0] > gc[1]) {
1087       PetscInt swap      = corners[2 * i];
1088       corners[2 * i]     = corners[2 * i + 1];
1089       corners[2 * i + 1] = swap;
1090     }
1091     cedges[i] = idxs[size - 1];
1092     PetscCall(ISRestoreIndices(eedges[i], &idxs));
1093     if (print) PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]);
1094   }
1095   PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1096   PetscCall(PetscBTDestroy(&btvc));
1097 
1098   if (PetscDefined(USE_DEBUG)) {
1099     /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1100      not interfere with neighbouring coarse edges */
1101     PetscCall(PetscMalloc1(nee + 1, &emarks));
1102     PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1103     for (i = 0; i < nv; i++) {
1104       PetscInt emax = 0, eemax = 0;
1105 
1106       if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1107       PetscCall(PetscArrayzero(emarks, nee + 1));
1108       for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1109       for (j = 1; j < nee + 1; j++) {
1110         if (emax < emarks[j]) {
1111           emax  = emarks[j];
1112           eemax = j;
1113         }
1114       }
1115       /* not relevant for edges */
1116       if (!eemax) continue;
1117 
1118       for (j = ii[i]; j < ii[i + 1]; j++) {
1119         PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1120       }
1121     }
1122     PetscCall(PetscFree(emarks));
1123     PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1124   }
1125 
1126   /* Compute extended rows indices for edge blocks of the change of basis */
1127   PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1128   PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1129   extmem *= maxsize;
1130   PetscCall(PetscMalloc1(extmem * nee, &extrow));
1131   PetscCall(PetscMalloc1(nee, &extrows));
1132   PetscCall(PetscCalloc1(nee, &extrowcum));
1133   for (i = 0; i < nv; i++) {
1134     PetscInt mark = 0, size, start;
1135 
1136     if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1137     for (j = ii[i]; j < ii[i + 1]; j++)
1138       if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1139 
1140     /* not relevant */
1141     if (!mark) continue;
1142 
1143     /* import extended row */
1144     mark--;
1145     start = mark * extmem + extrowcum[mark];
1146     size  = ii[i + 1] - ii[i];
1147     PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1148     PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1149     extrowcum[mark] += size;
1150   }
1151   PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1152   PetscCall(MatDestroy(&lGt));
1153   PetscCall(PetscFree(marks));
1154 
1155   /* Compress extrows */
1156   cum = 0;
1157   for (i = 0; i < nee; i++) {
1158     PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1159     PetscCall(PetscSortRemoveDupsInt(&size, start));
1160     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1161     cum = PetscMax(cum, size);
1162   }
1163   PetscCall(PetscFree(extrowcum));
1164   PetscCall(PetscBTDestroy(&btv));
1165   PetscCall(PetscBTDestroy(&btvcand));
1166 
1167   /* Workspace for lapack inner calls and VecSetValues */
1168   PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1169 
1170   /* Create change of basis matrix (preallocation can be improved) */
1171   PetscCall(MatCreate(comm, &T));
1172   PetscCall(MatSetSizes(T, pc->pmat->rmap->n, pc->pmat->rmap->n, pc->pmat->rmap->N, pc->pmat->rmap->N));
1173   PetscCall(MatSetType(T, MATAIJ));
1174   PetscCall(MatSeqAIJSetPreallocation(T, 10, NULL));
1175   PetscCall(MatMPIAIJSetPreallocation(T, 10, NULL, 10, NULL));
1176   PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1177   PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1178   PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1179   PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1180 
1181   /* Defaults to identity */
1182   PetscCall(MatCreateVecs(pc->pmat, &tvec, NULL));
1183   PetscCall(VecSet(tvec, 1.0));
1184   PetscCall(MatDiagonalSet(T, tvec, INSERT_VALUES));
1185   PetscCall(VecDestroy(&tvec));
1186 
1187   /* Create discrete gradient for the coarser level if needed */
1188   PetscCall(MatDestroy(&pcbddc->nedcG));
1189   PetscCall(ISDestroy(&pcbddc->nedclocal));
1190   if (pcbddc->current_level < pcbddc->max_levels) {
1191     ISLocalToGlobalMapping cel2g, cvl2g;
1192     IS                     wis, gwis;
1193     PetscInt               cnv, cne;
1194 
1195     PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1196     if (fl2g) {
1197       PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1198     } else {
1199       PetscCall(PetscObjectReference((PetscObject)wis));
1200       pcbddc->nedclocal = wis;
1201     }
1202     PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1203     PetscCall(ISDestroy(&wis));
1204     PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1205     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1206     PetscCall(ISDestroy(&wis));
1207     PetscCall(ISDestroy(&gwis));
1208 
1209     PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1210     PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1211     PetscCall(ISDestroy(&wis));
1212     PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1213     PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1214     PetscCall(ISDestroy(&wis));
1215     PetscCall(ISDestroy(&gwis));
1216 
1217     PetscCall(MatCreate(comm, &pcbddc->nedcG));
1218     PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1219     PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1220     PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1221     PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1222     PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1223     PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1224     PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1225   }
1226   PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1227 
1228 #if defined(PRINT_GDET)
1229   inc = 0;
1230   lev = pcbddc->current_level;
1231 #endif
1232 
1233   /* Insert values in the change of basis matrix */
1234   for (i = 0; i < nee; i++) {
1235     Mat         Gins = NULL, GKins = NULL;
1236     IS          cornersis = NULL;
1237     PetscScalar cvals[2];
1238 
1239     if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1240     PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1241     if (Gins && GKins) {
1242       const PetscScalar *data;
1243       const PetscInt    *rows, *cols;
1244       PetscInt           nrh, nch, nrc, ncc;
1245 
1246       PetscCall(ISGetIndices(eedges[i], &cols));
1247       /* H1 */
1248       PetscCall(ISGetIndices(extrows[i], &rows));
1249       PetscCall(MatGetSize(Gins, &nrh, &nch));
1250       PetscCall(MatDenseGetArrayRead(Gins, &data));
1251       PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1252       PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1253       PetscCall(ISRestoreIndices(extrows[i], &rows));
1254       /* complement */
1255       PetscCall(MatGetSize(GKins, &nrc, &ncc));
1256       PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1257       PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1258       PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1259       PetscCall(MatDenseGetArrayRead(GKins, &data));
1260       PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1261       PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1262 
1263       /* coarse discrete gradient */
1264       if (pcbddc->nedcG) {
1265         PetscInt cols[2];
1266 
1267         cols[0] = 2 * i;
1268         cols[1] = 2 * i + 1;
1269         PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1270       }
1271       PetscCall(ISRestoreIndices(eedges[i], &cols));
1272     }
1273     PetscCall(ISDestroy(&extrows[i]));
1274     PetscCall(ISDestroy(&extcols[i]));
1275     PetscCall(ISDestroy(&cornersis));
1276     PetscCall(MatDestroy(&Gins));
1277     PetscCall(MatDestroy(&GKins));
1278   }
1279   PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1280 
1281   /* Start assembling */
1282   PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1283   if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1284 
1285   /* Free */
1286   if (fl2g) {
1287     PetscCall(ISDestroy(&primals));
1288     for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1289     PetscCall(PetscFree(eedges));
1290   }
1291 
1292   /* hack mat_graph with primal dofs on the coarse edges */
1293   {
1294     PCBDDCGraph graph  = pcbddc->mat_graph;
1295     PetscInt   *oqueue = graph->queue;
1296     PetscInt   *ocptr  = graph->cptr;
1297     PetscInt    ncc, *idxs;
1298 
1299     /* find first primal edge */
1300     if (pcbddc->nedclocal) {
1301       PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1302     } else {
1303       if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1304       idxs = cedges;
1305     }
1306     cum = 0;
1307     while (cum < nee && cedges[cum] < 0) cum++;
1308 
1309     /* adapt connected components */
1310     PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1311     graph->cptr[0] = 0;
1312     for (i = 0, ncc = 0; i < graph->ncc; i++) {
1313       PetscInt lc = ocptr[i + 1] - ocptr[i];
1314       if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1315         graph->cptr[ncc + 1]           = graph->cptr[ncc] + 1;
1316         graph->queue[graph->cptr[ncc]] = cedges[cum];
1317         ncc++;
1318         lc--;
1319         cum++;
1320         while (cum < nee && cedges[cum] < 0) cum++;
1321       }
1322       graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1323       for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1324       ncc++;
1325     }
1326     graph->ncc = ncc;
1327     if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1328     PetscCall(PetscFree2(ocptr, oqueue));
1329   }
1330   PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1331   PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1332   PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1333   PetscCall(MatDestroy(&conn));
1334 
1335   PetscCall(ISDestroy(&nedfieldlocal));
1336   PetscCall(PetscFree(extrow));
1337   PetscCall(PetscFree2(work, rwork));
1338   PetscCall(PetscFree(corners));
1339   PetscCall(PetscFree(cedges));
1340   PetscCall(PetscFree(extrows));
1341   PetscCall(PetscFree(extcols));
1342   PetscCall(MatDestroy(&lG));
1343 
1344   /* Complete assembling */
1345   PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1346   if (pcbddc->nedcG) {
1347     PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1348 #if 0
1349     PetscCall(PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G"));
1350     PetscCall(MatView(pcbddc->nedcG,NULL));
1351 #endif
1352   }
1353 
1354   /* set change of basis */
1355   PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, singular));
1356   PetscCall(MatDestroy(&T));
1357 
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 /* the near-null space of BDDC carries information on quadrature weights,
1362    and these can be collinear -> so cheat with MatNullSpaceCreate
1363    and create a suitable set of basis vectors first */
1364 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1365 {
1366   PetscInt i;
1367 
1368   PetscFunctionBegin;
1369   for (i = 0; i < nvecs; i++) {
1370     PetscInt first, last;
1371 
1372     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1373     PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1374     if (i >= first && i < last) {
1375       PetscScalar *data;
1376       PetscCall(VecGetArray(quad_vecs[i], &data));
1377       if (!has_const) {
1378         data[i - first] = 1.;
1379       } else {
1380         data[2 * i - first]     = 1. / PetscSqrtReal(2.);
1381         data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1382       }
1383       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1384     }
1385     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1386   }
1387   PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1388   for (i = 0; i < nvecs; i++) { /* reset vectors */
1389     PetscInt first, last;
1390     PetscCall(VecLockReadPop(quad_vecs[i]));
1391     PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1392     if (i >= first && i < last) {
1393       PetscScalar *data;
1394       PetscCall(VecGetArray(quad_vecs[i], &data));
1395       if (!has_const) {
1396         data[i - first] = 0.;
1397       } else {
1398         data[2 * i - first]     = 0.;
1399         data[2 * i - first + 1] = 0.;
1400       }
1401       PetscCall(VecRestoreArray(quad_vecs[i], &data));
1402     }
1403     PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1404     PetscCall(VecLockReadPush(quad_vecs[i]));
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1410 {
1411   Mat                    loc_divudotp;
1412   Vec                    p, v, vins, quad_vec, *quad_vecs;
1413   ISLocalToGlobalMapping map;
1414   PetscScalar           *vals;
1415   const PetscScalar     *array;
1416   PetscInt               i, maxneighs = 0, maxsize, *gidxs;
1417   PetscInt               n_neigh, *neigh, *n_shared, **shared;
1418   PetscMPIInt            rank;
1419 
1420   PetscFunctionBegin;
1421   PetscCall(ISLocalToGlobalMappingGetInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared));
1422   for (i = 0; i < n_neigh; i++) maxneighs = PetscMax(graph->count[shared[i][0]] + 1, maxneighs);
1423   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &maxneighs, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A)));
1424   if (!maxneighs) {
1425     PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared));
1426     *nnsp = NULL;
1427     PetscFunctionReturn(0);
1428   }
1429   maxsize = 0;
1430   for (i = 0; i < n_neigh; i++) maxsize = PetscMax(n_shared[i], maxsize);
1431   PetscCall(PetscMalloc2(maxsize, &gidxs, maxsize, &vals));
1432   /* create vectors to hold quadrature weights */
1433   PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1434   if (!transpose) {
1435     PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1436   } else {
1437     PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1438   }
1439   PetscCall(VecDuplicateVecs(quad_vec, maxneighs, &quad_vecs));
1440   PetscCall(VecDestroy(&quad_vec));
1441   PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, maxneighs, quad_vecs, nnsp));
1442   for (i = 0; i < maxneighs; i++) PetscCall(VecLockReadPop(quad_vecs[i]));
1443 
1444   /* compute local quad vec */
1445   PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1446   if (!transpose) {
1447     PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1448   } else {
1449     PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1450   }
1451   PetscCall(VecSet(p, 1.));
1452   if (!transpose) {
1453     PetscCall(MatMultTranspose(loc_divudotp, p, v));
1454   } else {
1455     PetscCall(MatMult(loc_divudotp, p, v));
1456   }
1457   if (vl2l) {
1458     Mat        lA;
1459     VecScatter sc;
1460 
1461     PetscCall(MatISGetLocalMat(A, &lA));
1462     PetscCall(MatCreateVecs(lA, &vins, NULL));
1463     PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1464     PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1465     PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1466     PetscCall(VecScatterDestroy(&sc));
1467   } else {
1468     vins = v;
1469   }
1470   PetscCall(VecGetArrayRead(vins, &array));
1471   PetscCall(VecDestroy(&p));
1472 
1473   /* insert in global quadrature vecs */
1474   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
1475   for (i = 1; i < n_neigh; i++) {
1476     const PetscInt *idxs;
1477     PetscInt        idx, nn, j;
1478 
1479     idxs = shared[i];
1480     nn   = n_shared[i];
1481     for (j = 0; j < nn; j++) vals[j] = array[idxs[j]];
1482     PetscCall(PetscFindInt(rank, graph->count[idxs[0]], graph->neighbours_set[idxs[0]], &idx));
1483     idx = -(idx + 1);
1484     PetscCheck(idx >= 0 && idx < maxneighs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid index %" PetscInt_FMT " not in [0,%" PetscInt_FMT ")", idx, maxneighs);
1485     PetscCall(ISLocalToGlobalMappingApply(map, nn, idxs, gidxs));
1486     PetscCall(VecSetValues(quad_vecs[idx], nn, gidxs, vals, INSERT_VALUES));
1487   }
1488   PetscCall(ISLocalToGlobalMappingRestoreInfo(graph->l2gmap, &n_neigh, &neigh, &n_shared, &shared));
1489   PetscCall(VecRestoreArrayRead(vins, &array));
1490   if (vl2l) PetscCall(VecDestroy(&vins));
1491   PetscCall(VecDestroy(&v));
1492   PetscCall(PetscFree2(gidxs, vals));
1493 
1494   /* assemble near null space */
1495   for (i = 0; i < maxneighs; i++) PetscCall(VecAssemblyBegin(quad_vecs[i]));
1496   for (i = 0; i < maxneighs; i++) {
1497     PetscCall(VecAssemblyEnd(quad_vecs[i]));
1498     PetscCall(VecViewFromOptions(quad_vecs[i], NULL, "-pc_bddc_quad_vecs_view"));
1499     PetscCall(VecLockReadPush(quad_vecs[i]));
1500   }
1501   PetscCall(VecDestroyVecs(maxneighs, &quad_vecs));
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1506 {
1507   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1508 
1509   PetscFunctionBegin;
1510   if (primalv) {
1511     if (pcbddc->user_primal_vertices_local) {
1512       IS list[2], newp;
1513 
1514       list[0] = primalv;
1515       list[1] = pcbddc->user_primal_vertices_local;
1516       PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1517       PetscCall(ISSortRemoveDups(newp));
1518       PetscCall(ISDestroy(&list[1]));
1519       pcbddc->user_primal_vertices_local = newp;
1520     } else {
1521       PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1522     }
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1528 {
1529   PetscInt f, *comp = (PetscInt *)ctx;
1530 
1531   PetscFunctionBegin;
1532   for (f = 0; f < Nf; f++) out[f] = X[*comp];
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1537 {
1538   Vec       local, global;
1539   PC_BDDC  *pcbddc     = (PC_BDDC *)pc->data;
1540   Mat_IS   *matis      = (Mat_IS *)pc->pmat->data;
1541   PetscBool monolithic = PETSC_FALSE;
1542 
1543   PetscFunctionBegin;
1544   PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1545   PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1546   PetscOptionsEnd();
1547   /* need to convert from global to local topology information and remove references to information in global ordering */
1548   PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1549   PetscCall(MatCreateVecs(matis->A, &local, NULL));
1550   PetscCall(VecBindToCPU(global, PETSC_TRUE));
1551   PetscCall(VecBindToCPU(local, PETSC_TRUE));
1552   if (monolithic) { /* just get block size to properly compute vertices */
1553     if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1554     goto boundary;
1555   }
1556 
1557   if (pcbddc->user_provided_isfordofs) {
1558     if (pcbddc->n_ISForDofs) {
1559       PetscInt i;
1560 
1561       PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1562       for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1563         PetscInt bs;
1564 
1565         PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1566         PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1567         PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1568         PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1569       }
1570       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1571       pcbddc->n_ISForDofs      = 0;
1572       PetscCall(PetscFree(pcbddc->ISForDofs));
1573     }
1574   } else {
1575     if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1576       DM dm;
1577 
1578       PetscCall(MatGetDM(pc->pmat, &dm));
1579       if (!dm) PetscCall(PCGetDM(pc, &dm));
1580       if (dm) {
1581         IS      *fields;
1582         PetscInt nf, i;
1583 
1584         PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1585         PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1586         for (i = 0; i < nf; i++) {
1587           PetscInt bs;
1588 
1589           PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1590           PetscCall(ISGetBlockSize(fields[i], &bs));
1591           PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1592           PetscCall(ISDestroy(&fields[i]));
1593         }
1594         PetscCall(PetscFree(fields));
1595         pcbddc->n_ISForDofsLocal = nf;
1596       } else { /* See if MATIS has fields attached by the conversion from MatNest */
1597         PetscContainer c;
1598 
1599         PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1600         if (c) {
1601           MatISLocalFields lf;
1602           PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1603           PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1604         } else { /* fallback, create the default fields if bs > 1 */
1605           PetscInt i, n = matis->A->rmap->n;
1606           PetscCall(MatGetBlockSize(pc->pmat, &i));
1607           if (i > 1) {
1608             pcbddc->n_ISForDofsLocal = i;
1609             PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1610             for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1611           }
1612         }
1613       }
1614     } else {
1615       PetscInt i;
1616       for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1617     }
1618   }
1619 
1620 boundary:
1621   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1622     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1623   } else if (pcbddc->DirichletBoundariesLocal) {
1624     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1625   }
1626   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1627     PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1628   } else if (pcbddc->NeumannBoundariesLocal) {
1629     PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1630   }
1631   if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1632   PetscCall(VecDestroy(&global));
1633   PetscCall(VecDestroy(&local));
1634   /* detect local disconnected subdomains if requested (use matis->A) */
1635   if (pcbddc->detect_disconnected) {
1636     IS        primalv = NULL;
1637     PetscInt  i;
1638     PetscBool filter = pcbddc->detect_disconnected_filter;
1639 
1640     for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1641     PetscCall(PetscFree(pcbddc->local_subs));
1642     PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1643     PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1644     PetscCall(ISDestroy(&primalv));
1645   }
1646   /* early stage corner detection */
1647   {
1648     DM dm;
1649 
1650     PetscCall(MatGetDM(pc->pmat, &dm));
1651     if (!dm) PetscCall(PCGetDM(pc, &dm));
1652     if (dm) {
1653       PetscBool isda;
1654 
1655       PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1656       if (isda) {
1657         ISLocalToGlobalMapping l2l;
1658         IS                     corners;
1659         Mat                    lA;
1660         PetscBool              gl, lo;
1661 
1662         {
1663           Vec                cvec;
1664           const PetscScalar *coords;
1665           PetscInt           dof, n, cdim;
1666           PetscBool          memc = PETSC_TRUE;
1667 
1668           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1669           PetscCall(DMGetCoordinates(dm, &cvec));
1670           PetscCall(VecGetLocalSize(cvec, &n));
1671           PetscCall(VecGetBlockSize(cvec, &cdim));
1672           n /= cdim;
1673           PetscCall(PetscFree(pcbddc->mat_graph->coords));
1674           PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1675           PetscCall(VecGetArrayRead(cvec, &coords));
1676 #if defined(PETSC_USE_COMPLEX)
1677           memc = PETSC_FALSE;
1678 #endif
1679           if (dof != 1) memc = PETSC_FALSE;
1680           if (memc) {
1681             PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1682           } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1683             PetscReal *bcoords = pcbddc->mat_graph->coords;
1684             PetscInt   i, b, d;
1685 
1686             for (i = 0; i < n; i++) {
1687               for (b = 0; b < dof; b++) {
1688                 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1689               }
1690             }
1691           }
1692           PetscCall(VecRestoreArrayRead(cvec, &coords));
1693           pcbddc->mat_graph->cdim  = cdim;
1694           pcbddc->mat_graph->cnloc = dof * n;
1695           pcbddc->mat_graph->cloc  = PETSC_FALSE;
1696         }
1697         PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1698         PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1699         PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1700         PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
1701         lo = (PetscBool)(l2l && corners);
1702         PetscCall(MPIU_Allreduce(&lo, &gl, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
1703         if (gl) { /* From PETSc's DMDA */
1704           const PetscInt *idx;
1705           PetscInt        dof, bs, *idxout, n;
1706 
1707           PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1708           PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
1709           PetscCall(ISGetLocalSize(corners, &n));
1710           PetscCall(ISGetIndices(corners, &idx));
1711           if (bs == dof) {
1712             PetscCall(PetscMalloc1(n, &idxout));
1713             PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
1714           } else { /* the original DMDA local-to-local map have been modified */
1715             PetscInt i, d;
1716 
1717             PetscCall(PetscMalloc1(dof * n, &idxout));
1718             for (i = 0; i < n; i++)
1719               for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
1720             PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
1721 
1722             bs = 1;
1723             n *= dof;
1724           }
1725           PetscCall(ISRestoreIndices(corners, &idx));
1726           PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1727           PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
1728           PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
1729           PetscCall(ISDestroy(&corners));
1730           pcbddc->corner_selected  = PETSC_TRUE;
1731           pcbddc->corner_selection = PETSC_TRUE;
1732         }
1733         if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
1734       }
1735     }
1736   }
1737   if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1738     DM dm;
1739 
1740     PetscCall(MatGetDM(pc->pmat, &dm));
1741     if (!dm) PetscCall(PCGetDM(pc, &dm));
1742     if (dm) { /* this can get very expensive, I need to find a faster alternative */
1743       Vec          vcoords;
1744       PetscSection section;
1745       PetscReal   *coords;
1746       PetscInt     d, cdim, nl, nf, **ctxs;
1747       PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1748       /* debug coordinates */
1749       PetscViewer       viewer;
1750       PetscBool         flg;
1751       PetscViewerFormat format;
1752       const char       *prefix;
1753 
1754       PetscCall(DMGetCoordinateDim(dm, &cdim));
1755       PetscCall(DMGetLocalSection(dm, &section));
1756       PetscCall(PetscSectionGetNumFields(section, &nf));
1757       PetscCall(DMCreateGlobalVector(dm, &vcoords));
1758       PetscCall(VecGetLocalSize(vcoords, &nl));
1759       PetscCall(PetscMalloc1(nl * cdim, &coords));
1760       PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
1761       PetscCall(PetscMalloc1(nf, &ctxs[0]));
1762       for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
1763       for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
1764 
1765       /* debug coordinates */
1766       PetscCall(PCGetOptionsPrefix(pc, &prefix));
1767       PetscCall(PetscOptionsGetViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
1768       if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
1769       for (d = 0; d < cdim; d++) {
1770         PetscInt           i;
1771         const PetscScalar *v;
1772         char               name[16];
1773 
1774         for (i = 0; i < nf; i++) ctxs[i][0] = d;
1775         PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%d", (int)d));
1776         PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
1777         PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
1778         if (flg) PetscCall(VecView(vcoords, viewer));
1779         PetscCall(VecGetArrayRead(vcoords, &v));
1780         for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
1781         PetscCall(VecRestoreArrayRead(vcoords, &v));
1782       }
1783       PetscCall(VecDestroy(&vcoords));
1784       PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
1785       PetscCall(PetscFree(coords));
1786       PetscCall(PetscFree(ctxs[0]));
1787       PetscCall(PetscFree2(funcs, ctxs));
1788       if (flg) {
1789         PetscCall(PetscViewerPopFormat(viewer));
1790         PetscCall(PetscViewerDestroy(&viewer));
1791       }
1792     }
1793   }
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1798 {
1799   Mat_IS         *matis = (Mat_IS *)(pc->pmat->data);
1800   IS              nis;
1801   const PetscInt *idxs;
1802   PetscInt        i, nd, n = matis->A->rmap->n, *nidxs, nnd;
1803 
1804   PetscFunctionBegin;
1805   PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)(pc)), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
1806   if (mop == MPI_LAND) {
1807     /* init rootdata with true */
1808     for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
1809   } else {
1810     PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
1811   }
1812   PetscCall(PetscArrayzero(matis->sf_leafdata, n));
1813   PetscCall(ISGetLocalSize(*is, &nd));
1814   PetscCall(ISGetIndices(*is, &idxs));
1815   for (i = 0; i < nd; i++)
1816     if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
1817   PetscCall(ISRestoreIndices(*is, &idxs));
1818   PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
1819   PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
1820   PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
1821   PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
1822   if (mop == MPI_LAND) {
1823     PetscCall(PetscMalloc1(nd, &nidxs));
1824   } else {
1825     PetscCall(PetscMalloc1(n, &nidxs));
1826   }
1827   for (i = 0, nnd = 0; i < n; i++)
1828     if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
1829   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(*is)), nnd, nidxs, PETSC_OWN_POINTER, &nis));
1830   PetscCall(ISDestroy(is));
1831   *is = nis;
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
1836 {
1837   PC_IS   *pcis   = (PC_IS *)(pc->data);
1838   PC_BDDC *pcbddc = (PC_BDDC *)(pc->data);
1839 
1840   PetscFunctionBegin;
1841   if (!pcbddc->benign_have_null) PetscFunctionReturn(0);
1842   if (pcbddc->ChangeOfBasisMatrix) {
1843     Vec swap;
1844 
1845     PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
1846     swap                = pcbddc->work_change;
1847     pcbddc->work_change = r;
1848     r                   = swap;
1849   }
1850   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
1851   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
1852   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
1853   PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
1854   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
1855   PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
1856   PetscCall(VecSet(z, 0.));
1857   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
1858   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
1859   if (pcbddc->ChangeOfBasisMatrix) {
1860     pcbddc->work_change = r;
1861     PetscCall(VecCopy(z, pcbddc->work_change));
1862     PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
1863   }
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1868 {
1869   PCBDDCBenignMatMult_ctx ctx;
1870   PetscBool               apply_right, apply_left, reset_x;
1871 
1872   PetscFunctionBegin;
1873   PetscCall(MatShellGetContext(A, &ctx));
1874   if (transpose) {
1875     apply_right = ctx->apply_left;
1876     apply_left  = ctx->apply_right;
1877   } else {
1878     apply_right = ctx->apply_right;
1879     apply_left  = ctx->apply_left;
1880   }
1881   reset_x = PETSC_FALSE;
1882   if (apply_right) {
1883     const PetscScalar *ax;
1884     PetscInt           nl, i;
1885 
1886     PetscCall(VecGetLocalSize(x, &nl));
1887     PetscCall(VecGetArrayRead(x, &ax));
1888     PetscCall(PetscArraycpy(ctx->work, ax, nl));
1889     PetscCall(VecRestoreArrayRead(x, &ax));
1890     for (i = 0; i < ctx->benign_n; i++) {
1891       PetscScalar     sum, val;
1892       const PetscInt *idxs;
1893       PetscInt        nz, j;
1894       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
1895       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
1896       sum = 0.;
1897       if (ctx->apply_p0) {
1898         val = ctx->work[idxs[nz - 1]];
1899         for (j = 0; j < nz - 1; j++) {
1900           sum += ctx->work[idxs[j]];
1901           ctx->work[idxs[j]] += val;
1902         }
1903       } else {
1904         for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
1905       }
1906       ctx->work[idxs[nz - 1]] -= sum;
1907       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
1908     }
1909     PetscCall(VecPlaceArray(x, ctx->work));
1910     reset_x = PETSC_TRUE;
1911   }
1912   if (transpose) {
1913     PetscCall(MatMultTranspose(ctx->A, x, y));
1914   } else {
1915     PetscCall(MatMult(ctx->A, x, y));
1916   }
1917   if (reset_x) PetscCall(VecResetArray(x));
1918   if (apply_left) {
1919     PetscScalar *ay;
1920     PetscInt     i;
1921 
1922     PetscCall(VecGetArray(y, &ay));
1923     for (i = 0; i < ctx->benign_n; i++) {
1924       PetscScalar     sum, val;
1925       const PetscInt *idxs;
1926       PetscInt        nz, j;
1927       PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
1928       PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
1929       val = -ay[idxs[nz - 1]];
1930       if (ctx->apply_p0) {
1931         sum = 0.;
1932         for (j = 0; j < nz - 1; j++) {
1933           sum += ay[idxs[j]];
1934           ay[idxs[j]] += val;
1935         }
1936         ay[idxs[nz - 1]] += sum;
1937       } else {
1938         for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
1939         ay[idxs[nz - 1]] = 0.;
1940       }
1941       PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
1942     }
1943     PetscCall(VecRestoreArray(y, &ay));
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
1949 {
1950   PetscFunctionBegin;
1951   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
1956 {
1957   PetscFunctionBegin;
1958   PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
1963 {
1964   PC_IS                  *pcis   = (PC_IS *)pc->data;
1965   PC_BDDC                *pcbddc = (PC_BDDC *)pc->data;
1966   PCBDDCBenignMatMult_ctx ctx;
1967 
1968   PetscFunctionBegin;
1969   if (!restore) {
1970     Mat                A_IB, A_BI;
1971     PetscScalar       *work;
1972     PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
1973 
1974     PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
1975     if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0);
1976     PetscCall(PetscMalloc1(pcis->n, &work));
1977     PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
1978     PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
1979     PetscCall(MatSetType(A_IB, MATSHELL));
1980     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (void (*)(void))PCBDDCBenignMatMult_Private));
1981     PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (void (*)(void))PCBDDCBenignMatMultTranspose_Private));
1982     PetscCall(PetscNew(&ctx));
1983     PetscCall(MatShellSetContext(A_IB, ctx));
1984     ctx->apply_left  = PETSC_TRUE;
1985     ctx->apply_right = PETSC_FALSE;
1986     ctx->apply_p0    = PETSC_FALSE;
1987     ctx->benign_n    = pcbddc->benign_n;
1988     if (reuse) {
1989       ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
1990       ctx->free                 = PETSC_FALSE;
1991     } else { /* TODO: could be optimized for successive solves */
1992       ISLocalToGlobalMapping N_to_D;
1993       PetscInt               i;
1994 
1995       PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
1996       PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
1997       for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
1998       PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
1999       ctx->free = PETSC_TRUE;
2000     }
2001     ctx->A    = pcis->A_IB;
2002     ctx->work = work;
2003     PetscCall(MatSetUp(A_IB));
2004     PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2005     PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2006     pcis->A_IB = A_IB;
2007 
2008     /* A_BI as A_IB^T */
2009     PetscCall(MatCreateTranspose(A_IB, &A_BI));
2010     pcbddc->benign_original_mat = pcis->A_BI;
2011     pcis->A_BI                  = A_BI;
2012   } else {
2013     if (!pcbddc->benign_original_mat) PetscFunctionReturn(0);
2014     PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2015     PetscCall(MatDestroy(&pcis->A_IB));
2016     pcis->A_IB = ctx->A;
2017     ctx->A     = NULL;
2018     PetscCall(MatDestroy(&pcis->A_BI));
2019     pcis->A_BI                  = pcbddc->benign_original_mat;
2020     pcbddc->benign_original_mat = NULL;
2021     if (ctx->free) {
2022       PetscInt i;
2023       for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2024       PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2025     }
2026     PetscCall(PetscFree(ctx->work));
2027     PetscCall(PetscFree(ctx));
2028   }
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 /* used just in bddc debug mode */
2033 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2034 {
2035   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2036   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
2037   Mat      An;
2038 
2039   PetscFunctionBegin;
2040   PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2041   PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2042   if (is1) {
2043     PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2044     PetscCall(MatDestroy(&An));
2045   } else {
2046     *B = An;
2047   }
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 /* TODO: add reuse flag */
2052 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2053 {
2054   Mat             Bt;
2055   PetscScalar    *a, *bdata;
2056   const PetscInt *ii, *ij;
2057   PetscInt        m, n, i, nnz, *bii, *bij;
2058   PetscBool       flg_row;
2059 
2060   PetscFunctionBegin;
2061   PetscCall(MatGetSize(A, &n, &m));
2062   PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2063   PetscCall(MatSeqAIJGetArray(A, &a));
2064   nnz = n;
2065   for (i = 0; i < ii[n]; i++) {
2066     if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2067   }
2068   PetscCall(PetscMalloc1(n + 1, &bii));
2069   PetscCall(PetscMalloc1(nnz, &bij));
2070   PetscCall(PetscMalloc1(nnz, &bdata));
2071   nnz    = 0;
2072   bii[0] = 0;
2073   for (i = 0; i < n; i++) {
2074     PetscInt j;
2075     for (j = ii[i]; j < ii[i + 1]; j++) {
2076       PetscScalar entry = a[j];
2077       if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2078         bij[nnz]   = ij[j];
2079         bdata[nnz] = entry;
2080         nnz++;
2081       }
2082     }
2083     bii[i + 1] = nnz;
2084   }
2085   PetscCall(MatSeqAIJRestoreArray(A, &a));
2086   PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2087   PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2088   {
2089     Mat_SeqAIJ *b = (Mat_SeqAIJ *)(Bt->data);
2090     b->free_a     = PETSC_TRUE;
2091     b->free_ij    = PETSC_TRUE;
2092   }
2093   if (*B == A) PetscCall(MatDestroy(&A));
2094   *B = Bt;
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2099 {
2100   Mat                    B = NULL;
2101   DM                     dm;
2102   IS                     is_dummy, *cc_n;
2103   ISLocalToGlobalMapping l2gmap_dummy;
2104   PCBDDCGraph            graph;
2105   PetscInt              *xadj_filtered = NULL, *adjncy_filtered = NULL;
2106   PetscInt               i, n;
2107   PetscInt              *xadj, *adjncy;
2108   PetscBool              isplex = PETSC_FALSE;
2109 
2110   PetscFunctionBegin;
2111   if (ncc) *ncc = 0;
2112   if (cc) *cc = NULL;
2113   if (primalv) *primalv = NULL;
2114   PetscCall(PCBDDCGraphCreate(&graph));
2115   PetscCall(MatGetDM(pc->pmat, &dm));
2116   if (!dm) PetscCall(PCGetDM(pc, &dm));
2117   if (dm) PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMPLEX, &isplex));
2118   if (filter) isplex = PETSC_FALSE;
2119 
2120   if (isplex) { /* this code has been modified from plexpartition.c */
2121     PetscInt        p, pStart, pEnd, a, adjSize, idx, size, nroots;
2122     PetscInt       *adj = NULL;
2123     IS              cellNumbering;
2124     const PetscInt *cellNum;
2125     PetscBool       useCone, useClosure;
2126     PetscSection    section;
2127     PetscSegBuffer  adjBuffer;
2128     PetscSF         sfPoint;
2129 
2130     PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2131     PetscCall(DMGetPointSF(dm, &sfPoint));
2132     PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2133     /* Build adjacency graph via a section/segbuffer */
2134     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), &section));
2135     PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2136     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2137     /* Always use FVM adjacency to create partitioner graph */
2138     PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2139     PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2140     PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2141     PetscCall(ISGetIndices(cellNumbering, &cellNum));
2142     for (n = 0, p = pStart; p < pEnd; p++) {
2143       /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2144       if (nroots > 0) {
2145         if (cellNum[p] < 0) continue;
2146       }
2147       adjSize = PETSC_DETERMINE;
2148       PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2149       for (a = 0; a < adjSize; ++a) {
2150         const PetscInt point = adj[a];
2151         if (pStart <= point && point < pEnd) {
2152           PetscInt *PETSC_RESTRICT pBuf;
2153           PetscCall(PetscSectionAddDof(section, p, 1));
2154           PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2155           *pBuf = point;
2156         }
2157       }
2158       n++;
2159     }
2160     PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2161     /* Derive CSR graph from section/segbuffer */
2162     PetscCall(PetscSectionSetUp(section));
2163     PetscCall(PetscSectionGetStorageSize(section, &size));
2164     PetscCall(PetscMalloc1(n + 1, &xadj));
2165     for (idx = 0, p = pStart; p < pEnd; p++) {
2166       if (nroots > 0) {
2167         if (cellNum[p] < 0) continue;
2168       }
2169       PetscCall(PetscSectionGetOffset(section, p, &(xadj[idx++])));
2170     }
2171     xadj[n] = size;
2172     PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2173     /* Clean up */
2174     PetscCall(PetscSegBufferDestroy(&adjBuffer));
2175     PetscCall(PetscSectionDestroy(&section));
2176     PetscCall(PetscFree(adj));
2177     graph->xadj   = xadj;
2178     graph->adjncy = adjncy;
2179   } else {
2180     Mat       A;
2181     PetscBool isseqaij, flg_row;
2182 
2183     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2184     if (!A->rmap->N || !A->cmap->N) {
2185       PetscCall(PCBDDCGraphDestroy(&graph));
2186       PetscFunctionReturn(0);
2187     }
2188     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2189     if (!isseqaij && filter) {
2190       PetscBool isseqdense;
2191 
2192       PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2193       if (!isseqdense) {
2194         PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2195       } else { /* TODO: rectangular case and LDA */
2196         PetscScalar *array;
2197         PetscReal    chop = 1.e-6;
2198 
2199         PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2200         PetscCall(MatDenseGetArray(B, &array));
2201         PetscCall(MatGetSize(B, &n, NULL));
2202         for (i = 0; i < n; i++) {
2203           PetscInt j;
2204           for (j = i + 1; j < n; j++) {
2205             PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2206             if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2207             if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2208           }
2209         }
2210         PetscCall(MatDenseRestoreArray(B, &array));
2211         PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2212       }
2213     } else {
2214       PetscCall(PetscObjectReference((PetscObject)A));
2215       B = A;
2216     }
2217     PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2218 
2219     /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2220     if (filter) {
2221       PetscScalar *data;
2222       PetscInt     j, cum;
2223 
2224       PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2225       PetscCall(MatSeqAIJGetArray(B, &data));
2226       cum = 0;
2227       for (i = 0; i < n; i++) {
2228         PetscInt t;
2229 
2230         for (j = xadj[i]; j < xadj[i + 1]; j++) {
2231           if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2232           adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2233         }
2234         t                = xadj_filtered[i];
2235         xadj_filtered[i] = cum;
2236         cum += t;
2237       }
2238       PetscCall(MatSeqAIJRestoreArray(B, &data));
2239       graph->xadj   = xadj_filtered;
2240       graph->adjncy = adjncy_filtered;
2241     } else {
2242       graph->xadj   = xadj;
2243       graph->adjncy = adjncy;
2244     }
2245   }
2246   /* compute local connected components using PCBDDCGraph */
2247   PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2248   PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2249   PetscCall(ISDestroy(&is_dummy));
2250   PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_MAX_INT));
2251   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2252   PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2253   PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2254 
2255   /* partial clean up */
2256   PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2257   if (B) {
2258     PetscBool flg_row;
2259     PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2260     PetscCall(MatDestroy(&B));
2261   }
2262   if (isplex) {
2263     PetscCall(PetscFree(xadj));
2264     PetscCall(PetscFree(adjncy));
2265   }
2266 
2267   /* get back data */
2268   if (isplex) {
2269     if (ncc) *ncc = graph->ncc;
2270     if (cc || primalv) {
2271       Mat          A;
2272       PetscBT      btv, btvt;
2273       PetscSection subSection;
2274       PetscInt    *ids, cum, cump, *cids, *pids;
2275 
2276       PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2277       PetscCall(MatISGetLocalMat(pc->pmat, &A));
2278       PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2279       PetscCall(PetscBTCreate(A->rmap->n, &btv));
2280       PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2281 
2282       cids[0] = 0;
2283       for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2284         PetscInt j;
2285 
2286         PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2287         for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2288           PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2289 
2290           PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2291           for (k = 0; k < 2 * size; k += 2) {
2292             PetscInt s, pp, p = closure[k], off, dof, cdof;
2293 
2294             PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2295             PetscCall(PetscSectionGetOffset(subSection, p, &off));
2296             PetscCall(PetscSectionGetDof(subSection, p, &dof));
2297             for (s = 0; s < dof - cdof; s++) {
2298               if (PetscBTLookupSet(btvt, off + s)) continue;
2299               if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2300               else pids[cump++] = off + s; /* cross-vertex */
2301             }
2302             PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2303             if (pp != p) {
2304               PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2305               PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2306               PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2307               for (s = 0; s < dof - cdof; s++) {
2308                 if (PetscBTLookupSet(btvt, off + s)) continue;
2309                 if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2310                 else pids[cump++] = off + s; /* cross-vertex */
2311               }
2312             }
2313           }
2314           PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2315         }
2316         cids[i + 1] = cum;
2317         /* mark dofs as already assigned */
2318         for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2319       }
2320       if (cc) {
2321         PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2322         for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2323         *cc = cc_n;
2324       }
2325       if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2326       PetscCall(PetscFree3(ids, cids, pids));
2327       PetscCall(PetscBTDestroy(&btv));
2328       PetscCall(PetscBTDestroy(&btvt));
2329     }
2330   } else {
2331     if (ncc) *ncc = graph->ncc;
2332     if (cc) {
2333       PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2334       for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2335       *cc = cc_n;
2336     }
2337   }
2338   /* clean up graph */
2339   graph->xadj   = NULL;
2340   graph->adjncy = NULL;
2341   PetscCall(PCBDDCGraphDestroy(&graph));
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2346 {
2347   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2348   PC_IS   *pcis   = (PC_IS *)(pc->data);
2349   IS       dirIS  = NULL;
2350   PetscInt i;
2351 
2352   PetscFunctionBegin;
2353   PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2354   if (zerodiag) {
2355     Mat             A;
2356     Vec             vec3_N;
2357     PetscScalar    *vals;
2358     const PetscInt *idxs;
2359     PetscInt        nz, *count;
2360 
2361     /* p0 */
2362     PetscCall(VecSet(pcis->vec1_N, 0.));
2363     PetscCall(PetscMalloc1(pcis->n, &vals));
2364     PetscCall(ISGetLocalSize(zerodiag, &nz));
2365     PetscCall(ISGetIndices(zerodiag, &idxs));
2366     for (i = 0; i < nz; i++) vals[i] = 1.;
2367     PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2368     PetscCall(VecAssemblyBegin(pcis->vec1_N));
2369     PetscCall(VecAssemblyEnd(pcis->vec1_N));
2370     /* v_I */
2371     PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2372     for (i = 0; i < nz; i++) vals[i] = 0.;
2373     PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2374     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2375     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2376     for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2377     PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2378     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2379     if (dirIS) {
2380       PetscInt n;
2381 
2382       PetscCall(ISGetLocalSize(dirIS, &n));
2383       PetscCall(ISGetIndices(dirIS, &idxs));
2384       for (i = 0; i < n; i++) vals[i] = 0.;
2385       PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2386       PetscCall(ISRestoreIndices(dirIS, &idxs));
2387     }
2388     PetscCall(VecAssemblyBegin(pcis->vec2_N));
2389     PetscCall(VecAssemblyEnd(pcis->vec2_N));
2390     PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2391     PetscCall(VecSet(vec3_N, 0.));
2392     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2393     PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2394     PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2395     PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2396     PetscCall(PetscFree(vals));
2397     PetscCall(VecDestroy(&vec3_N));
2398 
2399     /* there should not be any pressure dofs lying on the interface */
2400     PetscCall(PetscCalloc1(pcis->n, &count));
2401     PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2402     for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2403     PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2404     PetscCall(ISGetIndices(zerodiag, &idxs));
2405     for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2406     PetscCall(ISRestoreIndices(zerodiag, &idxs));
2407     PetscCall(PetscFree(count));
2408   }
2409   PetscCall(ISDestroy(&dirIS));
2410 
2411   /* check PCBDDCBenignGetOrSetP0 */
2412   PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2413   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2414   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2415   for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2416   PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2417   for (i = 0; i < pcbddc->benign_n; i++) {
2418     PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2419     PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2420   }
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2425 {
2426   PC_BDDC  *pcbddc    = (PC_BDDC *)pc->data;
2427   Mat_IS   *matis     = (Mat_IS *)(pc->pmat->data);
2428   IS        pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2429   PetscInt  nz, n, benign_n, bsp = 1;
2430   PetscInt *interior_dofs, n_interior_dofs, nneu;
2431   PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2432 
2433   PetscFunctionBegin;
2434   if (reuse) goto project_b0;
2435   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2436   PetscCall(MatDestroy(&pcbddc->benign_B0));
2437   for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2438   PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2439   has_null_pressures = PETSC_TRUE;
2440   have_null          = PETSC_TRUE;
2441   /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2442      Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2443      Checks if all the pressure dofs in each subdomain have a zero diagonal
2444      If not, a change of basis on pressures is not needed
2445      since the local Schur complements are already SPD
2446   */
2447   if (pcbddc->n_ISForDofsLocal) {
2448     IS        iP = NULL;
2449     PetscInt  p, *pp;
2450     PetscBool flg;
2451 
2452     PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2453     n = pcbddc->n_ISForDofsLocal;
2454     PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2455     PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2456     PetscOptionsEnd();
2457     if (!flg) {
2458       n     = 1;
2459       pp[0] = pcbddc->n_ISForDofsLocal - 1;
2460     }
2461 
2462     bsp = 0;
2463     for (p = 0; p < n; p++) {
2464       PetscInt bs;
2465 
2466       PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2467       PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2468       bsp += bs;
2469     }
2470     PetscCall(PetscMalloc1(bsp, &bzerodiag));
2471     bsp = 0;
2472     for (p = 0; p < n; p++) {
2473       const PetscInt *idxs;
2474       PetscInt        b, bs, npl, *bidxs;
2475 
2476       PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2477       PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2478       PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2479       PetscCall(PetscMalloc1(npl / bs, &bidxs));
2480       for (b = 0; b < bs; b++) {
2481         PetscInt i;
2482 
2483         for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2484         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2485         bsp++;
2486       }
2487       PetscCall(PetscFree(bidxs));
2488       PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2489     }
2490     PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2491 
2492     /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2493     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2494     if (iP) {
2495       IS newpressures;
2496 
2497       PetscCall(ISDifference(pressures, iP, &newpressures));
2498       PetscCall(ISDestroy(&pressures));
2499       pressures = newpressures;
2500     }
2501     PetscCall(ISSorted(pressures, &sorted));
2502     if (!sorted) PetscCall(ISSort(pressures));
2503     PetscCall(PetscFree(pp));
2504   }
2505 
2506   /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2507   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2508   if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2509   PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2510   PetscCall(ISSorted(zerodiag, &sorted));
2511   if (!sorted) PetscCall(ISSort(zerodiag));
2512   PetscCall(PetscObjectReference((PetscObject)zerodiag));
2513   zerodiag_save = zerodiag;
2514   PetscCall(ISGetLocalSize(zerodiag, &nz));
2515   if (!nz) {
2516     if (n) have_null = PETSC_FALSE;
2517     has_null_pressures = PETSC_FALSE;
2518     PetscCall(ISDestroy(&zerodiag));
2519   }
2520   recompute_zerodiag = PETSC_FALSE;
2521 
2522   /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2523   zerodiag_subs   = NULL;
2524   benign_n        = 0;
2525   n_interior_dofs = 0;
2526   interior_dofs   = NULL;
2527   nneu            = 0;
2528   if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2529   checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2530   if (checkb) { /* need to compute interior nodes */
2531     PetscInt  n, i, j;
2532     PetscInt  n_neigh, *neigh, *n_shared, **shared;
2533     PetscInt *iwork;
2534 
2535     PetscCall(ISLocalToGlobalMappingGetSize(matis->rmapping, &n));
2536     PetscCall(ISLocalToGlobalMappingGetInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared));
2537     PetscCall(PetscCalloc1(n, &iwork));
2538     PetscCall(PetscMalloc1(n, &interior_dofs));
2539     for (i = 1; i < n_neigh; i++)
2540       for (j = 0; j < n_shared[i]; j++) iwork[shared[i][j]] += 1;
2541     for (i = 0; i < n; i++)
2542       if (!iwork[i]) interior_dofs[n_interior_dofs++] = i;
2543     PetscCall(PetscFree(iwork));
2544     PetscCall(ISLocalToGlobalMappingRestoreInfo(matis->rmapping, &n_neigh, &neigh, &n_shared, &shared));
2545   }
2546   if (has_null_pressures) {
2547     IS             *subs;
2548     PetscInt        nsubs, i, j, nl;
2549     const PetscInt *idxs;
2550     PetscScalar    *array;
2551     Vec            *work;
2552 
2553     subs  = pcbddc->local_subs;
2554     nsubs = pcbddc->n_local_subs;
2555     /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2556     if (checkb) {
2557       PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2558       PetscCall(ISGetLocalSize(zerodiag, &nl));
2559       PetscCall(ISGetIndices(zerodiag, &idxs));
2560       /* work[0] = 1_p */
2561       PetscCall(VecSet(work[0], 0.));
2562       PetscCall(VecGetArray(work[0], &array));
2563       for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2564       PetscCall(VecRestoreArray(work[0], &array));
2565       /* work[0] = 1_v */
2566       PetscCall(VecSet(work[1], 1.));
2567       PetscCall(VecGetArray(work[1], &array));
2568       for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2569       PetscCall(VecRestoreArray(work[1], &array));
2570       PetscCall(ISRestoreIndices(zerodiag, &idxs));
2571     }
2572 
2573     if (nsubs > 1 || bsp > 1) {
2574       IS      *is;
2575       PetscInt b, totb;
2576 
2577       totb  = bsp;
2578       is    = bsp > 1 ? bzerodiag : &zerodiag;
2579       nsubs = PetscMax(nsubs, 1);
2580       PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2581       for (b = 0; b < totb; b++) {
2582         for (i = 0; i < nsubs; i++) {
2583           ISLocalToGlobalMapping l2g;
2584           IS                     t_zerodiag_subs;
2585           PetscInt               nl;
2586 
2587           if (subs) {
2588             PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2589           } else {
2590             IS tis;
2591 
2592             PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2593             PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2594             PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2595             PetscCall(ISDestroy(&tis));
2596           }
2597           PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2598           PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2599           if (nl) {
2600             PetscBool valid = PETSC_TRUE;
2601 
2602             if (checkb) {
2603               PetscCall(VecSet(matis->x, 0));
2604               PetscCall(ISGetLocalSize(subs[i], &nl));
2605               PetscCall(ISGetIndices(subs[i], &idxs));
2606               PetscCall(VecGetArray(matis->x, &array));
2607               for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2608               PetscCall(VecRestoreArray(matis->x, &array));
2609               PetscCall(ISRestoreIndices(subs[i], &idxs));
2610               PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2611               PetscCall(MatMult(matis->A, matis->x, matis->y));
2612               PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2613               PetscCall(VecGetArray(matis->y, &array));
2614               for (j = 0; j < n_interior_dofs; j++) {
2615                 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2616                   valid = PETSC_FALSE;
2617                   break;
2618                 }
2619               }
2620               PetscCall(VecRestoreArray(matis->y, &array));
2621             }
2622             if (valid && nneu) {
2623               const PetscInt *idxs;
2624               PetscInt        nzb;
2625 
2626               PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2627               PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2628               PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2629               if (nzb) valid = PETSC_FALSE;
2630             }
2631             if (valid && pressures) {
2632               IS       t_pressure_subs, tmp;
2633               PetscInt i1, i2;
2634 
2635               PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2636               PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
2637               PetscCall(ISGetLocalSize(tmp, &i1));
2638               PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
2639               if (i2 != i1) valid = PETSC_FALSE;
2640               PetscCall(ISDestroy(&t_pressure_subs));
2641               PetscCall(ISDestroy(&tmp));
2642             }
2643             if (valid) {
2644               PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
2645               benign_n++;
2646             } else recompute_zerodiag = PETSC_TRUE;
2647           }
2648           PetscCall(ISDestroy(&t_zerodiag_subs));
2649           PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
2650         }
2651       }
2652     } else { /* there's just one subdomain (or zero if they have not been detected */
2653       PetscBool valid = PETSC_TRUE;
2654 
2655       if (nneu) valid = PETSC_FALSE;
2656       if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
2657       if (valid && checkb) {
2658         PetscCall(MatMult(matis->A, work[0], matis->x));
2659         PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
2660         PetscCall(VecGetArray(matis->x, &array));
2661         for (j = 0; j < n_interior_dofs; j++) {
2662           if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2663             valid = PETSC_FALSE;
2664             break;
2665           }
2666         }
2667         PetscCall(VecRestoreArray(matis->x, &array));
2668       }
2669       if (valid) {
2670         benign_n = 1;
2671         PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
2672         PetscCall(PetscObjectReference((PetscObject)zerodiag));
2673         zerodiag_subs[0] = zerodiag;
2674       }
2675     }
2676     if (checkb) PetscCall(VecDestroyVecs(2, &work));
2677   }
2678   PetscCall(PetscFree(interior_dofs));
2679 
2680   if (!benign_n) {
2681     PetscInt n;
2682 
2683     PetscCall(ISDestroy(&zerodiag));
2684     recompute_zerodiag = PETSC_FALSE;
2685     PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2686     if (n) have_null = PETSC_FALSE;
2687   }
2688 
2689   /* final check for null pressures */
2690   if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
2691 
2692   if (recompute_zerodiag) {
2693     PetscCall(ISDestroy(&zerodiag));
2694     if (benign_n == 1) {
2695       PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
2696       zerodiag = zerodiag_subs[0];
2697     } else {
2698       PetscInt i, nzn, *new_idxs;
2699 
2700       nzn = 0;
2701       for (i = 0; i < benign_n; i++) {
2702         PetscInt ns;
2703         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2704         nzn += ns;
2705       }
2706       PetscCall(PetscMalloc1(nzn, &new_idxs));
2707       nzn = 0;
2708       for (i = 0; i < benign_n; i++) {
2709         PetscInt ns, *idxs;
2710         PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
2711         PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2712         PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
2713         PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
2714         nzn += ns;
2715       }
2716       PetscCall(PetscSortInt(nzn, new_idxs));
2717       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
2718     }
2719     have_null = PETSC_FALSE;
2720   }
2721 
2722   /* determines if the coarse solver will be singular or not */
2723   PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2724 
2725   /* Prepare matrix to compute no-net-flux */
2726   if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2727     Mat                    A, loc_divudotp;
2728     ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
2729     IS                     row, col, isused = NULL;
2730     PetscInt               M, N, n, st, n_isused;
2731 
2732     if (pressures) {
2733       isused = pressures;
2734     } else {
2735       isused = zerodiag_save;
2736     }
2737     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
2738     PetscCall(MatISGetLocalMat(pc->pmat, &A));
2739     PetscCall(MatGetLocalSize(A, &n, NULL));
2740     PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
2741     n_isused = 0;
2742     if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
2743     PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
2744     st = st - n_isused;
2745     if (n) {
2746       const PetscInt *gidxs;
2747 
2748       PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
2749       PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
2750       /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2751       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2752       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
2753       PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
2754     } else {
2755       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
2756       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
2757       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
2758     }
2759     PetscCall(MatGetSize(pc->pmat, NULL, &N));
2760     PetscCall(ISGetSize(row, &M));
2761     PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
2762     PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
2763     PetscCall(ISDestroy(&row));
2764     PetscCall(ISDestroy(&col));
2765     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
2766     PetscCall(MatSetType(pcbddc->divudotp, MATIS));
2767     PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
2768     PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
2769     PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
2770     PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
2771     PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
2772     PetscCall(MatDestroy(&loc_divudotp));
2773     PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
2774     PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
2775   }
2776   PetscCall(ISDestroy(&zerodiag_save));
2777   PetscCall(ISDestroy(&pressures));
2778   if (bzerodiag) {
2779     PetscInt i;
2780 
2781     for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
2782     PetscCall(PetscFree(bzerodiag));
2783   }
2784   pcbddc->benign_n             = benign_n;
2785   pcbddc->benign_zerodiag_subs = zerodiag_subs;
2786 
2787   /* determines if the problem has subdomains with 0 pressure block */
2788   have_null = (PetscBool)(!!pcbddc->benign_n);
2789   PetscCall(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
2790 
2791 project_b0:
2792   PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2793   /* change of basis and p0 dofs */
2794   if (pcbddc->benign_n) {
2795     PetscInt i, s, *nnz;
2796 
2797     /* local change of basis for pressures */
2798     PetscCall(MatDestroy(&pcbddc->benign_change));
2799     PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
2800     PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
2801     PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
2802     PetscCall(PetscMalloc1(n, &nnz));
2803     for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
2804     for (i = 0; i < pcbddc->benign_n; i++) {
2805       const PetscInt *idxs;
2806       PetscInt        nzs, j;
2807 
2808       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
2809       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
2810       for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
2811       nnz[idxs[nzs - 1]] = nzs;                       /* last local pressure dof in subdomain */
2812       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
2813     }
2814     PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
2815     PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
2816     PetscCall(PetscFree(nnz));
2817     /* set identity by default */
2818     for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
2819     PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
2820     PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
2821     /* set change on pressures */
2822     for (s = 0; s < pcbddc->benign_n; s++) {
2823       PetscScalar    *array;
2824       const PetscInt *idxs;
2825       PetscInt        nzs;
2826 
2827       PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
2828       PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
2829       for (i = 0; i < nzs - 1; i++) {
2830         PetscScalar vals[2];
2831         PetscInt    cols[2];
2832 
2833         cols[0] = idxs[i];
2834         cols[1] = idxs[nzs - 1];
2835         vals[0] = 1.;
2836         vals[1] = 1.;
2837         PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
2838       }
2839       PetscCall(PetscMalloc1(nzs, &array));
2840       for (i = 0; i < nzs - 1; i++) array[i] = -1.;
2841       array[nzs - 1] = 1.;
2842       PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
2843       /* store local idxs for p0 */
2844       pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
2845       PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
2846       PetscCall(PetscFree(array));
2847     }
2848     PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
2849     PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
2850 
2851     /* project if needed */
2852     if (pcbddc->benign_change_explicit) {
2853       Mat M;
2854 
2855       PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
2856       PetscCall(MatDestroy(&pcbddc->local_mat));
2857       PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
2858       PetscCall(MatDestroy(&M));
2859     }
2860     /* store global idxs for p0 */
2861     PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
2862   }
2863   *zerodiaglocal = zerodiag;
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2868 {
2869   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
2870   PetscScalar *array;
2871 
2872   PetscFunctionBegin;
2873   if (!pcbddc->benign_sf) {
2874     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
2875     PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
2876   }
2877   if (get) {
2878     PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
2879     PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
2880     PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
2881     PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
2882   } else {
2883     PetscCall(VecGetArray(v, &array));
2884     PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
2885     PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
2886     PetscCall(VecRestoreArray(v, &array));
2887   }
2888   PetscFunctionReturn(0);
2889 }
2890 
2891 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
2892 {
2893   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2894 
2895   PetscFunctionBegin;
2896   /* TODO: add error checking
2897     - avoid nested pop (or push) calls.
2898     - cannot push before pop.
2899     - cannot call this if pcbddc->local_mat is NULL
2900   */
2901   if (!pcbddc->benign_n) PetscFunctionReturn(0);
2902   if (pop) {
2903     if (pcbddc->benign_change_explicit) {
2904       IS       is_p0;
2905       MatReuse reuse;
2906 
2907       /* extract B_0 */
2908       reuse = MAT_INITIAL_MATRIX;
2909       if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
2910       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
2911       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
2912       /* remove rows and cols from local problem */
2913       PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
2914       PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
2915       PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
2916       PetscCall(ISDestroy(&is_p0));
2917     } else {
2918       Mat_IS      *matis = (Mat_IS *)pc->pmat->data;
2919       PetscScalar *vals;
2920       PetscInt     i, n, *idxs_ins;
2921 
2922       PetscCall(VecGetLocalSize(matis->y, &n));
2923       PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
2924       if (!pcbddc->benign_B0) {
2925         PetscInt *nnz;
2926         PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
2927         PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
2928         PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
2929         PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
2930         for (i = 0; i < pcbddc->benign_n; i++) {
2931           PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
2932           nnz[i] = n - nnz[i];
2933         }
2934         PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
2935         PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
2936         PetscCall(PetscFree(nnz));
2937       }
2938 
2939       for (i = 0; i < pcbddc->benign_n; i++) {
2940         PetscScalar *array;
2941         PetscInt    *idxs, j, nz, cum;
2942 
2943         PetscCall(VecSet(matis->x, 0.));
2944         PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
2945         PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
2946         for (j = 0; j < nz; j++) vals[j] = 1.;
2947         PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
2948         PetscCall(VecAssemblyBegin(matis->x));
2949         PetscCall(VecAssemblyEnd(matis->x));
2950         PetscCall(VecSet(matis->y, 0.));
2951         PetscCall(MatMult(matis->A, matis->x, matis->y));
2952         PetscCall(VecGetArray(matis->y, &array));
2953         cum = 0;
2954         for (j = 0; j < n; j++) {
2955           if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
2956             vals[cum]     = array[j];
2957             idxs_ins[cum] = j;
2958             cum++;
2959           }
2960         }
2961         PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
2962         PetscCall(VecRestoreArray(matis->y, &array));
2963         PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
2964       }
2965       PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
2966       PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
2967       PetscCall(PetscFree2(idxs_ins, vals));
2968     }
2969   } else { /* push */
2970     if (pcbddc->benign_change_explicit) {
2971       PetscInt i;
2972 
2973       for (i = 0; i < pcbddc->benign_n; i++) {
2974         PetscScalar *B0_vals;
2975         PetscInt    *B0_cols, B0_ncol;
2976 
2977         PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
2978         PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
2979         PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
2980         PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
2981         PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
2982       }
2983       PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
2984       PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
2985     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
2986   }
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
2991 {
2992   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
2993   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2994   PetscBLASInt    B_dummyint, B_neigs, B_ierr, B_lwork;
2995   PetscBLASInt   *B_iwork, *B_ifail;
2996   PetscScalar    *work, lwork;
2997   PetscScalar    *St, *S, *eigv;
2998   PetscScalar    *Sarray, *Starray;
2999   PetscReal      *eigs, thresh, lthresh, uthresh;
3000   PetscInt        i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3001   PetscBool       allocated_S_St, upart;
3002 #if defined(PETSC_USE_COMPLEX)
3003   PetscReal *rwork;
3004 #endif
3005 
3006   PetscFunctionBegin;
3007   if (!pcbddc->adaptive_selection) PetscFunctionReturn(0);
3008   PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3009   PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3010   PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3011              sub_schurs->is_posdef);
3012   PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3013 
3014   if (pcbddc->dbg_flag) {
3015     if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3016     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3017     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3018     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3019     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3020   }
3021 
3022   if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3023 
3024   /* max size of subsets */
3025   mss = 0;
3026   for (i = 0; i < sub_schurs->n_subs; i++) {
3027     PetscInt subset_size;
3028 
3029     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3030     mss = PetscMax(mss, subset_size);
3031   }
3032 
3033   /* min/max and threshold */
3034   nmax           = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3035   nmin           = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3036   nmax           = PetscMax(nmin, nmax);
3037   allocated_S_St = PETSC_FALSE;
3038   if (nmin || !sub_schurs->is_posdef) { /* XXX */
3039     allocated_S_St = PETSC_TRUE;
3040   }
3041 
3042   /* allocate lapack workspace */
3043   cum = cum2 = 0;
3044   maxneigs   = 0;
3045   for (i = 0; i < sub_schurs->n_subs; i++) {
3046     PetscInt n, subset_size;
3047 
3048     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3049     n = PetscMin(subset_size, nmax);
3050     cum += subset_size;
3051     cum2 += subset_size * n;
3052     maxneigs = PetscMax(maxneigs, n);
3053   }
3054   lwork = 0;
3055   if (mss) {
3056     if (sub_schurs->is_symmetric) {
3057       PetscScalar  sdummy  = 0.;
3058       PetscBLASInt B_itype = 1;
3059       PetscBLASInt B_N = mss, idummy = 0;
3060       PetscReal    rdummy = 0., zero = 0.0;
3061       PetscReal    eps = 0.0; /* dlamch? */
3062 
3063       B_lwork = -1;
3064       /* some implementations may complain about NULL pointers, even if we are querying */
3065       S       = &sdummy;
3066       St      = &sdummy;
3067       eigs    = &rdummy;
3068       eigv    = &sdummy;
3069       B_iwork = &idummy;
3070       B_ifail = &idummy;
3071 #if defined(PETSC_USE_COMPLEX)
3072       rwork = &rdummy;
3073 #endif
3074       thresh = 1.0;
3075       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3076 #if defined(PETSC_USE_COMPLEX)
3077       PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3078 #else
3079       PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, &B_dummyint, &B_dummyint, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3080 #endif
3081       PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %d", (int)B_ierr);
3082       PetscCall(PetscFPTrapPop());
3083     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3084   }
3085 
3086   nv = 0;
3087   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3088     PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3089   }
3090   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3091   if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3092   PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3093 #if defined(PETSC_USE_COMPLEX)
3094   PetscCall(PetscMalloc1(7 * mss, &rwork));
3095 #endif
3096   PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3097                          &pcbddc->adaptive_constraints_data));
3098   PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3099 
3100   maxneigs = 0;
3101   cum = cumarray                           = 0;
3102   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3103   pcbddc->adaptive_constraints_data_ptr[0] = 0;
3104   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3105     const PetscInt *idxs;
3106 
3107     PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3108     for (cum = 0; cum < nv; cum++) {
3109       pcbddc->adaptive_constraints_n[cum]            = 1;
3110       pcbddc->adaptive_constraints_idxs[cum]         = idxs[cum];
3111       pcbddc->adaptive_constraints_data[cum]         = 1.0;
3112       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3113       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3114     }
3115     PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3116   }
3117 
3118   if (mss) { /* multilevel */
3119     if (sub_schurs->gdsw) {
3120       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3121       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3122     } else {
3123       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3124       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3125     }
3126   }
3127 
3128   lthresh = pcbddc->adaptive_threshold[0];
3129   uthresh = pcbddc->adaptive_threshold[1];
3130   upart   = pcbddc->use_deluxe_scaling;
3131   for (i = 0; i < sub_schurs->n_subs; i++) {
3132     const PetscInt *idxs;
3133     PetscReal       upper, lower;
3134     PetscInt        j, subset_size, eigs_start = 0;
3135     PetscBLASInt    B_N;
3136     PetscBool       same_data = PETSC_FALSE;
3137     PetscBool       scal      = PETSC_FALSE;
3138 
3139     if (upart) {
3140       upper = PETSC_MAX_REAL;
3141       lower = uthresh;
3142     } else {
3143       if (sub_schurs->gdsw) {
3144         upper = uthresh;
3145         lower = PETSC_MIN_REAL;
3146       } else {
3147         PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3148         upper = 1. / uthresh;
3149         lower = 0.;
3150       }
3151     }
3152     PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3153     PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3154     PetscCall(PetscBLASIntCast(subset_size, &B_N));
3155     /* this is experimental: we assume the dofs have been properly grouped to have
3156        the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3157     if (!sub_schurs->is_posdef) {
3158       Mat T;
3159 
3160       for (j = 0; j < subset_size; j++) {
3161         if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3162           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3163           PetscCall(MatScale(T, -1.0));
3164           PetscCall(MatDestroy(&T));
3165           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3166           PetscCall(MatScale(T, -1.0));
3167           PetscCall(MatDestroy(&T));
3168           if (sub_schurs->change_primal_sub) {
3169             PetscInt        nz, k;
3170             const PetscInt *idxs;
3171 
3172             PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3173             PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3174             for (k = 0; k < nz; k++) {
3175               *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3176               *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3177             }
3178             PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3179           }
3180           scal = PETSC_TRUE;
3181           break;
3182         }
3183       }
3184     }
3185 
3186     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3187       if (sub_schurs->is_symmetric) {
3188         PetscInt j, k;
3189         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3190           PetscCall(PetscArrayzero(S, subset_size * subset_size));
3191           PetscCall(PetscArrayzero(St, subset_size * subset_size));
3192         }
3193         for (j = 0; j < subset_size; j++) {
3194           for (k = j; k < subset_size; k++) {
3195             S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3196             St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3197           }
3198         }
3199       } else {
3200         PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3201         PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3202       }
3203     } else {
3204       S  = Sarray + cumarray;
3205       St = Starray + cumarray;
3206     }
3207     /* see if we can save some work */
3208     if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3209 
3210     if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3211       B_neigs = 0;
3212     } else {
3213       if (sub_schurs->is_symmetric) {
3214         PetscBLASInt B_itype = 1;
3215         PetscBLASInt B_IL, B_IU;
3216         PetscReal    eps = -1.0; /* dlamch? */
3217         PetscInt     nmin_s;
3218         PetscBool    compute_range;
3219 
3220         B_neigs       = 0;
3221         compute_range = (PetscBool)!same_data;
3222         if (nmin >= subset_size) compute_range = PETSC_FALSE;
3223 
3224         if (pcbddc->dbg_flag) {
3225           PetscInt nc = 0;
3226 
3227           if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3228           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i,
3229                                                        sub_schurs->n_subs, subset_size, pcbddc->mat_graph->count[idxs[0]] + 1, pcbddc->mat_graph->which_dof[idxs[0]], compute_range, nc));
3230         }
3231 
3232         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3233         if (compute_range) {
3234           /* ask for eigenvalues larger than thresh */
3235           if (sub_schurs->is_posdef) {
3236 #if defined(PETSC_USE_COMPLEX)
3237             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3238 #else
3239             PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3240 #endif
3241             PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3242           } else { /* no theory so far, but it works nicely */
3243             PetscInt  recipe = 0, recipe_m = 1;
3244             PetscReal bb[2];
3245 
3246             PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3247             switch (recipe) {
3248             case 0:
3249               if (scal) {
3250                 bb[0] = PETSC_MIN_REAL;
3251                 bb[1] = lthresh;
3252               } else {
3253                 bb[0] = uthresh;
3254                 bb[1] = PETSC_MAX_REAL;
3255               }
3256 #if defined(PETSC_USE_COMPLEX)
3257               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3258 #else
3259               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3260 #endif
3261               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3262               break;
3263             case 1:
3264               bb[0] = PETSC_MIN_REAL;
3265               bb[1] = lthresh * lthresh;
3266 #if defined(PETSC_USE_COMPLEX)
3267               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3268 #else
3269               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3270 #endif
3271               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3272               if (!scal) {
3273                 PetscBLASInt B_neigs2 = 0;
3274 
3275                 bb[0] = PetscMax(lthresh * lthresh, uthresh);
3276                 bb[1] = PETSC_MAX_REAL;
3277                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3278                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3279 #if defined(PETSC_USE_COMPLEX)
3280                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3281 #else
3282                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3283 #endif
3284                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3285                 B_neigs += B_neigs2;
3286               }
3287               break;
3288             case 2:
3289               if (scal) {
3290                 bb[0] = PETSC_MIN_REAL;
3291                 bb[1] = 0;
3292 #if defined(PETSC_USE_COMPLEX)
3293                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3294 #else
3295                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3296 #endif
3297                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3298               } else {
3299                 PetscBLASInt B_neigs2 = 0;
3300                 PetscBool    import   = PETSC_FALSE;
3301 
3302                 lthresh = PetscMax(lthresh, 0.0);
3303                 if (lthresh > 0.0) {
3304                   bb[0] = PETSC_MIN_REAL;
3305                   bb[1] = lthresh * lthresh;
3306 
3307                   import = PETSC_TRUE;
3308 #if defined(PETSC_USE_COMPLEX)
3309                   PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3310 #else
3311                   PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3312 #endif
3313                   PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3314                 }
3315                 bb[0] = PetscMax(lthresh * lthresh, uthresh);
3316                 bb[1] = PETSC_MAX_REAL;
3317                 if (import) {
3318                   PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3319                   PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3320                 }
3321 #if defined(PETSC_USE_COMPLEX)
3322                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3323 #else
3324                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3325 #endif
3326                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3327                 B_neigs += B_neigs2;
3328               }
3329               break;
3330             case 3:
3331               if (scal) {
3332                 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3333               } else {
3334                 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3335               }
3336               if (!scal) {
3337                 bb[0] = uthresh;
3338                 bb[1] = PETSC_MAX_REAL;
3339 #if defined(PETSC_USE_COMPLEX)
3340                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3341 #else
3342                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3343 #endif
3344                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3345               }
3346               if (recipe_m > 0 && B_N - B_neigs > 0) {
3347                 PetscBLASInt B_neigs2 = 0;
3348 
3349                 B_IL = 1;
3350                 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3351                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3352                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3353 #if defined(PETSC_USE_COMPLEX)
3354                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3355 #else
3356                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3357 #endif
3358                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3359                 B_neigs += B_neigs2;
3360               }
3361               break;
3362             case 4:
3363               bb[0] = PETSC_MIN_REAL;
3364               bb[1] = lthresh;
3365 #if defined(PETSC_USE_COMPLEX)
3366               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3367 #else
3368               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3369 #endif
3370               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3371               {
3372                 PetscBLASInt B_neigs2 = 0;
3373 
3374                 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3375                 bb[1] = PETSC_MAX_REAL;
3376                 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3377                 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3378 #if defined(PETSC_USE_COMPLEX)
3379                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3380 #else
3381                 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3382 #endif
3383                 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3384                 B_neigs += B_neigs2;
3385               }
3386               break;
3387             case 5: /* same as before: first compute all eigenvalues, then filter */
3388 #if defined(PETSC_USE_COMPLEX)
3389               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3390 #else
3391               PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3392 #endif
3393               PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3394               {
3395                 PetscInt e, k, ne;
3396                 for (e = 0, ne = 0; e < B_neigs; e++) {
3397                   if (eigs[e] < lthresh || eigs[e] > uthresh) {
3398                     for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3399                     eigs[ne] = eigs[e];
3400                     ne++;
3401                   }
3402                 }
3403                 PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3404                 B_neigs = ne;
3405               }
3406               break;
3407             default:
3408               SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3409             }
3410           }
3411         } else if (!same_data) { /* this is just to see all the eigenvalues */
3412           B_IU = PetscMax(1, PetscMin(B_N, nmax));
3413           B_IL = 1;
3414 #if defined(PETSC_USE_COMPLEX)
3415           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3416 #else
3417           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3418 #endif
3419           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3420         } else { /* same_data is true, so just get the adaptive functional requested by the user */
3421           PetscInt k;
3422           PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3423           PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3424           PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3425           nmin = nmax;
3426           PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3427           for (k = 0; k < nmax; k++) {
3428             eigs[k]                     = 1. / PETSC_SMALL;
3429             eigv[k * (subset_size + 1)] = 1.0;
3430           }
3431         }
3432         PetscCall(PetscFPTrapPop());
3433         if (B_ierr) {
3434           PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3435           PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3436           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3437         }
3438 
3439         if (B_neigs > nmax) {
3440           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3441           if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3442           B_neigs = nmax;
3443         }
3444 
3445         nmin_s = PetscMin(nmin, B_N);
3446         if (B_neigs < nmin_s) {
3447           PetscBLASInt B_neigs2 = 0;
3448 
3449           if (upart) {
3450             if (scal) {
3451               B_IU = nmin_s;
3452               B_IL = B_neigs + 1;
3453             } else {
3454               B_IL = B_N - nmin_s + 1;
3455               B_IU = B_N - B_neigs;
3456             }
3457           } else {
3458             B_IL = B_neigs + 1;
3459             B_IU = nmin_s;
3460           }
3461           if (pcbddc->dbg_flag) {
3462             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3463           }
3464           if (sub_schurs->is_symmetric) {
3465             PetscInt j, k;
3466             for (j = 0; j < subset_size; j++) {
3467               for (k = j; k < subset_size; k++) {
3468                 S[j * subset_size + k]  = Sarray[cumarray + j * subset_size + k];
3469                 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3470               }
3471             }
3472           } else {
3473             PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3474             PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3475           }
3476           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3477 #if defined(PETSC_USE_COMPLEX)
3478           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3479 #else
3480           PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3481 #endif
3482           PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3483           PetscCall(PetscFPTrapPop());
3484           B_neigs += B_neigs2;
3485         }
3486         if (B_ierr) {
3487           PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3488           PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3489           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3490         }
3491         if (pcbddc->dbg_flag) {
3492           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3493           for (j = 0; j < B_neigs; j++) {
3494             if (!sub_schurs->gdsw) {
3495               if (eigs[j] == 0.0) {
3496                 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     Inf\n"));
3497               } else {
3498                 if (upart) {
3499                   PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)eigs[j + eigs_start]));
3500                 } else {
3501                   PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", (double)(1. / eigs[j + eigs_start])));
3502                 }
3503               }
3504             } else {
3505               double pg = (double)eigs[j + eigs_start];
3506               if (pg < 2 * PETSC_SMALL) pg = 0.0;
3507               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "     %1.6e\n", pg));
3508             }
3509           }
3510         }
3511       } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3512     }
3513     /* change the basis back to the original one */
3514     if (sub_schurs->change) {
3515       Mat change, phi, phit;
3516 
3517       if (pcbddc->dbg_flag > 2) {
3518         PetscInt ii;
3519         for (ii = 0; ii < B_neigs; ii++) {
3520           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3521           for (j = 0; j < B_N; j++) {
3522 #if defined(PETSC_USE_COMPLEX)
3523             PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3524             PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3525             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3526 #else
3527             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3528 #endif
3529           }
3530         }
3531       }
3532       PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3533       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3534       PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &phi));
3535       PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3536       PetscCall(MatDestroy(&phit));
3537       PetscCall(MatDestroy(&phi));
3538     }
3539     maxneigs                               = PetscMax(B_neigs, maxneigs);
3540     pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3541     if (B_neigs) {
3542       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3543 
3544       if (pcbddc->dbg_flag > 1) {
3545         PetscInt ii;
3546         for (ii = 0; ii < B_neigs; ii++) {
3547           PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "   -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3548           for (j = 0; j < B_N; j++) {
3549 #if defined(PETSC_USE_COMPLEX)
3550             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3551             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3552             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e + %1.4e i\n", (double)r, (double)c));
3553 #else
3554             PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "       %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3555 #endif
3556           }
3557         }
3558       }
3559       PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3560       pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3561       pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3562       cum++;
3563     }
3564     PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3565     /* shift for next computation */
3566     cumarray += subset_size * subset_size;
3567   }
3568   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3569 
3570   if (mss) {
3571     if (sub_schurs->gdsw) {
3572       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3573       PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3574     } else {
3575       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3576       PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3577       /* destroy matrices (junk) */
3578       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3579       PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3580     }
3581   }
3582   if (allocated_S_St) PetscCall(PetscFree2(S, St));
3583   PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3584 #if defined(PETSC_USE_COMPLEX)
3585   PetscCall(PetscFree(rwork));
3586 #endif
3587   if (pcbddc->dbg_flag) {
3588     PetscInt maxneigs_r;
3589     PetscCall(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3590     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3591   }
3592   PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3593   PetscFunctionReturn(0);
3594 }
3595 
3596 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3597 {
3598   PetscScalar *coarse_submat_vals;
3599 
3600   PetscFunctionBegin;
3601   /* Setup local scatters R_to_B and (optionally) R_to_D */
3602   /* PCBDDCSetUpLocalWorkVectors should be called first! */
3603   PetscCall(PCBDDCSetUpLocalScatters(pc));
3604 
3605   /* Setup local neumann solver ksp_R */
3606   /* PCBDDCSetUpLocalScatters should be called first! */
3607   PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3608 
3609   /*
3610      Setup local correction and local part of coarse basis.
3611      Gives back the dense local part of the coarse matrix in column major ordering
3612   */
3613   PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat_vals));
3614 
3615   /* Compute total number of coarse nodes and setup coarse solver */
3616   PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat_vals));
3617 
3618   /* free */
3619   PetscCall(PetscFree(coarse_submat_vals));
3620   PetscFunctionReturn(0);
3621 }
3622 
3623 PetscErrorCode PCBDDCResetCustomization(PC pc)
3624 {
3625   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3626 
3627   PetscFunctionBegin;
3628   PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3629   PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3630   PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3631   PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3632   PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3633   PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3634   PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3635   PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3636   PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3637   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3638   PetscFunctionReturn(0);
3639 }
3640 
3641 PetscErrorCode PCBDDCResetTopography(PC pc)
3642 {
3643   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3644   PetscInt i;
3645 
3646   PetscFunctionBegin;
3647   PetscCall(MatDestroy(&pcbddc->nedcG));
3648   PetscCall(ISDestroy(&pcbddc->nedclocal));
3649   PetscCall(MatDestroy(&pcbddc->discretegradient));
3650   PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
3651   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
3652   PetscCall(MatDestroy(&pcbddc->switch_static_change));
3653   PetscCall(VecDestroy(&pcbddc->work_change));
3654   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
3655   PetscCall(MatDestroy(&pcbddc->divudotp));
3656   PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
3657   PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
3658   for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
3659   pcbddc->n_local_subs = 0;
3660   PetscCall(PetscFree(pcbddc->local_subs));
3661   PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
3662   pcbddc->graphanalyzed        = PETSC_FALSE;
3663   pcbddc->recompute_topography = PETSC_TRUE;
3664   pcbddc->corner_selected      = PETSC_FALSE;
3665   PetscFunctionReturn(0);
3666 }
3667 
3668 PetscErrorCode PCBDDCResetSolvers(PC pc)
3669 {
3670   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3671 
3672   PetscFunctionBegin;
3673   PetscCall(VecDestroy(&pcbddc->coarse_vec));
3674   if (pcbddc->coarse_phi_B) {
3675     PetscScalar *array;
3676     PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &array));
3677     PetscCall(PetscFree(array));
3678   }
3679   PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
3680   PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
3681   PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
3682   PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
3683   PetscCall(VecDestroy(&pcbddc->vec1_P));
3684   PetscCall(VecDestroy(&pcbddc->vec1_C));
3685   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3686   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3687   PetscCall(VecDestroy(&pcbddc->vec1_R));
3688   PetscCall(VecDestroy(&pcbddc->vec2_R));
3689   PetscCall(ISDestroy(&pcbddc->is_R_local));
3690   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
3691   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
3692   PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
3693   PetscCall(KSPReset(pcbddc->ksp_D));
3694   PetscCall(KSPReset(pcbddc->ksp_R));
3695   PetscCall(KSPReset(pcbddc->coarse_ksp));
3696   PetscCall(MatDestroy(&pcbddc->local_mat));
3697   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
3698   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
3699   PetscCall(PetscFree(pcbddc->global_primal_indices));
3700   PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
3701   PetscCall(MatDestroy(&pcbddc->benign_change));
3702   PetscCall(VecDestroy(&pcbddc->benign_vec));
3703   PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
3704   PetscCall(MatDestroy(&pcbddc->benign_B0));
3705   PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
3706   if (pcbddc->benign_zerodiag_subs) {
3707     PetscInt i;
3708     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
3709     PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
3710   }
3711   PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3712   PetscFunctionReturn(0);
3713 }
3714 
3715 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3716 {
3717   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3718   PC_IS   *pcis   = (PC_IS *)pc->data;
3719   VecType  impVecType;
3720   PetscInt n_constraints, n_R, old_size;
3721 
3722   PetscFunctionBegin;
3723   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3724   n_R           = pcis->n - pcbddc->n_vertices;
3725   PetscCall(VecGetType(pcis->vec1_N, &impVecType));
3726   /* local work vectors (try to avoid unneeded work)*/
3727   /* R nodes */
3728   old_size = -1;
3729   if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
3730   if (n_R != old_size) {
3731     PetscCall(VecDestroy(&pcbddc->vec1_R));
3732     PetscCall(VecDestroy(&pcbddc->vec2_R));
3733     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
3734     PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
3735     PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
3736     PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
3737   }
3738   /* local primal dofs */
3739   old_size = -1;
3740   if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
3741   if (pcbddc->local_primal_size != old_size) {
3742     PetscCall(VecDestroy(&pcbddc->vec1_P));
3743     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
3744     PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
3745     PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
3746   }
3747   /* local explicit constraints */
3748   old_size = -1;
3749   if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
3750   if (n_constraints && n_constraints != old_size) {
3751     PetscCall(VecDestroy(&pcbddc->vec1_C));
3752     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
3753     PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
3754     PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
3755   }
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3760 {
3761   /* pointers to pcis and pcbddc */
3762   PC_IS          *pcis       = (PC_IS *)pc->data;
3763   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
3764   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3765   /* submatrices of local problem */
3766   Mat A_RV, A_VR, A_VV, local_auxmat2_R;
3767   /* submatrices of local coarse problem */
3768   Mat S_VV, S_CV, S_VC, S_CC;
3769   /* working matrices */
3770   Mat C_CR;
3771   /* additional working stuff */
3772   PC           pc_R;
3773   Mat          F, Brhs = NULL;
3774   Vec          dummy_vec;
3775   PetscBool    isLU, isCHOL, need_benign_correction, sparserhs;
3776   PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3777   PetscScalar *work;
3778   PetscInt    *idx_V_B;
3779   PetscInt     lda_rhs, n, n_vertices, n_constraints, *p0_lidx_I;
3780   PetscInt     i, n_R, n_D, n_B;
3781   PetscScalar  one = 1.0, m_one = -1.0;
3782 
3783   PetscFunctionBegin;
3784   PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
3785   PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
3786 
3787   /* Set Non-overlapping dimensions */
3788   n_vertices    = pcbddc->n_vertices;
3789   n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3790   n_B           = pcis->n_B;
3791   n_D           = pcis->n - n_B;
3792   n_R           = pcis->n - n_vertices;
3793 
3794   /* vertices in boundary numbering */
3795   PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
3796   PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
3797   PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
3798 
3799   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
3800   PetscCall(PetscCalloc1(pcbddc->local_primal_size * pcbddc->local_primal_size, &coarse_submat_vals));
3801   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_vertices, coarse_submat_vals, &S_VV));
3802   PetscCall(MatDenseSetLDA(S_VV, pcbddc->local_primal_size));
3803   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_vertices, coarse_submat_vals + n_vertices, &S_CV));
3804   PetscCall(MatDenseSetLDA(S_CV, pcbddc->local_primal_size));
3805   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_vertices, n_constraints, coarse_submat_vals + pcbddc->local_primal_size * n_vertices, &S_VC));
3806   PetscCall(MatDenseSetLDA(S_VC, pcbddc->local_primal_size));
3807   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_constraints, n_constraints, coarse_submat_vals + (pcbddc->local_primal_size + 1) * n_vertices, &S_CC));
3808   PetscCall(MatDenseSetLDA(S_CC, pcbddc->local_primal_size));
3809 
3810   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3811   PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
3812   PetscCall(PCSetUp(pc_R));
3813   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
3814   PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
3815   lda_rhs                = n_R;
3816   need_benign_correction = PETSC_FALSE;
3817   if (isLU || isCHOL) {
3818     PetscCall(PCFactorGetMatrix(pc_R, &F));
3819   } else if (sub_schurs && sub_schurs->reuse_solver) {
3820     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3821     MatFactorType      type;
3822 
3823     F = reuse_solver->F;
3824     PetscCall(MatGetFactorType(F, &type));
3825     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3826     if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3827     PetscCall(MatGetSize(F, &lda_rhs, NULL));
3828     need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3829   } else F = NULL;
3830 
3831   /* determine if we can use a sparse right-hand side */
3832   sparserhs = PETSC_FALSE;
3833   if (F) {
3834     MatSolverType solver;
3835 
3836     PetscCall(MatFactorGetSolverType(F, &solver));
3837     PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
3838   }
3839 
3840   /* allocate workspace */
3841   n = 0;
3842   if (n_constraints) n += lda_rhs * n_constraints;
3843   if (n_vertices) {
3844     n = PetscMax(2 * lda_rhs * n_vertices, n);
3845     n = PetscMax((lda_rhs + n_B) * n_vertices, n);
3846   }
3847   if (!pcbddc->symmetric_primal) n = PetscMax(2 * lda_rhs * pcbddc->local_primal_size, n);
3848   PetscCall(PetscMalloc1(n, &work));
3849 
3850   /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3851   dummy_vec = NULL;
3852   if (need_benign_correction && lda_rhs != n_R && F) {
3853     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
3854     PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
3855     PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
3856   }
3857 
3858   PetscCall(MatDestroy(&pcbddc->local_auxmat1));
3859   PetscCall(MatDestroy(&pcbddc->local_auxmat2));
3860 
3861   /* Precompute stuffs needed for preprocessing and application of BDDC*/
3862   if (n_constraints) {
3863     Mat M3, C_B;
3864     IS  is_aux;
3865 
3866     /* Extract constraints on R nodes: C_{CR}  */
3867     PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_aux));
3868     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
3869     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_aux, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
3870 
3871     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
3872     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
3873     if (!sparserhs) {
3874       PetscCall(PetscArrayzero(work, lda_rhs * n_constraints));
3875       for (i = 0; i < n_constraints; i++) {
3876         const PetscScalar *row_cmat_values;
3877         const PetscInt    *row_cmat_indices;
3878         PetscInt           size_of_constraint, j;
3879 
3880         PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
3881         for (j = 0; j < size_of_constraint; j++) work[row_cmat_indices[j] + i * lda_rhs] = -row_cmat_values[j];
3882         PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
3883       }
3884       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &Brhs));
3885     } else {
3886       Mat tC_CR;
3887 
3888       PetscCall(MatScale(C_CR, -1.0));
3889       if (lda_rhs != n_R) {
3890         PetscScalar *aa;
3891         PetscInt     r, *ii, *jj;
3892         PetscBool    done;
3893 
3894         PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
3895         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
3896         PetscCall(MatSeqAIJGetArray(C_CR, &aa));
3897         PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
3898         PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
3899         PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
3900       } else {
3901         PetscCall(PetscObjectReference((PetscObject)C_CR));
3902         tC_CR = C_CR;
3903       }
3904       PetscCall(MatCreateTranspose(tC_CR, &Brhs));
3905       PetscCall(MatDestroy(&tC_CR));
3906     }
3907     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, NULL, &local_auxmat2_R));
3908     if (F) {
3909       if (need_benign_correction) {
3910         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3911 
3912         /* rhs is already zero on interior dofs, no need to change the rhs */
3913         PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
3914       }
3915       PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
3916       if (need_benign_correction) {
3917         PetscScalar       *marr;
3918         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3919 
3920         PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
3921         if (lda_rhs != n_R) {
3922           for (i = 0; i < n_constraints; i++) {
3923             PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
3924             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
3925             PetscCall(VecResetArray(dummy_vec));
3926           }
3927         } else {
3928           for (i = 0; i < n_constraints; i++) {
3929             PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
3930             PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
3931             PetscCall(VecResetArray(pcbddc->vec1_R));
3932           }
3933         }
3934         PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
3935       }
3936     } else {
3937       PetscScalar *marr;
3938 
3939       PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
3940       for (i = 0; i < n_constraints; i++) {
3941         PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs));
3942         PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
3943         PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
3944         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
3945         PetscCall(VecResetArray(pcbddc->vec1_R));
3946         PetscCall(VecResetArray(pcbddc->vec2_R));
3947       }
3948       PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
3949     }
3950     if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
3951     PetscCall(MatDestroy(&Brhs));
3952     if (!pcbddc->switch_static) {
3953       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_constraints, NULL, &pcbddc->local_auxmat2));
3954       for (i = 0; i < n_constraints; i++) {
3955         Vec r, b;
3956         PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
3957         PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
3958         PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
3959         PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
3960         PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
3961         PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
3962       }
3963       PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3));
3964     } else {
3965       if (lda_rhs != n_R) {
3966         IS dummy;
3967 
3968         PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &dummy));
3969         PetscCall(MatCreateSubMatrix(local_auxmat2_R, dummy, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
3970         PetscCall(ISDestroy(&dummy));
3971       } else {
3972         PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
3973         pcbddc->local_auxmat2 = local_auxmat2_R;
3974       }
3975       PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &M3));
3976     }
3977     PetscCall(ISDestroy(&is_aux));
3978     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1}  */
3979     PetscCall(MatScale(M3, m_one));
3980     if (isCHOL) {
3981       PetscCall(MatCholeskyFactor(M3, NULL, NULL));
3982     } else {
3983       PetscCall(MatLUFactor(M3, NULL, NULL, NULL));
3984     }
3985     PetscCall(MatSeqDenseInvertFactors_Private(M3));
3986     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
3987     PetscCall(MatMatMult(M3, C_B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->local_auxmat1));
3988     PetscCall(MatDestroy(&C_B));
3989     PetscCall(MatCopy(M3, S_CC, SAME_NONZERO_PATTERN)); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
3990     PetscCall(MatDestroy(&M3));
3991   }
3992 
3993   /* Get submatrices from subdomain matrix */
3994   if (n_vertices) {
3995 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
3996     PetscBool oldpin;
3997 #endif
3998     PetscBool isaij;
3999     IS        is_aux;
4000 
4001     if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4002       IS tis;
4003 
4004       PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4005       PetscCall(ISSort(tis));
4006       PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4007       PetscCall(ISDestroy(&tis));
4008     } else {
4009       PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4010     }
4011 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4012     oldpin = pcbddc->local_mat->boundtocpu;
4013 #endif
4014     PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4015     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4016     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4017     PetscCall(PetscObjectBaseTypeCompare((PetscObject)A_VR, MATSEQAIJ, &isaij));
4018     if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4019       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4020     }
4021     PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4022 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4023     PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4024 #endif
4025     PetscCall(ISDestroy(&is_aux));
4026   }
4027 
4028   /* Matrix of coarse basis functions (local) */
4029   if (pcbddc->coarse_phi_B) {
4030     PetscInt on_B, on_primal, on_D = n_D;
4031     if (pcbddc->coarse_phi_D) PetscCall(MatGetSize(pcbddc->coarse_phi_D, &on_D, NULL));
4032     PetscCall(MatGetSize(pcbddc->coarse_phi_B, &on_B, &on_primal));
4033     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4034       PetscScalar *marray;
4035 
4036       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &marray));
4037       PetscCall(PetscFree(marray));
4038       PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4039       PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4040       PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4041       PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4042     }
4043   }
4044 
4045   if (!pcbddc->coarse_phi_B) {
4046     PetscScalar *marr;
4047 
4048     /* memory size */
4049     n = n_B * pcbddc->local_primal_size;
4050     if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D * pcbddc->local_primal_size;
4051     if (!pcbddc->symmetric_primal) n *= 2;
4052     PetscCall(PetscCalloc1(n, &marr));
4053     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_B));
4054     marr += n_B * pcbddc->local_primal_size;
4055     if (pcbddc->switch_static || pcbddc->dbg_flag) {
4056       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_phi_D));
4057       marr += n_D * pcbddc->local_primal_size;
4058     }
4059     if (!pcbddc->symmetric_primal) {
4060       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_B));
4061       marr += n_B * pcbddc->local_primal_size;
4062       if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, marr, &pcbddc->coarse_psi_D));
4063     } else {
4064       PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
4065       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4066       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4067         PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
4068         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4069       }
4070     }
4071   }
4072 
4073   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4074   p0_lidx_I = NULL;
4075   if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4076     const PetscInt *idxs;
4077 
4078     PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4079     PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4080     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4081     PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4082   }
4083 
4084   /* vertices */
4085   if (n_vertices) {
4086     PetscBool restoreavr = PETSC_FALSE;
4087 
4088     PetscCall(MatConvert(A_VV, MATDENSE, MAT_INPLACE_MATRIX, &A_VV));
4089 
4090     if (n_R) {
4091       Mat                A_RRmA_RV, A_RV_bcorr = NULL, S_VVt; /* S_VVt with LDA=N */
4092       PetscBLASInt       B_N, B_one            = 1;
4093       const PetscScalar *x;
4094       PetscScalar       *y;
4095 
4096       PetscCall(MatScale(A_RV, m_one));
4097       if (need_benign_correction) {
4098         ISLocalToGlobalMapping RtoN;
4099         IS                     is_p0;
4100         PetscInt              *idxs_p0, n;
4101 
4102         PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4103         PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4104         PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4105         PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4106         PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4107         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4108         PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4109         PetscCall(ISDestroy(&is_p0));
4110       }
4111 
4112       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work, &A_RRmA_RV));
4113       if (!sparserhs || need_benign_correction) {
4114         if (lda_rhs == n_R) {
4115           PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4116         } else {
4117           PetscScalar    *av, *array;
4118           const PetscInt *xadj, *adjncy;
4119           PetscInt        n;
4120           PetscBool       flg_row;
4121 
4122           array = work + lda_rhs * n_vertices;
4123           PetscCall(PetscArrayzero(array, lda_rhs * n_vertices));
4124           PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4125           PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4126           PetscCall(MatSeqAIJGetArray(A_RV, &av));
4127           for (i = 0; i < n; i++) {
4128             PetscInt j;
4129             for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * adjncy[j] + i] = av[j];
4130           }
4131           PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4132           PetscCall(MatDestroy(&A_RV));
4133           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, array, &A_RV));
4134         }
4135         if (need_benign_correction) {
4136           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4137           PetscScalar       *marr;
4138 
4139           PetscCall(MatDenseGetArray(A_RV, &marr));
4140           /* need \Phi^T A_RV = (I+L)A_RV, L given by
4141 
4142                  | 0 0  0 | (V)
4143              L = | 0 0 -1 | (P-p0)
4144                  | 0 0 -1 | (p0)
4145 
4146           */
4147           for (i = 0; i < reuse_solver->benign_n; i++) {
4148             const PetscScalar *vals;
4149             const PetscInt    *idxs, *idxs_zero;
4150             PetscInt           n, j, nz;
4151 
4152             PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4153             PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4154             PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4155             for (j = 0; j < n; j++) {
4156               PetscScalar val = vals[j];
4157               PetscInt    k, col = idxs[j];
4158               for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4159             }
4160             PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4161             PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4162           }
4163           PetscCall(MatDenseRestoreArray(A_RV, &marr));
4164         }
4165         PetscCall(PetscObjectReference((PetscObject)A_RV));
4166         Brhs = A_RV;
4167       } else {
4168         Mat tA_RVT, A_RVT;
4169 
4170         if (!pcbddc->symmetric_primal) {
4171           /* A_RV already scaled by -1 */
4172           PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4173         } else {
4174           restoreavr = PETSC_TRUE;
4175           PetscCall(MatScale(A_VR, -1.0));
4176           PetscCall(PetscObjectReference((PetscObject)A_VR));
4177           A_RVT = A_VR;
4178         }
4179         if (lda_rhs != n_R) {
4180           PetscScalar *aa;
4181           PetscInt     r, *ii, *jj;
4182           PetscBool    done;
4183 
4184           PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4185           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4186           PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4187           PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4188           PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4189           PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4190         } else {
4191           PetscCall(PetscObjectReference((PetscObject)A_RVT));
4192           tA_RVT = A_RVT;
4193         }
4194         PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4195         PetscCall(MatDestroy(&tA_RVT));
4196         PetscCall(MatDestroy(&A_RVT));
4197       }
4198       if (F) {
4199         /* need to correct the rhs */
4200         if (need_benign_correction) {
4201           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4202           PetscScalar       *marr;
4203 
4204           PetscCall(MatDenseGetArray(Brhs, &marr));
4205           if (lda_rhs != n_R) {
4206             for (i = 0; i < n_vertices; i++) {
4207               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4208               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4209               PetscCall(VecResetArray(dummy_vec));
4210             }
4211           } else {
4212             for (i = 0; i < n_vertices; i++) {
4213               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4214               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4215               PetscCall(VecResetArray(pcbddc->vec1_R));
4216             }
4217           }
4218           PetscCall(MatDenseRestoreArray(Brhs, &marr));
4219         }
4220         PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4221         if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4222         /* need to correct the solution */
4223         if (need_benign_correction) {
4224           PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4225           PetscScalar       *marr;
4226 
4227           PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4228           if (lda_rhs != n_R) {
4229             for (i = 0; i < n_vertices; i++) {
4230               PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4231               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4232               PetscCall(VecResetArray(dummy_vec));
4233             }
4234           } else {
4235             for (i = 0; i < n_vertices; i++) {
4236               PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4237               PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4238               PetscCall(VecResetArray(pcbddc->vec1_R));
4239             }
4240           }
4241           PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4242         }
4243       } else {
4244         PetscCall(MatDenseGetArray(Brhs, &y));
4245         for (i = 0; i < n_vertices; i++) {
4246           PetscCall(VecPlaceArray(pcbddc->vec1_R, y + i * lda_rhs));
4247           PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * lda_rhs));
4248           PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4249           PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4250           PetscCall(VecResetArray(pcbddc->vec1_R));
4251           PetscCall(VecResetArray(pcbddc->vec2_R));
4252         }
4253         PetscCall(MatDenseRestoreArray(Brhs, &y));
4254       }
4255       PetscCall(MatDestroy(&A_RV));
4256       PetscCall(MatDestroy(&Brhs));
4257       /* S_VV and S_CV */
4258       if (n_constraints) {
4259         Mat B;
4260 
4261         PetscCall(PetscArrayzero(work + lda_rhs * n_vertices, n_B * n_vertices));
4262         for (i = 0; i < n_vertices; i++) {
4263           PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * lda_rhs));
4264           PetscCall(VecPlaceArray(pcis->vec1_B, work + lda_rhs * n_vertices + i * n_B));
4265           PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4266           PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
4267           PetscCall(VecResetArray(pcis->vec1_B));
4268           PetscCall(VecResetArray(pcbddc->vec1_R));
4269         }
4270         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_vertices, work + lda_rhs * n_vertices, &B));
4271         /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4272         PetscCall(MatProductCreateWithMat(pcbddc->local_auxmat1, B, NULL, S_CV));
4273         PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4274         PetscCall(MatProductSetFromOptions(S_CV));
4275         PetscCall(MatProductSymbolic(S_CV));
4276         PetscCall(MatProductNumeric(S_CV));
4277         PetscCall(MatProductClear(S_CV));
4278 
4279         PetscCall(MatDestroy(&B));
4280         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_vertices, work + lda_rhs * n_vertices, &B));
4281         /* Reuse B = local_auxmat2_R * S_CV */
4282         PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CV, NULL, B));
4283         PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4284         PetscCall(MatProductSetFromOptions(B));
4285         PetscCall(MatProductSymbolic(B));
4286         PetscCall(MatProductNumeric(B));
4287 
4288         PetscCall(MatScale(S_CV, m_one));
4289         PetscCall(PetscBLASIntCast(lda_rhs * n_vertices, &B_N));
4290         PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, work + lda_rhs * n_vertices, &B_one, work, &B_one));
4291         PetscCall(MatDestroy(&B));
4292       }
4293       if (lda_rhs != n_R) {
4294         PetscCall(MatDestroy(&A_RRmA_RV));
4295         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, work, &A_RRmA_RV));
4296         PetscCall(MatDenseSetLDA(A_RRmA_RV, lda_rhs));
4297       }
4298       PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VVt));
4299       /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4300       if (need_benign_correction) {
4301         PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4302         PetscScalar       *marr, *sums;
4303 
4304         PetscCall(PetscMalloc1(n_vertices, &sums));
4305         PetscCall(MatDenseGetArray(S_VVt, &marr));
4306         for (i = 0; i < reuse_solver->benign_n; i++) {
4307           const PetscScalar *vals;
4308           const PetscInt    *idxs, *idxs_zero;
4309           PetscInt           n, j, nz;
4310 
4311           PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4312           PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4313           for (j = 0; j < n_vertices; j++) {
4314             PetscInt k;
4315             sums[j] = 0.;
4316             for (k = 0; k < nz; k++) sums[j] += work[idxs_zero[k] + j * lda_rhs];
4317           }
4318           PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4319           for (j = 0; j < n; j++) {
4320             PetscScalar val = vals[j];
4321             PetscInt    k;
4322             for (k = 0; k < n_vertices; k++) marr[idxs[j] + k * n_vertices] += val * sums[k];
4323           }
4324           PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4325           PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4326         }
4327         PetscCall(PetscFree(sums));
4328         PetscCall(MatDenseRestoreArray(S_VVt, &marr));
4329         PetscCall(MatDestroy(&A_RV_bcorr));
4330       }
4331       PetscCall(MatDestroy(&A_RRmA_RV));
4332       PetscCall(PetscBLASIntCast(n_vertices * n_vertices, &B_N));
4333       PetscCall(MatDenseGetArrayRead(A_VV, &x));
4334       PetscCall(MatDenseGetArray(S_VVt, &y));
4335       PetscCallBLAS("BLASaxpy", BLASaxpy_(&B_N, &one, x, &B_one, y, &B_one));
4336       PetscCall(MatDenseRestoreArrayRead(A_VV, &x));
4337       PetscCall(MatDenseRestoreArray(S_VVt, &y));
4338       PetscCall(MatCopy(S_VVt, S_VV, SAME_NONZERO_PATTERN));
4339       PetscCall(MatDestroy(&S_VVt));
4340     } else {
4341       PetscCall(MatCopy(A_VV, S_VV, SAME_NONZERO_PATTERN));
4342     }
4343     PetscCall(MatDestroy(&A_VV));
4344 
4345     /* coarse basis functions */
4346     for (i = 0; i < n_vertices; i++) {
4347       Vec         v;
4348       PetscScalar one = 1.0, zero = 0.0;
4349 
4350       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i));
4351       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i, &v));
4352       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4353       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4354       if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4355         PetscMPIInt rank;
4356         PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), &rank));
4357         PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_B), PETSC_ERR_PLIB, "Expected a sequential dense matrix");
4358       }
4359       PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
4360       PetscCall(VecAssemblyBegin(v)); /* If v is on device, hope VecSetValues() eventually implemented by a host to device memcopy */
4361       PetscCall(VecAssemblyEnd(v));
4362       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i, &v));
4363 
4364       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4365         PetscInt j;
4366 
4367         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i, &v));
4368         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4369         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4370         if (PetscDefined(USE_DEBUG)) { /* The following VecSetValues() expects a sequential matrix */
4371           PetscMPIInt rank;
4372           PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), &rank));
4373           PetscCheck(rank <= 1, PetscObjectComm((PetscObject)pcbddc->coarse_phi_D), PETSC_ERR_PLIB, "Expected a sequential dense matrix");
4374         }
4375         for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES));
4376         PetscCall(VecAssemblyBegin(v));
4377         PetscCall(VecAssemblyEnd(v));
4378         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i, &v));
4379       }
4380       PetscCall(VecResetArray(pcbddc->vec1_R));
4381     }
4382     /* if n_R == 0 the object is not destroyed */
4383     PetscCall(MatDestroy(&A_RV));
4384   }
4385   PetscCall(VecDestroy(&dummy_vec));
4386 
4387   if (n_constraints) {
4388     Mat B;
4389 
4390     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_constraints, work, &B));
4391     PetscCall(MatScale(S_CC, m_one));
4392     PetscCall(MatProductCreateWithMat(local_auxmat2_R, S_CC, NULL, B));
4393     PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4394     PetscCall(MatProductSetFromOptions(B));
4395     PetscCall(MatProductSymbolic(B));
4396     PetscCall(MatProductNumeric(B));
4397 
4398     PetscCall(MatScale(S_CC, m_one));
4399     if (n_vertices) {
4400       if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4401         PetscCall(MatTransposeSetPrecursor(S_CV, S_VC));
4402         PetscCall(MatTranspose(S_CV, MAT_REUSE_MATRIX, &S_VC));
4403       } else {
4404         Mat S_VCt;
4405 
4406         if (lda_rhs != n_R) {
4407           PetscCall(MatDestroy(&B));
4408           PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_constraints, work, &B));
4409           PetscCall(MatDenseSetLDA(B, lda_rhs));
4410         }
4411         PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &S_VCt));
4412         PetscCall(MatCopy(S_VCt, S_VC, SAME_NONZERO_PATTERN));
4413         PetscCall(MatDestroy(&S_VCt));
4414       }
4415     }
4416     PetscCall(MatDestroy(&B));
4417     /* coarse basis functions */
4418     for (i = 0; i < n_constraints; i++) {
4419       Vec v;
4420 
4421       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + lda_rhs * i));
4422       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v));
4423       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4424       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4425       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_B, i + n_vertices, &v));
4426       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4427         PetscInt    j;
4428         PetscScalar zero = 0.0;
4429         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v));
4430         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4431         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4432         for (j = 0; j < pcbddc->benign_n; j++) PetscCall(VecSetValues(v, 1, &p0_lidx_I[j], &zero, INSERT_VALUES));
4433         PetscCall(VecAssemblyBegin(v));
4434         PetscCall(VecAssemblyEnd(v));
4435         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_phi_D, i + n_vertices, &v));
4436       }
4437       PetscCall(VecResetArray(pcbddc->vec1_R));
4438     }
4439   }
4440   if (n_constraints) PetscCall(MatDestroy(&local_auxmat2_R));
4441   PetscCall(PetscFree(p0_lidx_I));
4442 
4443   /* coarse matrix entries relative to B_0 */
4444   if (pcbddc->benign_n) {
4445     Mat                B0_B, B0_BPHI;
4446     IS                 is_dummy;
4447     const PetscScalar *data;
4448     PetscInt           j;
4449 
4450     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
4451     PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
4452     PetscCall(ISDestroy(&is_dummy));
4453     PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
4454     PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
4455     PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
4456     for (j = 0; j < pcbddc->benign_n; j++) {
4457       PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4458       for (i = 0; i < pcbddc->local_primal_size; i++) {
4459         coarse_submat_vals[primal_idx * pcbddc->local_primal_size + i] = data[i * pcbddc->benign_n + j];
4460         coarse_submat_vals[i * pcbddc->local_primal_size + primal_idx] = data[i * pcbddc->benign_n + j];
4461       }
4462     }
4463     PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
4464     PetscCall(MatDestroy(&B0_B));
4465     PetscCall(MatDestroy(&B0_BPHI));
4466   }
4467 
4468   /* compute other basis functions for non-symmetric problems */
4469   if (!pcbddc->symmetric_primal) {
4470     Mat          B_V = NULL, B_C = NULL;
4471     PetscScalar *marray;
4472 
4473     if (n_constraints) {
4474       Mat S_CCT, C_CRT;
4475 
4476       PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
4477       PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
4478       PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_C));
4479       PetscCall(MatDestroy(&S_CCT));
4480       if (n_vertices) {
4481         Mat S_VCT;
4482 
4483         PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
4484         PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B_V));
4485         PetscCall(MatDestroy(&S_VCT));
4486       }
4487       PetscCall(MatDestroy(&C_CRT));
4488     } else {
4489       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
4490     }
4491     if (n_vertices && n_R) {
4492       PetscScalar    *av, *marray;
4493       const PetscInt *xadj, *adjncy;
4494       PetscInt        n;
4495       PetscBool       flg_row;
4496 
4497       /* B_V = B_V - A_VR^T */
4498       PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4499       PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4500       PetscCall(MatSeqAIJGetArray(A_VR, &av));
4501       PetscCall(MatDenseGetArray(B_V, &marray));
4502       for (i = 0; i < n; i++) {
4503         PetscInt j;
4504         for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
4505       }
4506       PetscCall(MatDenseRestoreArray(B_V, &marray));
4507       PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4508       PetscCall(MatDestroy(&A_VR));
4509     }
4510 
4511     /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4512     if (n_vertices) {
4513       PetscCall(MatDenseGetArray(B_V, &marray));
4514       for (i = 0; i < n_vertices; i++) {
4515         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
4516         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
4517         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4518         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4519         PetscCall(VecResetArray(pcbddc->vec1_R));
4520         PetscCall(VecResetArray(pcbddc->vec2_R));
4521       }
4522       PetscCall(MatDenseRestoreArray(B_V, &marray));
4523     }
4524     if (B_C) {
4525       PetscCall(MatDenseGetArray(B_C, &marray));
4526       for (i = n_vertices; i < n_constraints + n_vertices; i++) {
4527         PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
4528         PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
4529         PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4530         PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4531         PetscCall(VecResetArray(pcbddc->vec1_R));
4532         PetscCall(VecResetArray(pcbddc->vec2_R));
4533       }
4534       PetscCall(MatDenseRestoreArray(B_C, &marray));
4535     }
4536     /* coarse basis functions */
4537     for (i = 0; i < pcbddc->local_primal_size; i++) {
4538       Vec v;
4539 
4540       PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
4541       PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
4542       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4543       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4544       if (i < n_vertices) {
4545         PetscScalar one = 1.0;
4546         PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
4547         PetscCall(VecAssemblyBegin(v));
4548         PetscCall(VecAssemblyEnd(v));
4549       }
4550       PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
4551 
4552       if (pcbddc->switch_static || pcbddc->dbg_flag) {
4553         PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
4554         PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4555         PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
4556         PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
4557       }
4558       PetscCall(VecResetArray(pcbddc->vec1_R));
4559     }
4560     PetscCall(MatDestroy(&B_V));
4561     PetscCall(MatDestroy(&B_C));
4562   }
4563 
4564   /* free memory */
4565   PetscCall(PetscFree(idx_V_B));
4566   PetscCall(MatDestroy(&S_VV));
4567   PetscCall(MatDestroy(&S_CV));
4568   PetscCall(MatDestroy(&S_VC));
4569   PetscCall(MatDestroy(&S_CC));
4570   PetscCall(PetscFree(work));
4571   if (n_vertices) PetscCall(MatDestroy(&A_VR));
4572   if (n_constraints) PetscCall(MatDestroy(&C_CR));
4573   PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4574 
4575   /* Checking coarse_sub_mat and coarse basis functios */
4576   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4577   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4578   if (pcbddc->dbg_flag) {
4579     Mat       coarse_sub_mat;
4580     Mat       AUXMAT, TM1, TM2, TM3, TM4;
4581     Mat       coarse_phi_D, coarse_phi_B;
4582     Mat       coarse_psi_D, coarse_psi_B;
4583     Mat       A_II, A_BB, A_IB, A_BI;
4584     Mat       C_B, CPHI;
4585     IS        is_dummy;
4586     Vec       mones;
4587     MatType   checkmattype = MATSEQAIJ;
4588     PetscReal real_value;
4589 
4590     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4591       Mat A;
4592       PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
4593       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
4594       PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
4595       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
4596       PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
4597       PetscCall(MatDestroy(&A));
4598     } else {
4599       PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
4600       PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
4601       PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
4602       PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
4603     }
4604     PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
4605     PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
4606     if (!pcbddc->symmetric_primal) {
4607       PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
4608       PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
4609     }
4610     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_sub_mat));
4611 
4612     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
4613     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
4614     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
4615     if (!pcbddc->symmetric_primal) {
4616       PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4617       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
4618       PetscCall(MatDestroy(&AUXMAT));
4619       PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4620       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
4621       PetscCall(MatDestroy(&AUXMAT));
4622       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4623       PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
4624       PetscCall(MatDestroy(&AUXMAT));
4625       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4626       PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
4627       PetscCall(MatDestroy(&AUXMAT));
4628     } else {
4629       PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
4630       PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
4631       PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4632       PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
4633       PetscCall(MatDestroy(&AUXMAT));
4634       PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
4635       PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
4636       PetscCall(MatDestroy(&AUXMAT));
4637     }
4638     PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
4639     PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
4640     PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
4641     PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
4642     if (pcbddc->benign_n) {
4643       Mat                B0_B, B0_BPHI;
4644       const PetscScalar *data2;
4645       PetscScalar       *data;
4646       PetscInt           j;
4647 
4648       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
4649       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
4650       PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
4651       PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
4652       PetscCall(MatDenseGetArray(TM1, &data));
4653       PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
4654       for (j = 0; j < pcbddc->benign_n; j++) {
4655         PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4656         for (i = 0; i < pcbddc->local_primal_size; i++) {
4657           data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
4658           data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
4659         }
4660       }
4661       PetscCall(MatDenseRestoreArray(TM1, &data));
4662       PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
4663       PetscCall(MatDestroy(&B0_B));
4664       PetscCall(ISDestroy(&is_dummy));
4665       PetscCall(MatDestroy(&B0_BPHI));
4666     }
4667 #if 0
4668   {
4669     PetscViewer viewer;
4670     char filename[256];
4671     sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4672     PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
4673     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
4674     PetscCall(PetscObjectSetName((PetscObject)coarse_sub_mat,"computed"));
4675     PetscCall(MatView(coarse_sub_mat,viewer));
4676     PetscCall(PetscObjectSetName((PetscObject)TM1,"projected"));
4677     PetscCall(MatView(TM1,viewer));
4678     if (pcbddc->coarse_phi_B) {
4679       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
4680       PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
4681     }
4682     if (pcbddc->coarse_phi_D) {
4683       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
4684       PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
4685     }
4686     if (pcbddc->coarse_psi_B) {
4687       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
4688       PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
4689     }
4690     if (pcbddc->coarse_psi_D) {
4691       PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
4692       PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
4693     }
4694     PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
4695     PetscCall(MatView(pcbddc->local_mat,viewer));
4696     PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
4697     PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
4698     PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
4699     PetscCall(ISView(pcis->is_I_local,viewer));
4700     PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
4701     PetscCall(ISView(pcis->is_B_local,viewer));
4702     PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
4703     PetscCall(ISView(pcbddc->is_R_local,viewer));
4704     PetscCall(PetscViewerDestroy(&viewer));
4705   }
4706 #endif
4707     PetscCall(MatAXPY(TM1, m_one, coarse_sub_mat, DIFFERENT_NONZERO_PATTERN));
4708     PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
4709     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
4710     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d          matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
4711 
4712     /* check constraints */
4713     PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
4714     PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4715     if (!pcbddc->benign_n) { /* TODO: add benign case */
4716       PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
4717     } else {
4718       PetscScalar *data;
4719       Mat          tmat;
4720       PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
4721       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
4722       PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
4723       PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
4724       PetscCall(MatDestroy(&tmat));
4725     }
4726     PetscCall(MatCreateVecs(CPHI, &mones, NULL));
4727     PetscCall(VecSet(mones, -1.0));
4728     PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
4729     PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
4730     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
4731     if (!pcbddc->symmetric_primal) {
4732       PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
4733       PetscCall(VecSet(mones, -1.0));
4734       PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
4735       PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
4736       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
4737     }
4738     PetscCall(MatDestroy(&C_B));
4739     PetscCall(MatDestroy(&CPHI));
4740     PetscCall(ISDestroy(&is_dummy));
4741     PetscCall(VecDestroy(&mones));
4742     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
4743     PetscCall(MatDestroy(&A_II));
4744     PetscCall(MatDestroy(&A_BB));
4745     PetscCall(MatDestroy(&A_IB));
4746     PetscCall(MatDestroy(&A_BI));
4747     PetscCall(MatDestroy(&TM1));
4748     PetscCall(MatDestroy(&TM2));
4749     PetscCall(MatDestroy(&TM3));
4750     PetscCall(MatDestroy(&TM4));
4751     PetscCall(MatDestroy(&coarse_phi_D));
4752     PetscCall(MatDestroy(&coarse_phi_B));
4753     if (!pcbddc->symmetric_primal) {
4754       PetscCall(MatDestroy(&coarse_psi_D));
4755       PetscCall(MatDestroy(&coarse_psi_B));
4756     }
4757     PetscCall(MatDestroy(&coarse_sub_mat));
4758   }
4759   /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */
4760   {
4761     PetscBool gpu;
4762 
4763     PetscCall(PetscObjectTypeCompare((PetscObject)pcis->vec1_N, VECSEQCUDA, &gpu));
4764     if (gpu) {
4765       if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
4766       if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
4767       if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
4768       if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
4769       if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
4770       if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, MATSEQDENSECUDA, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
4771     }
4772   }
4773   /* get back data */
4774   *coarse_submat_vals_n = coarse_submat_vals;
4775   PetscFunctionReturn(0);
4776 }
4777 
4778 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
4779 {
4780   Mat      *work_mat;
4781   IS        isrow_s, iscol_s;
4782   PetscBool rsorted, csorted;
4783   PetscInt  rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
4784 
4785   PetscFunctionBegin;
4786   PetscCall(ISSorted(isrow, &rsorted));
4787   PetscCall(ISSorted(iscol, &csorted));
4788   PetscCall(ISGetLocalSize(isrow, &rsize));
4789   PetscCall(ISGetLocalSize(iscol, &csize));
4790 
4791   if (!rsorted) {
4792     const PetscInt *idxs;
4793     PetscInt       *idxs_sorted, i;
4794 
4795     PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
4796     PetscCall(PetscMalloc1(rsize, &idxs_sorted));
4797     for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
4798     PetscCall(ISGetIndices(isrow, &idxs));
4799     PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
4800     for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
4801     PetscCall(ISRestoreIndices(isrow, &idxs));
4802     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
4803   } else {
4804     PetscCall(PetscObjectReference((PetscObject)isrow));
4805     isrow_s = isrow;
4806   }
4807 
4808   if (!csorted) {
4809     if (isrow == iscol) {
4810       PetscCall(PetscObjectReference((PetscObject)isrow_s));
4811       iscol_s = isrow_s;
4812     } else {
4813       const PetscInt *idxs;
4814       PetscInt       *idxs_sorted, i;
4815 
4816       PetscCall(PetscMalloc1(csize, &idxs_perm_c));
4817       PetscCall(PetscMalloc1(csize, &idxs_sorted));
4818       for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
4819       PetscCall(ISGetIndices(iscol, &idxs));
4820       PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
4821       for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
4822       PetscCall(ISRestoreIndices(iscol, &idxs));
4823       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
4824     }
4825   } else {
4826     PetscCall(PetscObjectReference((PetscObject)iscol));
4827     iscol_s = iscol;
4828   }
4829 
4830   PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
4831 
4832   if (!rsorted || !csorted) {
4833     Mat new_mat;
4834     IS  is_perm_r, is_perm_c;
4835 
4836     if (!rsorted) {
4837       PetscInt *idxs_r, i;
4838       PetscCall(PetscMalloc1(rsize, &idxs_r));
4839       for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
4840       PetscCall(PetscFree(idxs_perm_r));
4841       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
4842     } else {
4843       PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
4844     }
4845     PetscCall(ISSetPermutation(is_perm_r));
4846 
4847     if (!csorted) {
4848       if (isrow_s == iscol_s) {
4849         PetscCall(PetscObjectReference((PetscObject)is_perm_r));
4850         is_perm_c = is_perm_r;
4851       } else {
4852         PetscInt *idxs_c, i;
4853         PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
4854         PetscCall(PetscMalloc1(csize, &idxs_c));
4855         for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
4856         PetscCall(PetscFree(idxs_perm_c));
4857         PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
4858       }
4859     } else {
4860       PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
4861     }
4862     PetscCall(ISSetPermutation(is_perm_c));
4863 
4864     PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
4865     PetscCall(MatDestroy(&work_mat[0]));
4866     work_mat[0] = new_mat;
4867     PetscCall(ISDestroy(&is_perm_r));
4868     PetscCall(ISDestroy(&is_perm_c));
4869   }
4870 
4871   PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
4872   *B = work_mat[0];
4873   PetscCall(MatDestroyMatrices(1, &work_mat));
4874   PetscCall(ISDestroy(&isrow_s));
4875   PetscCall(ISDestroy(&iscol_s));
4876   PetscFunctionReturn(0);
4877 }
4878 
4879 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
4880 {
4881   Mat_IS   *matis  = (Mat_IS *)pc->pmat->data;
4882   PC_BDDC  *pcbddc = (PC_BDDC *)pc->data;
4883   Mat       new_mat, lA;
4884   IS        is_local, is_global;
4885   PetscInt  local_size;
4886   PetscBool isseqaij, issym, isset;
4887 
4888   PetscFunctionBegin;
4889   PetscCall(MatDestroy(&pcbddc->local_mat));
4890   PetscCall(MatGetSize(matis->A, &local_size, NULL));
4891   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
4892   PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
4893   PetscCall(ISDestroy(&is_local));
4894   PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
4895   PetscCall(ISDestroy(&is_global));
4896 
4897   if (pcbddc->dbg_flag) {
4898     Vec       x, x_change;
4899     PetscReal error;
4900 
4901     PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
4902     PetscCall(VecSetRandom(x, NULL));
4903     PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
4904     PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
4905     PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
4906     PetscCall(MatMult(new_mat, matis->x, matis->y));
4907     if (!pcbddc->change_interior) {
4908       const PetscScalar *x, *y, *v;
4909       PetscReal          lerror = 0.;
4910       PetscInt           i;
4911 
4912       PetscCall(VecGetArrayRead(matis->x, &x));
4913       PetscCall(VecGetArrayRead(matis->y, &y));
4914       PetscCall(VecGetArrayRead(matis->counter, &v));
4915       for (i = 0; i < local_size; i++)
4916         if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
4917       PetscCall(VecRestoreArrayRead(matis->x, &x));
4918       PetscCall(VecRestoreArrayRead(matis->y, &y));
4919       PetscCall(VecRestoreArrayRead(matis->counter, &v));
4920       PetscCall(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
4921       if (error > PETSC_SMALL) {
4922         if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4923           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
4924         } else {
4925           SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
4926         }
4927       }
4928     }
4929     PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
4930     PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
4931     PetscCall(VecAXPY(x, -1.0, x_change));
4932     PetscCall(VecNorm(x, NORM_INFINITY, &error));
4933     if (error > PETSC_SMALL) {
4934       if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
4935         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
4936       } else {
4937         SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
4938       }
4939     }
4940     PetscCall(VecDestroy(&x));
4941     PetscCall(VecDestroy(&x_change));
4942   }
4943 
4944   /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
4945   PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
4946 
4947   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
4948   PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
4949   if (isseqaij) {
4950     PetscCall(MatDestroy(&pcbddc->local_mat));
4951     PetscCall(MatPtAP(matis->A, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
4952     if (lA) {
4953       Mat work;
4954       PetscCall(MatPtAP(lA, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
4955       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
4956       PetscCall(MatDestroy(&work));
4957     }
4958   } else {
4959     Mat work_mat;
4960 
4961     PetscCall(MatDestroy(&pcbddc->local_mat));
4962     PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
4963     PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &pcbddc->local_mat));
4964     PetscCall(MatDestroy(&work_mat));
4965     if (lA) {
4966       Mat work;
4967       PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
4968       PetscCall(MatPtAP(work_mat, new_mat, MAT_INITIAL_MATRIX, 2.0, &work));
4969       PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
4970       PetscCall(MatDestroy(&work));
4971     }
4972   }
4973   PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
4974   if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
4975   PetscCall(MatDestroy(&new_mat));
4976   PetscFunctionReturn(0);
4977 }
4978 
4979 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
4980 {
4981   PC_IS          *pcis        = (PC_IS *)(pc->data);
4982   PC_BDDC        *pcbddc      = (PC_BDDC *)pc->data;
4983   PCBDDCSubSchurs sub_schurs  = pcbddc->sub_schurs;
4984   PetscInt       *idx_R_local = NULL;
4985   PetscInt        n_vertices, i, j, n_R, n_D, n_B;
4986   PetscInt        vbs, bs;
4987   PetscBT         bitmask = NULL;
4988 
4989   PetscFunctionBegin;
4990   /*
4991     No need to setup local scatters if
4992       - primal space is unchanged
4993         AND
4994       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
4995         AND
4996       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
4997   */
4998   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(0);
4999   /* destroy old objects */
5000   PetscCall(ISDestroy(&pcbddc->is_R_local));
5001   PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5002   PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5003   /* Set Non-overlapping dimensions */
5004   n_B        = pcis->n_B;
5005   n_D        = pcis->n - n_B;
5006   n_vertices = pcbddc->n_vertices;
5007 
5008   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
5009 
5010   /* create auxiliary bitmask and allocate workspace */
5011   if (!sub_schurs || !sub_schurs->reuse_solver) {
5012     PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5013     PetscCall(PetscBTCreate(pcis->n, &bitmask));
5014     for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5015 
5016     for (i = 0, n_R = 0; i < pcis->n; i++) {
5017       if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5018     }
5019   } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5020     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5021 
5022     PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5023     PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5024   }
5025 
5026   /* Block code */
5027   vbs = 1;
5028   PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5029   if (bs > 1 && !(n_vertices % bs)) {
5030     PetscBool is_blocked = PETSC_TRUE;
5031     PetscInt *vary;
5032     if (!sub_schurs || !sub_schurs->reuse_solver) {
5033       PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5034       PetscCall(PetscArrayzero(vary, pcis->n / bs));
5035       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5036       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5037       for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5038       for (i = 0; i < pcis->n / bs; i++) {
5039         if (vary[i] != 0 && vary[i] != bs) {
5040           is_blocked = PETSC_FALSE;
5041           break;
5042         }
5043       }
5044       PetscCall(PetscFree(vary));
5045     } else {
5046       /* Verify directly the R set */
5047       for (i = 0; i < n_R / bs; i++) {
5048         PetscInt j, node = idx_R_local[bs * i];
5049         for (j = 1; j < bs; j++) {
5050           if (node != idx_R_local[bs * i + j] - j) {
5051             is_blocked = PETSC_FALSE;
5052             break;
5053           }
5054         }
5055       }
5056     }
5057     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5058       vbs = bs;
5059       for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5060     }
5061   }
5062   PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5063   if (sub_schurs && sub_schurs->reuse_solver) {
5064     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5065 
5066     PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5067     PetscCall(ISDestroy(&reuse_solver->is_R));
5068     PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5069     reuse_solver->is_R = pcbddc->is_R_local;
5070   } else {
5071     PetscCall(PetscFree(idx_R_local));
5072   }
5073 
5074   /* print some info if requested */
5075   if (pcbddc->dbg_flag) {
5076     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5077     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5078     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5079     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5080     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5081     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5082                                                  pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5083     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5084   }
5085 
5086   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5087   if (!sub_schurs || !sub_schurs->reuse_solver) {
5088     IS        is_aux1, is_aux2;
5089     PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5090 
5091     PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5092     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5093     PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5094     PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5095     for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5096     PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5097     for (i = 0, j = 0; i < n_R; i++) {
5098       if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5099     }
5100     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5101     PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5102     for (i = 0, j = 0; i < n_B; i++) {
5103       if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5104     }
5105     PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5106     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5107     PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5108     PetscCall(ISDestroy(&is_aux1));
5109     PetscCall(ISDestroy(&is_aux2));
5110 
5111     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5112       PetscCall(PetscMalloc1(n_D, &aux_array1));
5113       for (i = 0, j = 0; i < n_R; i++) {
5114         if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5115       }
5116       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5117       PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5118       PetscCall(ISDestroy(&is_aux1));
5119     }
5120     PetscCall(PetscBTDestroy(&bitmask));
5121     PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5122   } else {
5123     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5124     IS                 tis;
5125     PetscInt           schur_size;
5126 
5127     PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5128     PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5129     PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5130     PetscCall(ISDestroy(&tis));
5131     if (pcbddc->switch_static || pcbddc->dbg_flag) {
5132       PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5133       PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5134       PetscCall(ISDestroy(&tis));
5135     }
5136   }
5137   PetscFunctionReturn(0);
5138 }
5139 
5140 static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5141 {
5142   MatNullSpace   NullSpace;
5143   Mat            dmat;
5144   const Vec     *nullvecs;
5145   Vec            v, v2, *nullvecs2;
5146   VecScatter     sct = NULL;
5147   PetscContainer c;
5148   PetscScalar   *ddata;
5149   PetscInt       k, nnsp_size, bsiz, bsiz2, n, N, bs;
5150   PetscBool      nnsp_has_cnst;
5151 
5152   PetscFunctionBegin;
5153   if (!is && !B) { /* MATIS */
5154     Mat_IS *matis = (Mat_IS *)A->data;
5155 
5156     if (!B) PetscCall(MatISGetLocalMat(A, &B));
5157     sct = matis->cctx;
5158     PetscCall(PetscObjectReference((PetscObject)sct));
5159   } else {
5160     PetscCall(MatGetNullSpace(B, &NullSpace));
5161     if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5162     if (NullSpace) PetscFunctionReturn(0);
5163   }
5164   PetscCall(MatGetNullSpace(A, &NullSpace));
5165   if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5166   if (!NullSpace) PetscFunctionReturn(0);
5167 
5168   PetscCall(MatCreateVecs(A, &v, NULL));
5169   PetscCall(MatCreateVecs(B, &v2, NULL));
5170   if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5171   PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, (const Vec **)&nullvecs));
5172   bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5173   PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5174   PetscCall(VecGetBlockSize(v2, &bs));
5175   PetscCall(VecGetSize(v2, &N));
5176   PetscCall(VecGetLocalSize(v2, &n));
5177   PetscCall(PetscMalloc1(n * bsiz, &ddata));
5178   for (k = 0; k < nnsp_size; k++) {
5179     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5180     PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5181     PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5182   }
5183   if (nnsp_has_cnst) {
5184     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5185     PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5186   }
5187   PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5188   PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5189 
5190   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5191   PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)B), &c));
5192   PetscCall(PetscContainerSetPointer(c, ddata));
5193   PetscCall(PetscContainerSetUserDestroy(c, PetscContainerUserDestroyDefault));
5194   PetscCall(PetscObjectCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", (PetscObject)c));
5195   PetscCall(PetscContainerDestroy(&c));
5196   PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5197   PetscCall(MatDestroy(&dmat));
5198 
5199   for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5200   PetscCall(PetscFree(nullvecs2));
5201   PetscCall(MatSetNearNullSpace(B, NullSpace));
5202   PetscCall(MatNullSpaceDestroy(&NullSpace));
5203   PetscCall(VecDestroy(&v));
5204   PetscCall(VecDestroy(&v2));
5205   PetscCall(VecScatterDestroy(&sct));
5206   PetscFunctionReturn(0);
5207 }
5208 
5209 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5210 {
5211   PC_BDDC     *pcbddc = (PC_BDDC *)pc->data;
5212   PC_IS       *pcis   = (PC_IS *)pc->data;
5213   PC           pc_temp;
5214   Mat          A_RR;
5215   MatNullSpace nnsp;
5216   MatReuse     reuse;
5217   PetscScalar  m_one = -1.0;
5218   PetscReal    value;
5219   PetscInt     n_D, n_R;
5220   PetscBool    issbaij, opts, isset, issym;
5221   void (*f)(void) = NULL;
5222   char   dir_prefix[256], neu_prefix[256], str_level[16];
5223   size_t len;
5224 
5225   PetscFunctionBegin;
5226   PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5227   /* approximate solver, propagate NearNullSpace if needed */
5228   if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5229     MatNullSpace gnnsp1, gnnsp2;
5230     PetscBool    lhas, ghas;
5231 
5232     PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5233     PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5234     PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5235     lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5236     PetscCall(MPIU_Allreduce(&lhas, &ghas, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5237     if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5238   }
5239 
5240   /* compute prefixes */
5241   PetscCall(PetscStrcpy(dir_prefix, ""));
5242   PetscCall(PetscStrcpy(neu_prefix, ""));
5243   if (!pcbddc->current_level) {
5244     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5245     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5246     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5247     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5248   } else {
5249     PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level)));
5250     PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5251     len -= 15;                                /* remove "pc_bddc_coarse_" */
5252     if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
5253     if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5254     /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5255     PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5256     PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5257     PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5258     PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5259     PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5260     PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5261   }
5262 
5263   /* DIRICHLET PROBLEM */
5264   if (dirichlet) {
5265     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5266     if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5267       PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5268       if (pcbddc->dbg_flag) {
5269         Mat A_IIn;
5270 
5271         PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5272         PetscCall(MatDestroy(&pcis->A_II));
5273         pcis->A_II = A_IIn;
5274       }
5275     }
5276     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5277     if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
5278 
5279     /* Matrix for Dirichlet problem is pcis->A_II */
5280     n_D  = pcis->n - pcis->n_B;
5281     opts = PETSC_FALSE;
5282     if (!pcbddc->ksp_D) { /* create object if not yet build */
5283       opts = PETSC_TRUE;
5284       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
5285       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
5286       /* default */
5287       PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
5288       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
5289       PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
5290       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5291       if (issbaij) {
5292         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5293       } else {
5294         PetscCall(PCSetType(pc_temp, PCLU));
5295       }
5296       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
5297     }
5298     PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
5299     PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
5300     /* Allow user's customization */
5301     if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
5302     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5303     if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5304       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
5305     }
5306     PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
5307     PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5308     PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5309     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5310       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5311       const PetscInt *idxs;
5312       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
5313 
5314       PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
5315       PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
5316       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5317       for (i = 0; i < nl; i++) {
5318         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5319       }
5320       PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
5321       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5322       PetscCall(PetscFree(scoords));
5323     }
5324     if (sub_schurs && sub_schurs->reuse_solver) {
5325       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5326 
5327       PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
5328     }
5329 
5330     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5331     if (!n_D) {
5332       PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
5333       PetscCall(PCSetType(pc_temp, PCNONE));
5334     }
5335     PetscCall(KSPSetUp(pcbddc->ksp_D));
5336     /* set ksp_D into pcis data */
5337     PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
5338     PetscCall(KSPDestroy(&pcis->ksp_D));
5339     pcis->ksp_D = pcbddc->ksp_D;
5340   }
5341 
5342   /* NEUMANN PROBLEM */
5343   A_RR = NULL;
5344   if (neumann) {
5345     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5346     PetscInt        ibs, mbs;
5347     PetscBool       issbaij, reuse_neumann_solver, isset, issym;
5348     Mat_IS         *matis = (Mat_IS *)pc->pmat->data;
5349 
5350     reuse_neumann_solver = PETSC_FALSE;
5351     if (sub_schurs && sub_schurs->reuse_solver) {
5352       IS iP;
5353 
5354       reuse_neumann_solver = PETSC_TRUE;
5355       PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
5356       if (iP) reuse_neumann_solver = PETSC_FALSE;
5357     }
5358     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5359     PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
5360     if (pcbddc->ksp_R) { /* already created ksp */
5361       PetscInt nn_R;
5362       PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
5363       PetscCall(PetscObjectReference((PetscObject)A_RR));
5364       PetscCall(MatGetSize(A_RR, &nn_R, NULL));
5365       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5366         PetscCall(KSPReset(pcbddc->ksp_R));
5367         PetscCall(MatDestroy(&A_RR));
5368         reuse = MAT_INITIAL_MATRIX;
5369       } else {                                /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5370         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5371           PetscCall(MatDestroy(&A_RR));
5372           reuse = MAT_INITIAL_MATRIX;
5373         } else { /* safe to reuse the matrix */
5374           reuse = MAT_REUSE_MATRIX;
5375         }
5376       }
5377       /* last check */
5378       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5379         PetscCall(MatDestroy(&A_RR));
5380         reuse = MAT_INITIAL_MATRIX;
5381       }
5382     } else { /* first time, so we need to create the matrix */
5383       reuse = MAT_INITIAL_MATRIX;
5384     }
5385     /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5386        TODO: Get Rid of these conversions */
5387     PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
5388     PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
5389     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
5390     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5391       if (matis->A == pcbddc->local_mat) {
5392         PetscCall(MatDestroy(&pcbddc->local_mat));
5393         PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5394       } else {
5395         PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5396       }
5397     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5398       if (matis->A == pcbddc->local_mat) {
5399         PetscCall(MatDestroy(&pcbddc->local_mat));
5400         PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
5401       } else {
5402         PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
5403       }
5404     }
5405     /* extract A_RR */
5406     if (reuse_neumann_solver) {
5407       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5408 
5409       if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5410         PetscCall(MatDestroy(&A_RR));
5411         if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5412           PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
5413         } else {
5414           PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
5415         }
5416       } else {
5417         PetscCall(MatDestroy(&A_RR));
5418         PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
5419         PetscCall(PetscObjectReference((PetscObject)A_RR));
5420       }
5421     } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5422       PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
5423     }
5424     PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
5425     if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
5426     opts = PETSC_FALSE;
5427     if (!pcbddc->ksp_R) { /* create object if not present */
5428       opts = PETSC_TRUE;
5429       PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
5430       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
5431       /* default */
5432       PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
5433       PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
5434       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
5435       PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
5436       if (issbaij) {
5437         PetscCall(PCSetType(pc_temp, PCCHOLESKY));
5438       } else {
5439         PetscCall(PCSetType(pc_temp, PCLU));
5440       }
5441       PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
5442     }
5443     PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
5444     PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
5445     if (opts) { /* Allow user's customization once */
5446       PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
5447     }
5448     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
5449     if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5450       PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
5451     }
5452     PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
5453     PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
5454     PetscCall(PetscObjectQueryFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
5455     if (f && pcbddc->mat_graph->cloc && !nnsp) {
5456       PetscReal      *coords = pcbddc->mat_graph->coords, *scoords;
5457       const PetscInt *idxs;
5458       PetscInt        cdim = pcbddc->mat_graph->cdim, nl, i, d;
5459 
5460       PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
5461       PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
5462       PetscCall(PetscMalloc1(nl * cdim, &scoords));
5463       for (i = 0; i < nl; i++) {
5464         for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
5465       }
5466       PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
5467       PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
5468       PetscCall(PetscFree(scoords));
5469     }
5470 
5471     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5472     if (!n_R) {
5473       PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
5474       PetscCall(PCSetType(pc_temp, PCNONE));
5475     }
5476     /* Reuse solver if it is present */
5477     if (reuse_neumann_solver) {
5478       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5479 
5480       PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
5481     }
5482     PetscCall(KSPSetUp(pcbddc->ksp_R));
5483   }
5484 
5485   if (pcbddc->dbg_flag) {
5486     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5487     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5488     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5489   }
5490   PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5491 
5492   /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5493   if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
5494   if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
5495   if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
5496   /* check Dirichlet and Neumann solvers */
5497   if (pcbddc->dbg_flag) {
5498     if (dirichlet) { /* Dirichlet */
5499       PetscCall(VecSetRandom(pcis->vec1_D, NULL));
5500       PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
5501       PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
5502       PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
5503       PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
5504       PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
5505       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_D))->prefix, (double)value));
5506       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5507     }
5508     if (neumann) { /* Neumann */
5509       PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
5510       PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
5511       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
5512       PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5513       PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
5514       PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
5515       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)(pcbddc->ksp_R))->prefix, (double)value));
5516       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5517     }
5518   }
5519   /* free Neumann problem's matrix */
5520   PetscCall(MatDestroy(&A_RR));
5521   PetscFunctionReturn(0);
5522 }
5523 
5524 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5525 {
5526   PC_BDDC        *pcbddc       = (PC_BDDC *)(pc->data);
5527   PCBDDCSubSchurs sub_schurs   = pcbddc->sub_schurs;
5528   PetscBool       reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
5529 
5530   PetscFunctionBegin;
5531   if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
5532   if (!pcbddc->switch_static) {
5533     if (applytranspose && pcbddc->local_auxmat1) {
5534       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
5535       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
5536     }
5537     if (!reuse_solver) {
5538       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5539       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5540     } else {
5541       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5542 
5543       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
5544       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
5545     }
5546   } else {
5547     PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5548     PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5549     PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5550     PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5551     if (applytranspose && pcbddc->local_auxmat1) {
5552       PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
5553       PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
5554       PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5555       PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
5556     }
5557   }
5558   PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
5559   if (!reuse_solver || pcbddc->switch_static) {
5560     if (applytranspose) {
5561       PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
5562     } else {
5563       PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
5564     }
5565     PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
5566   } else {
5567     PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5568 
5569     if (applytranspose) {
5570       PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
5571     } else {
5572       PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
5573     }
5574   }
5575   PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
5576   PetscCall(VecSet(inout_B, 0.));
5577   if (!pcbddc->switch_static) {
5578     if (!reuse_solver) {
5579       PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5580       PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5581     } else {
5582       PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5583 
5584       PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
5585       PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
5586     }
5587     if (!applytranspose && pcbddc->local_auxmat1) {
5588       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
5589       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
5590     }
5591   } else {
5592     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5593     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5594     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5595     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5596     if (!applytranspose && pcbddc->local_auxmat1) {
5597       PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
5598       PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
5599     }
5600     PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5601     PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
5602     PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5603     PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
5604   }
5605   PetscFunctionReturn(0);
5606 }
5607 
5608 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5609 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5610 {
5611   PC_BDDC          *pcbddc = (PC_BDDC *)(pc->data);
5612   PC_IS            *pcis   = (PC_IS *)(pc->data);
5613   const PetscScalar zero   = 0.0;
5614 
5615   PetscFunctionBegin;
5616   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5617   if (!pcbddc->benign_apply_coarse_only) {
5618     if (applytranspose) {
5619       PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
5620       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
5621     } else {
5622       PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
5623       if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
5624     }
5625   } else {
5626     PetscCall(VecSet(pcbddc->vec1_P, zero));
5627   }
5628 
5629   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5630   if (pcbddc->benign_n) {
5631     PetscScalar *array;
5632     PetscInt     j;
5633 
5634     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
5635     for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
5636     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
5637   }
5638 
5639   /* start communications from local primal nodes to rhs of coarse solver */
5640   PetscCall(VecSet(pcbddc->coarse_vec, zero));
5641   PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
5642   PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
5643 
5644   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5645   if (pcbddc->coarse_ksp) {
5646     Mat          coarse_mat;
5647     Vec          rhs, sol;
5648     MatNullSpace nullsp;
5649     PetscBool    isbddc = PETSC_FALSE;
5650 
5651     if (pcbddc->benign_have_null) {
5652       PC coarse_pc;
5653 
5654       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
5655       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
5656       /* we need to propagate to coarser levels the need for a possible benign correction */
5657       if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5658         PC_BDDC *coarsepcbddc                  = (PC_BDDC *)(coarse_pc->data);
5659         coarsepcbddc->benign_skip_correction   = PETSC_FALSE;
5660         coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5661       }
5662     }
5663     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
5664     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
5665     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
5666     if (applytranspose) {
5667       PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
5668       PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5669       PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
5670       PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5671       PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
5672       PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
5673       if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
5674     } else {
5675       PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
5676       if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5677         PC coarse_pc;
5678 
5679         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
5680         PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
5681         PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
5682         PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
5683         PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
5684       } else {
5685         PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5686         PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
5687         PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
5688         PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
5689         if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
5690       }
5691     }
5692     /* we don't need the benign correction at coarser levels anymore */
5693     if (pcbddc->benign_have_null && isbddc) {
5694       PC       coarse_pc;
5695       PC_BDDC *coarsepcbddc;
5696 
5697       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
5698       coarsepcbddc                           = (PC_BDDC *)(coarse_pc->data);
5699       coarsepcbddc->benign_skip_correction   = PETSC_TRUE;
5700       coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5701     }
5702   }
5703 
5704   /* Local solution on R nodes */
5705   if (pcis->n && !pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
5706   /* communications from coarse sol to local primal nodes */
5707   PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
5708   PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
5709 
5710   /* Sum contributions from the two levels */
5711   if (!pcbddc->benign_apply_coarse_only) {
5712     if (applytranspose) {
5713       PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
5714       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
5715     } else {
5716       PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
5717       if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
5718     }
5719     /* store p0 */
5720     if (pcbddc->benign_n) {
5721       PetscScalar *array;
5722       PetscInt     j;
5723 
5724       PetscCall(VecGetArray(pcbddc->vec1_P, &array));
5725       for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
5726       PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
5727     }
5728   } else { /* expand the coarse solution */
5729     if (applytranspose) {
5730       PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
5731     } else {
5732       PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
5733     }
5734   }
5735   PetscFunctionReturn(0);
5736 }
5737 
5738 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
5739 {
5740   PC_BDDC           *pcbddc = (PC_BDDC *)(pc->data);
5741   Vec                from, to;
5742   const PetscScalar *array;
5743 
5744   PetscFunctionBegin;
5745   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5746     from = pcbddc->coarse_vec;
5747     to   = pcbddc->vec1_P;
5748     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5749       Vec tvec;
5750 
5751       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
5752       PetscCall(VecResetArray(tvec));
5753       PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
5754       PetscCall(VecGetArrayRead(tvec, &array));
5755       PetscCall(VecPlaceArray(from, array));
5756       PetscCall(VecRestoreArrayRead(tvec, &array));
5757     }
5758   } else { /* from local to global -> put data in coarse right hand side */
5759     from = pcbddc->vec1_P;
5760     to   = pcbddc->coarse_vec;
5761   }
5762   PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
5763   PetscFunctionReturn(0);
5764 }
5765 
5766 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
5767 {
5768   PC_BDDC           *pcbddc = (PC_BDDC *)(pc->data);
5769   Vec                from, to;
5770   const PetscScalar *array;
5771 
5772   PetscFunctionBegin;
5773   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5774     from = pcbddc->coarse_vec;
5775     to   = pcbddc->vec1_P;
5776   } else { /* from local to global -> put data in coarse right hand side */
5777     from = pcbddc->vec1_P;
5778     to   = pcbddc->coarse_vec;
5779   }
5780   PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
5781   if (smode == SCATTER_FORWARD) {
5782     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5783       Vec tvec;
5784 
5785       PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
5786       PetscCall(VecGetArrayRead(to, &array));
5787       PetscCall(VecPlaceArray(tvec, array));
5788       PetscCall(VecRestoreArrayRead(to, &array));
5789     }
5790   } else {
5791     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
5792       PetscCall(VecResetArray(from));
5793     }
5794   }
5795   PetscFunctionReturn(0);
5796 }
5797 
5798 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
5799 {
5800   PC_IS   *pcis   = (PC_IS *)(pc->data);
5801   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5802   Mat_IS  *matis  = (Mat_IS *)pc->pmat->data;
5803   /* one and zero */
5804   PetscScalar one = 1.0, zero = 0.0;
5805   /* space to store constraints and their local indices */
5806   PetscScalar *constraints_data;
5807   PetscInt    *constraints_idxs, *constraints_idxs_B;
5808   PetscInt    *constraints_idxs_ptr, *constraints_data_ptr;
5809   PetscInt    *constraints_n;
5810   /* iterators */
5811   PetscInt i, j, k, total_counts, total_counts_cc, cum;
5812   /* BLAS integers */
5813   PetscBLASInt lwork, lierr;
5814   PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
5815   PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
5816   /* reuse */
5817   PetscInt  olocal_primal_size, olocal_primal_size_cc;
5818   PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
5819   /* change of basis */
5820   PetscBool qr_needed;
5821   PetscBT   change_basis, qr_needed_idx;
5822   /* auxiliary stuff */
5823   PetscInt *nnz, *is_indices;
5824   PetscInt  ncc;
5825   /* some quantities */
5826   PetscInt  n_vertices, total_primal_vertices, valid_constraints;
5827   PetscInt  size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
5828   PetscReal tol; /* tolerance for retaining eigenmodes */
5829 
5830   PetscFunctionBegin;
5831   tol = PetscSqrtReal(PETSC_SMALL);
5832   /* Destroy Mat objects computed previously */
5833   PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
5834   PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
5835   PetscCall(MatDestroy(&pcbddc->switch_static_change));
5836   /* save info on constraints from previous setup (if any) */
5837   olocal_primal_size    = pcbddc->local_primal_size;
5838   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
5839   PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
5840   PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
5841   PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
5842   PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
5843   PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
5844 
5845   if (!pcbddc->adaptive_selection) {
5846     IS           ISForVertices, *ISForFaces, *ISForEdges;
5847     MatNullSpace nearnullsp;
5848     const Vec   *nearnullvecs;
5849     Vec         *localnearnullsp;
5850     PetscScalar *array;
5851     PetscInt     n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
5852     PetscBool    nnsp_has_cnst;
5853     /* LAPACK working arrays for SVD or POD */
5854     PetscBool    skip_lapack, boolforchange;
5855     PetscScalar *work;
5856     PetscReal   *singular_vals;
5857 #if defined(PETSC_USE_COMPLEX)
5858     PetscReal *rwork;
5859 #endif
5860     PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
5861     PetscBLASInt dummy_int    = 1;
5862     PetscScalar  dummy_scalar = 1.;
5863     PetscBool    use_pod      = PETSC_FALSE;
5864 
5865     /* MKL SVD with same input gives different results on different processes! */
5866 #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
5867     use_pod = PETSC_TRUE;
5868 #endif
5869     /* Get index sets for faces, edges and vertices from graph */
5870     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
5871     o_nf       = n_ISForFaces;
5872     o_ne       = n_ISForEdges;
5873     n_vertices = 0;
5874     if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
5875     /* print some info */
5876     if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
5877       if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
5878       PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
5879       PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5880       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
5881       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
5882       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
5883       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
5884       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5885       PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
5886     }
5887 
5888     if (!pcbddc->use_vertices) n_vertices = 0;
5889     if (!pcbddc->use_edges) n_ISForEdges = 0;
5890     if (!pcbddc->use_faces) n_ISForFaces = 0;
5891 
5892     /* check if near null space is attached to global mat */
5893     if (pcbddc->use_nnsp) {
5894       PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
5895     } else nearnullsp = NULL;
5896 
5897     if (nearnullsp) {
5898       PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
5899       /* remove any stored info */
5900       PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
5901       PetscCall(PetscFree(pcbddc->onearnullvecs_state));
5902       /* store information for BDDC solver reuse */
5903       PetscCall(PetscObjectReference((PetscObject)nearnullsp));
5904       pcbddc->onearnullspace = nearnullsp;
5905       PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
5906       for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
5907     } else { /* if near null space is not provided BDDC uses constants by default */
5908       nnsp_size     = 0;
5909       nnsp_has_cnst = PETSC_TRUE;
5910     }
5911     /* get max number of constraints on a single cc */
5912     max_constraints = nnsp_size;
5913     if (nnsp_has_cnst) max_constraints++;
5914 
5915     /*
5916          Evaluate maximum storage size needed by the procedure
5917          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
5918          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
5919          There can be multiple constraints per connected component
5920                                                                                                                                                            */
5921     ncc = n_vertices + n_ISForFaces + n_ISForEdges;
5922     PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
5923 
5924     total_counts = n_ISForFaces + n_ISForEdges;
5925     total_counts *= max_constraints;
5926     total_counts += n_vertices;
5927     PetscCall(PetscBTCreate(total_counts, &change_basis));
5928 
5929     total_counts           = 0;
5930     max_size_of_constraint = 0;
5931     for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
5932       IS used_is;
5933       if (i < n_ISForEdges) {
5934         used_is = ISForEdges[i];
5935       } else {
5936         used_is = ISForFaces[i - n_ISForEdges];
5937       }
5938       PetscCall(ISGetSize(used_is, &j));
5939       total_counts += j;
5940       max_size_of_constraint = PetscMax(j, max_size_of_constraint);
5941     }
5942     PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
5943 
5944     /* get local part of global near null space vectors */
5945     PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
5946     for (k = 0; k < nnsp_size; k++) {
5947       PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
5948       PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
5949       PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
5950     }
5951 
5952     /* whether or not to skip lapack calls */
5953     skip_lapack = PETSC_TRUE;
5954     if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
5955 
5956     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
5957     if (!skip_lapack) {
5958       PetscScalar temp_work;
5959 
5960       if (use_pod) {
5961         /* Proper Orthogonal Decomposition (POD) using the snapshot method */
5962         PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
5963         PetscCall(PetscMalloc1(max_constraints, &singular_vals));
5964         PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
5965 #if defined(PETSC_USE_COMPLEX)
5966         PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
5967 #endif
5968         /* now we evaluate the optimal workspace using query with lwork=-1 */
5969         PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
5970         PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
5971         lwork = -1;
5972         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
5973 #if !defined(PETSC_USE_COMPLEX)
5974         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
5975 #else
5976         PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
5977 #endif
5978         PetscCall(PetscFPTrapPop());
5979         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %d", (int)lierr);
5980       } else {
5981 #if !defined(PETSC_MISSING_LAPACK_GESVD)
5982         /* SVD */
5983         PetscInt max_n, min_n;
5984         max_n = max_size_of_constraint;
5985         min_n = max_constraints;
5986         if (max_size_of_constraint < max_constraints) {
5987           min_n = max_size_of_constraint;
5988           max_n = max_constraints;
5989         }
5990         PetscCall(PetscMalloc1(min_n, &singular_vals));
5991   #if defined(PETSC_USE_COMPLEX)
5992         PetscCall(PetscMalloc1(5 * min_n, &rwork));
5993   #endif
5994         /* now we evaluate the optimal workspace using query with lwork=-1 */
5995         lwork = -1;
5996         PetscCall(PetscBLASIntCast(max_n, &Blas_M));
5997         PetscCall(PetscBLASIntCast(min_n, &Blas_N));
5998         PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
5999         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6000   #if !defined(PETSC_USE_COMPLEX)
6001         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6002   #else
6003         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6004   #endif
6005         PetscCall(PetscFPTrapPop());
6006         PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %d", (int)lierr);
6007 #else
6008         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6009 #endif /* on missing GESVD */
6010       }
6011       /* Allocate optimal workspace */
6012       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6013       PetscCall(PetscMalloc1(lwork, &work));
6014     }
6015     /* Now we can loop on constraining sets */
6016     total_counts            = 0;
6017     constraints_idxs_ptr[0] = 0;
6018     constraints_data_ptr[0] = 0;
6019     /* vertices */
6020     if (n_vertices) {
6021       PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6022       PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6023       for (i = 0; i < n_vertices; i++) {
6024         constraints_n[total_counts]            = 1;
6025         constraints_data[total_counts]         = 1.0;
6026         constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6027         constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6028         total_counts++;
6029       }
6030       PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6031     }
6032 
6033     /* edges and faces */
6034     total_counts_cc = total_counts;
6035     for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6036       IS        used_is;
6037       PetscBool idxs_copied = PETSC_FALSE;
6038 
6039       if (ncc < n_ISForEdges) {
6040         used_is       = ISForEdges[ncc];
6041         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6042       } else {
6043         used_is       = ISForFaces[ncc - n_ISForEdges];
6044         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6045       }
6046       temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6047 
6048       PetscCall(ISGetSize(used_is, &size_of_constraint));
6049       if (!size_of_constraint) continue;
6050       PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6051       /* change of basis should not be performed on local periodic nodes */
6052       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6053       if (nnsp_has_cnst) {
6054         PetscScalar quad_value;
6055 
6056         PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6057         idxs_copied = PETSC_TRUE;
6058 
6059         if (!pcbddc->use_nnsp_true) {
6060           quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6061         } else {
6062           quad_value = 1.0;
6063         }
6064         for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6065         temp_constraints++;
6066         total_counts++;
6067       }
6068       for (k = 0; k < nnsp_size; k++) {
6069         PetscReal    real_value;
6070         PetscScalar *ptr_to_data;
6071 
6072         PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6073         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6074         for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6075         PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6076         /* check if array is null on the connected component */
6077         PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6078         PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6079         if (real_value > tol * size_of_constraint) { /* keep indices and values */
6080           temp_constraints++;
6081           total_counts++;
6082           if (!idxs_copied) {
6083             PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6084             idxs_copied = PETSC_TRUE;
6085           }
6086         }
6087       }
6088       PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6089       valid_constraints = temp_constraints;
6090       if (!pcbddc->use_nnsp_true && temp_constraints) {
6091         if (temp_constraints == 1) { /* just normalize the constraint */
6092           PetscScalar norm, *ptr_to_data;
6093 
6094           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6095           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6096           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6097           norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6098           PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6099         } else { /* perform SVD */
6100           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6101 
6102           if (use_pod) {
6103             /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6104                POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6105                -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6106                   the constraints basis will differ (by a complex factor with absolute value equal to 1)
6107                   from that computed using LAPACKgesvd
6108                -> This is due to a different computation of eigenvectors in LAPACKheev
6109                -> The quality of the POD-computed basis will be the same */
6110             PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6111             /* Store upper triangular part of correlation matrix */
6112             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6113             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6114             for (j = 0; j < temp_constraints; j++) {
6115               for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6116             }
6117             /* compute eigenvalues and eigenvectors of correlation matrix */
6118             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6119             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6120 #if !defined(PETSC_USE_COMPLEX)
6121             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6122 #else
6123             PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6124 #endif
6125             PetscCall(PetscFPTrapPop());
6126             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %d", (int)lierr);
6127             /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6128             j = 0;
6129             while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6130             total_counts      = total_counts - j;
6131             valid_constraints = temp_constraints - j;
6132             /* scale and copy POD basis into used quadrature memory */
6133             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6134             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6135             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6136             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6137             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6138             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6139             if (j < temp_constraints) {
6140               PetscInt ii;
6141               for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6142               PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6143               PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6144               PetscCall(PetscFPTrapPop());
6145               for (k = 0; k < temp_constraints - j; k++) {
6146                 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6147               }
6148             }
6149           } else {
6150 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6151             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6152             PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6153             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6154             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6155   #if !defined(PETSC_USE_COMPLEX)
6156             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6157   #else
6158             PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6159   #endif
6160             PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %d", (int)lierr);
6161             PetscCall(PetscFPTrapPop());
6162             /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6163             k = temp_constraints;
6164             if (k > size_of_constraint) k = size_of_constraint;
6165             j = 0;
6166             while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6167             valid_constraints = k - j;
6168             total_counts      = total_counts - temp_constraints + valid_constraints;
6169 #else
6170             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6171 #endif /* on missing GESVD */
6172           }
6173         }
6174       }
6175       /* update pointers information */
6176       if (valid_constraints) {
6177         constraints_n[total_counts_cc]            = valid_constraints;
6178         constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6179         constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6180         /* set change_of_basis flag */
6181         if (boolforchange) PetscBTSet(change_basis, total_counts_cc);
6182         total_counts_cc++;
6183       }
6184     }
6185     /* free workspace */
6186     if (!skip_lapack) {
6187       PetscCall(PetscFree(work));
6188 #if defined(PETSC_USE_COMPLEX)
6189       PetscCall(PetscFree(rwork));
6190 #endif
6191       PetscCall(PetscFree(singular_vals));
6192       PetscCall(PetscFree(correlation_mat));
6193       PetscCall(PetscFree(temp_basis));
6194     }
6195     for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6196     PetscCall(PetscFree(localnearnullsp));
6197     /* free index sets of faces, edges and vertices */
6198     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6199   } else {
6200     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6201 
6202     total_counts = 0;
6203     n_vertices   = 0;
6204     if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6205     max_constraints = 0;
6206     total_counts_cc = 0;
6207     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6208       total_counts += pcbddc->adaptive_constraints_n[i];
6209       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6210       max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6211     }
6212     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6213     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6214     constraints_idxs     = pcbddc->adaptive_constraints_idxs;
6215     constraints_data     = pcbddc->adaptive_constraints_data;
6216     /* constraints_n differs from pcbddc->adaptive_constraints_n */
6217     PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6218     total_counts_cc = 0;
6219     for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6220       if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6221     }
6222 
6223     max_size_of_constraint = 0;
6224     for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6225     PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6226     /* Change of basis */
6227     PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6228     if (pcbddc->use_change_of_basis) {
6229       for (i = 0; i < sub_schurs->n_subs; i++) {
6230         if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6231       }
6232     }
6233   }
6234   pcbddc->local_primal_size = total_counts;
6235   PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6236 
6237   /* map constraints_idxs in boundary numbering */
6238   if (pcbddc->use_change_of_basis) {
6239     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6240     PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6241   }
6242 
6243   /* Create constraint matrix */
6244   PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6245   PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6246   PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6247 
6248   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6249   /* determine if a QR strategy is needed for change of basis */
6250   qr_needed = pcbddc->use_qr_single;
6251   PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6252   total_primal_vertices        = 0;
6253   pcbddc->local_primal_size_cc = 0;
6254   for (i = 0; i < total_counts_cc; i++) {
6255     size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6256     if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6257       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6258       pcbddc->local_primal_size_cc += 1;
6259     } else if (PetscBTLookup(change_basis, i)) {
6260       for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6261       pcbddc->local_primal_size_cc += constraints_n[i];
6262       if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6263         PetscBTSet(qr_needed_idx, i);
6264         qr_needed = PETSC_TRUE;
6265       }
6266     } else {
6267       pcbddc->local_primal_size_cc += 1;
6268     }
6269   }
6270   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6271   pcbddc->n_vertices = total_primal_vertices;
6272   /* permute indices in order to have a sorted set of vertices */
6273   PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6274   PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6275   PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6276   for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6277 
6278   /* nonzero structure of constraint matrix */
6279   /* and get reference dof for local constraints */
6280   PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
6281   for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
6282 
6283   j            = total_primal_vertices;
6284   total_counts = total_primal_vertices;
6285   cum          = total_primal_vertices;
6286   for (i = n_vertices; i < total_counts_cc; i++) {
6287     if (!PetscBTLookup(change_basis, i)) {
6288       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6289       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6290       cum++;
6291       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6292       for (k = 0; k < constraints_n[i]; k++) {
6293         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6294         nnz[j + k]                                        = size_of_constraint;
6295       }
6296       j += constraints_n[i];
6297     }
6298   }
6299   PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
6300   PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6301   PetscCall(PetscFree(nnz));
6302 
6303   /* set values in constraint matrix */
6304   for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
6305   total_counts = total_primal_vertices;
6306   for (i = n_vertices; i < total_counts_cc; i++) {
6307     if (!PetscBTLookup(change_basis, i)) {
6308       PetscInt *cols;
6309 
6310       size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6311       cols               = constraints_idxs + constraints_idxs_ptr[i];
6312       for (k = 0; k < constraints_n[i]; k++) {
6313         PetscInt     row = total_counts + k;
6314         PetscScalar *vals;
6315 
6316         vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
6317         PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
6318       }
6319       total_counts += constraints_n[i];
6320     }
6321   }
6322   /* assembling */
6323   PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6324   PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
6325   PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
6326 
6327   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6328   if (pcbddc->use_change_of_basis) {
6329     /* dual and primal dofs on a single cc */
6330     PetscInt dual_dofs, primal_dofs;
6331     /* working stuff for GEQRF */
6332     PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
6333     PetscBLASInt lqr_work;
6334     /* working stuff for UNGQR */
6335     PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
6336     PetscBLASInt lgqr_work;
6337     /* working stuff for TRTRS */
6338     PetscScalar *trs_rhs = NULL;
6339     PetscBLASInt Blas_NRHS;
6340     /* pointers for values insertion into change of basis matrix */
6341     PetscInt    *start_rows, *start_cols;
6342     PetscScalar *start_vals;
6343     /* working stuff for values insertion */
6344     PetscBT   is_primal;
6345     PetscInt *aux_primal_numbering_B;
6346     /* matrix sizes */
6347     PetscInt global_size, local_size;
6348     /* temporary change of basis */
6349     Mat localChangeOfBasisMatrix;
6350     /* extra space for debugging */
6351     PetscScalar *dbg_work = NULL;
6352 
6353     PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
6354     PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
6355     PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
6356     /* nonzeros for local mat */
6357     PetscCall(PetscMalloc1(pcis->n, &nnz));
6358     if (!pcbddc->benign_change || pcbddc->fake_change) {
6359       for (i = 0; i < pcis->n; i++) nnz[i] = 1;
6360     } else {
6361       const PetscInt *ii;
6362       PetscInt        n;
6363       PetscBool       flg_row;
6364       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6365       for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
6366       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
6367     }
6368     for (i = n_vertices; i < total_counts_cc; i++) {
6369       if (PetscBTLookup(change_basis, i)) {
6370         size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6371         if (PetscBTLookup(qr_needed_idx, i)) {
6372           for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
6373         } else {
6374           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6375           for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
6376         }
6377       }
6378     }
6379     PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
6380     PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
6381     PetscCall(PetscFree(nnz));
6382     /* Set interior change in the matrix */
6383     if (!pcbddc->benign_change || pcbddc->fake_change) {
6384       for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
6385     } else {
6386       const PetscInt *ii, *jj;
6387       PetscScalar    *aa;
6388       PetscInt        n;
6389       PetscBool       flg_row;
6390       PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6391       PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
6392       for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
6393       PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
6394       PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
6395     }
6396 
6397     if (pcbddc->dbg_flag) {
6398       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6399       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
6400     }
6401 
6402     /* Now we loop on the constraints which need a change of basis */
6403     /*
6404        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6405        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6406 
6407        Basic blocks of change of basis matrix T computed:
6408 
6409           - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6410 
6411             | 1        0   ...        0         s_1/S |
6412             | 0        1   ...        0         s_2/S |
6413             |              ...                        |
6414             | 0        ...            1     s_{n-1}/S |
6415             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
6416 
6417             with S = \sum_{i=1}^n s_i^2
6418             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6419                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6420 
6421           - QR decomposition of constraints otherwise
6422     */
6423     if (qr_needed && max_size_of_constraint) {
6424       /* space to store Q */
6425       PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
6426       /* array to store scaling factors for reflectors */
6427       PetscCall(PetscMalloc1(max_constraints, &qr_tau));
6428       /* first we issue queries for optimal work */
6429       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
6430       PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6431       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
6432       lqr_work = -1;
6433       PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
6434       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %d", (int)lierr);
6435       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
6436       PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t), &qr_work));
6437       lgqr_work = -1;
6438       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
6439       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
6440       PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
6441       PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
6442       if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
6443       PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
6444       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %d", (int)lierr);
6445       PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
6446       PetscCall(PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t), &gqr_work));
6447       /* array to store rhs and solution of triangular solver */
6448       PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
6449       /* allocating workspace for check */
6450       if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
6451     }
6452     /* array to store whether a node is primal or not */
6453     PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
6454     PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
6455     PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
6456     PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
6457     for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
6458     PetscCall(PetscFree(aux_primal_numbering_B));
6459 
6460     /* loop on constraints and see whether or not they need a change of basis and compute it */
6461     for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
6462       size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
6463       if (PetscBTLookup(change_basis, total_counts)) {
6464         /* get constraint info */
6465         primal_dofs = constraints_n[total_counts];
6466         dual_dofs   = size_of_constraint - primal_dofs;
6467 
6468         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
6469 
6470         if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
6471 
6472           /* copy quadrature constraints for change of basis check */
6473           if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
6474           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6475           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
6476 
6477           /* compute QR decomposition of constraints */
6478           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6479           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
6480           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6481           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6482           PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
6483           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %d", (int)lierr);
6484           PetscCall(PetscFPTrapPop());
6485 
6486           /* explicitly compute R^-T */
6487           PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
6488           for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
6489           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
6490           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
6491           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6492           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
6493           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6494           PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
6495           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %d", (int)lierr);
6496           PetscCall(PetscFPTrapPop());
6497 
6498           /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6499           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6500           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6501           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
6502           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6503           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6504           PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
6505           PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %d", (int)lierr);
6506           PetscCall(PetscFPTrapPop());
6507 
6508           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6509              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6510              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6511           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6512           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
6513           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
6514           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6515           PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
6516           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6517           PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6518           PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
6519           PetscCall(PetscFPTrapPop());
6520           PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
6521 
6522           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6523           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6524           /* insert cols for primal dofs */
6525           for (j = 0; j < primal_dofs; j++) {
6526             start_vals = &qr_basis[j * size_of_constraint];
6527             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6528             PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
6529           }
6530           /* insert cols for dual dofs */
6531           for (j = 0, k = 0; j < dual_dofs; k++) {
6532             if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
6533               start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
6534               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6535               PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
6536               j++;
6537             }
6538           }
6539 
6540           /* check change of basis */
6541           if (pcbddc->dbg_flag) {
6542             PetscInt  ii, jj;
6543             PetscBool valid_qr = PETSC_TRUE;
6544             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
6545             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6546             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
6547             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6548             PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
6549             PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
6550             PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6551             PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
6552             PetscCall(PetscFPTrapPop());
6553             for (jj = 0; jj < size_of_constraint; jj++) {
6554               for (ii = 0; ii < primal_dofs; ii++) {
6555                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6556                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6557               }
6558             }
6559             if (!valid_qr) {
6560               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
6561               for (jj = 0; jj < size_of_constraint; jj++) {
6562                 for (ii = 0; ii < primal_dofs; ii++) {
6563                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
6564                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
6565                   }
6566                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
6567                     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
6568                   }
6569                 }
6570               }
6571             } else {
6572               PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
6573             }
6574           }
6575         } else { /* simple transformation block */
6576           PetscInt    row, col;
6577           PetscScalar val, norm;
6578 
6579           PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6580           PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
6581           for (j = 0; j < size_of_constraint; j++) {
6582             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
6583             row            = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
6584             if (!PetscBTLookup(is_primal, row_B)) {
6585               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6586               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
6587               PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
6588             } else {
6589               for (k = 0; k < size_of_constraint; k++) {
6590                 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
6591                 if (row != col) {
6592                   val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
6593                 } else {
6594                   val = constraints_data[constraints_data_ptr[total_counts]] / norm;
6595                 }
6596                 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
6597               }
6598             }
6599           }
6600           if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
6601         }
6602       } else {
6603         if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
6604       }
6605     }
6606 
6607     /* free workspace */
6608     if (qr_needed) {
6609       if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
6610       PetscCall(PetscFree(trs_rhs));
6611       PetscCall(PetscFree(qr_tau));
6612       PetscCall(PetscFree(qr_work));
6613       PetscCall(PetscFree(gqr_work));
6614       PetscCall(PetscFree(qr_basis));
6615     }
6616     PetscCall(PetscBTDestroy(&is_primal));
6617     PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
6618     PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
6619 
6620     /* assembling of global change of variable */
6621     if (!pcbddc->fake_change) {
6622       Mat      tmat;
6623       PetscInt bs;
6624 
6625       PetscCall(VecGetSize(pcis->vec1_global, &global_size));
6626       PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
6627       PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
6628       PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
6629       PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
6630       PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
6631       PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->ChangeOfBasisMatrix));
6632       PetscCall(MatSetType(pcbddc->ChangeOfBasisMatrix, MATAIJ));
6633       PetscCall(MatGetBlockSize(pc->pmat, &bs));
6634       PetscCall(MatSetBlockSize(pcbddc->ChangeOfBasisMatrix, bs));
6635       PetscCall(MatSetSizes(pcbddc->ChangeOfBasisMatrix, local_size, local_size, global_size, global_size));
6636       PetscCall(MatISSetMPIXAIJPreallocation_Private(tmat, pcbddc->ChangeOfBasisMatrix, PETSC_TRUE));
6637       PetscCall(MatConvert(tmat, MATAIJ, MAT_REUSE_MATRIX, &pcbddc->ChangeOfBasisMatrix));
6638       PetscCall(MatDestroy(&tmat));
6639       PetscCall(VecSet(pcis->vec1_global, 0.0));
6640       PetscCall(VecSet(pcis->vec1_N, 1.0));
6641       PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
6642       PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
6643       PetscCall(VecReciprocal(pcis->vec1_global));
6644       PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
6645 
6646       /* check */
6647       if (pcbddc->dbg_flag) {
6648         PetscReal error;
6649         Vec       x, x_change;
6650 
6651         PetscCall(VecDuplicate(pcis->vec1_global, &x));
6652         PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
6653         PetscCall(VecSetRandom(x, NULL));
6654         PetscCall(VecCopy(x, pcis->vec1_global));
6655         PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
6656         PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
6657         PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
6658         PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
6659         PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
6660         PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
6661         PetscCall(VecAXPY(x, -1.0, x_change));
6662         PetscCall(VecNorm(x, NORM_INFINITY, &error));
6663         PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
6664         PetscCall(VecDestroy(&x));
6665         PetscCall(VecDestroy(&x_change));
6666       }
6667       /* adapt sub_schurs computed (if any) */
6668       if (pcbddc->use_deluxe_scaling) {
6669         PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6670 
6671         PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6672         if (sub_schurs && sub_schurs->S_Ej_all) {
6673           Mat S_new, tmat;
6674           IS  is_all_N, is_V_Sall = NULL;
6675 
6676           PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
6677           PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
6678           if (pcbddc->deluxe_zerorows) {
6679             ISLocalToGlobalMapping NtoSall;
6680             IS                     is_V;
6681             PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
6682             PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
6683             PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
6684             PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
6685             PetscCall(ISDestroy(&is_V));
6686           }
6687           PetscCall(ISDestroy(&is_all_N));
6688           PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
6689           PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
6690           PetscCall(PetscObjectReference((PetscObject)S_new));
6691           if (pcbddc->deluxe_zerorows) {
6692             const PetscScalar *array;
6693             const PetscInt    *idxs_V, *idxs_all;
6694             PetscInt           i, n_V;
6695 
6696             PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
6697             PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
6698             PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
6699             PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
6700             PetscCall(VecGetArrayRead(pcis->D, &array));
6701             for (i = 0; i < n_V; i++) {
6702               PetscScalar val;
6703               PetscInt    idx;
6704 
6705               idx = idxs_V[i];
6706               val = array[idxs_all[idxs_V[i]]];
6707               PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
6708             }
6709             PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
6710             PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
6711             PetscCall(VecRestoreArrayRead(pcis->D, &array));
6712             PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
6713             PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
6714           }
6715           sub_schurs->S_Ej_all = S_new;
6716           PetscCall(MatDestroy(&S_new));
6717           if (sub_schurs->sum_S_Ej_all) {
6718             PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
6719             PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
6720             PetscCall(PetscObjectReference((PetscObject)S_new));
6721             if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
6722             sub_schurs->sum_S_Ej_all = S_new;
6723             PetscCall(MatDestroy(&S_new));
6724           }
6725           PetscCall(ISDestroy(&is_V_Sall));
6726           PetscCall(MatDestroy(&tmat));
6727         }
6728         /* destroy any change of basis context in sub_schurs */
6729         if (sub_schurs && sub_schurs->change) {
6730           PetscInt i;
6731 
6732           for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
6733           PetscCall(PetscFree(sub_schurs->change));
6734         }
6735       }
6736       if (pcbddc->switch_static) { /* need to save the local change */
6737         pcbddc->switch_static_change = localChangeOfBasisMatrix;
6738       } else {
6739         PetscCall(MatDestroy(&localChangeOfBasisMatrix));
6740       }
6741       /* determine if any process has changed the pressures locally */
6742       pcbddc->change_interior = pcbddc->benign_have_null;
6743     } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
6744       PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6745       pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
6746       pcbddc->use_qr_single    = qr_needed;
6747     }
6748   } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
6749     if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
6750       PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
6751       pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
6752     } else {
6753       Mat benign_global = NULL;
6754       if (pcbddc->benign_have_null) {
6755         Mat M;
6756 
6757         pcbddc->change_interior = PETSC_TRUE;
6758         PetscCall(VecCopy(matis->counter, pcis->vec1_N));
6759         PetscCall(VecReciprocal(pcis->vec1_N));
6760         PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
6761         if (pcbddc->benign_change) {
6762           PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
6763           PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
6764         } else {
6765           PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
6766           PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
6767         }
6768         PetscCall(MatISSetLocalMat(benign_global, M));
6769         PetscCall(MatDestroy(&M));
6770         PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
6771         PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
6772       }
6773       if (pcbddc->user_ChangeOfBasisMatrix) {
6774         PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &pcbddc->ChangeOfBasisMatrix));
6775         PetscCall(MatDestroy(&benign_global));
6776       } else if (pcbddc->benign_have_null) {
6777         pcbddc->ChangeOfBasisMatrix = benign_global;
6778       }
6779     }
6780     if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
6781       IS              is_global;
6782       const PetscInt *gidxs;
6783 
6784       PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
6785       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
6786       PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
6787       PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
6788       PetscCall(ISDestroy(&is_global));
6789     }
6790   }
6791   if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
6792 
6793   if (!pcbddc->fake_change) {
6794     /* add pressure dofs to set of primal nodes for numbering purposes */
6795     for (i = 0; i < pcbddc->benign_n; i++) {
6796       pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc]  = pcbddc->benign_p0_lidx[i];
6797       pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
6798       pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc]  = 1;
6799       pcbddc->local_primal_size_cc++;
6800       pcbddc->local_primal_size++;
6801     }
6802 
6803     /* check if a new primal space has been introduced (also take into account benign trick) */
6804     pcbddc->new_primal_space_local = PETSC_TRUE;
6805     if (olocal_primal_size == pcbddc->local_primal_size) {
6806       PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
6807       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6808       if (!pcbddc->new_primal_space_local) {
6809         PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
6810         pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
6811       }
6812     }
6813     /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
6814     PetscCall(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
6815   }
6816   PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
6817 
6818   /* flush dbg viewer */
6819   if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6820 
6821   /* free workspace */
6822   PetscCall(PetscBTDestroy(&qr_needed_idx));
6823   PetscCall(PetscBTDestroy(&change_basis));
6824   if (!pcbddc->adaptive_selection) {
6825     PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
6826     PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
6827   } else {
6828     PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
6829     PetscCall(PetscFree(constraints_n));
6830     PetscCall(PetscFree(constraints_idxs_B));
6831   }
6832   PetscFunctionReturn(0);
6833 }
6834 
6835 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
6836 {
6837   ISLocalToGlobalMapping map;
6838   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
6839   Mat_IS                *matis  = (Mat_IS *)pc->pmat->data;
6840   PetscInt               i, N;
6841   PetscBool              rcsr = PETSC_FALSE;
6842 
6843   PetscFunctionBegin;
6844   if (pcbddc->recompute_topography) {
6845     pcbddc->graphanalyzed = PETSC_FALSE;
6846     /* Reset previously computed graph */
6847     PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
6848     /* Init local Graph struct */
6849     PetscCall(MatGetSize(pc->pmat, &N, NULL));
6850     PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
6851     PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
6852 
6853     if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
6854     /* Check validity of the csr graph passed in by the user */
6855     PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
6856                pcbddc->mat_graph->nvtxs);
6857 
6858     /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
6859     if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
6860       PetscInt *xadj, *adjncy;
6861       PetscInt  nvtxs;
6862       PetscBool flg_row = PETSC_FALSE;
6863 
6864       PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
6865       if (flg_row) {
6866         PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
6867         pcbddc->computed_rowadj = PETSC_TRUE;
6868       }
6869       PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
6870       rcsr = PETSC_TRUE;
6871     }
6872     if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6873 
6874     if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
6875       PetscReal   *lcoords;
6876       PetscInt     n;
6877       MPI_Datatype dimrealtype;
6878 
6879       /* TODO: support for blocked */
6880       PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
6881       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
6882       PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
6883       PetscCallMPI(MPI_Type_contiguous(pcbddc->mat_graph->cdim, MPIU_REAL, &dimrealtype));
6884       PetscCallMPI(MPI_Type_commit(&dimrealtype));
6885       PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
6886       PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
6887       PetscCallMPI(MPI_Type_free(&dimrealtype));
6888       PetscCall(PetscFree(pcbddc->mat_graph->coords));
6889 
6890       pcbddc->mat_graph->coords = lcoords;
6891       pcbddc->mat_graph->cloc   = PETSC_TRUE;
6892       pcbddc->mat_graph->cnloc  = n;
6893     }
6894     PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
6895                pcbddc->mat_graph->nvtxs);
6896     pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
6897 
6898     /* Setup of Graph */
6899     pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
6900     PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
6901 
6902     /* attach info on disconnected subdomains if present */
6903     if (pcbddc->n_local_subs) {
6904       PetscInt *local_subs, n, totn;
6905 
6906       PetscCall(MatGetLocalSize(matis->A, &n, NULL));
6907       PetscCall(PetscMalloc1(n, &local_subs));
6908       for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
6909       for (i = 0; i < pcbddc->n_local_subs; i++) {
6910         const PetscInt *idxs;
6911         PetscInt        nl, j;
6912 
6913         PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
6914         PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
6915         for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
6916         PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
6917       }
6918       for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
6919       pcbddc->mat_graph->n_local_subs = totn + 1;
6920       pcbddc->mat_graph->local_subs   = local_subs;
6921     }
6922   }
6923 
6924   if (!pcbddc->graphanalyzed) {
6925     /* Graph's connected components analysis */
6926     PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
6927     pcbddc->graphanalyzed   = PETSC_TRUE;
6928     pcbddc->corner_selected = pcbddc->corner_selection;
6929   }
6930   if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
6931   PetscFunctionReturn(0);
6932 }
6933 
6934 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
6935 {
6936   PetscInt     i, j, n;
6937   PetscScalar *alphas;
6938   PetscReal    norm, *onorms;
6939 
6940   PetscFunctionBegin;
6941   n = *nio;
6942   if (!n) PetscFunctionReturn(0);
6943   PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
6944   PetscCall(VecNormalize(vecs[0], &norm));
6945   if (norm < PETSC_SMALL) {
6946     onorms[0] = 0.0;
6947     PetscCall(VecSet(vecs[0], 0.0));
6948   } else {
6949     onorms[0] = norm;
6950   }
6951 
6952   for (i = 1; i < n; i++) {
6953     PetscCall(VecMDot(vecs[i], i, vecs, alphas));
6954     for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
6955     PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
6956     PetscCall(VecNormalize(vecs[i], &norm));
6957     if (norm < PETSC_SMALL) {
6958       onorms[i] = 0.0;
6959       PetscCall(VecSet(vecs[i], 0.0));
6960     } else {
6961       onorms[i] = norm;
6962     }
6963   }
6964   /* push nonzero vectors at the beginning */
6965   for (i = 0; i < n; i++) {
6966     if (onorms[i] == 0.0) {
6967       for (j = i + 1; j < n; j++) {
6968         if (onorms[j] != 0.0) {
6969           PetscCall(VecCopy(vecs[j], vecs[i]));
6970           onorms[j] = 0.0;
6971         }
6972       }
6973     }
6974   }
6975   for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
6976   PetscCall(PetscFree2(alphas, onorms));
6977   PetscFunctionReturn(0);
6978 }
6979 
6980 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
6981 {
6982   ISLocalToGlobalMapping mapping;
6983   Mat                    A;
6984   PetscInt               n_neighs, *neighs, *n_shared, **shared;
6985   PetscMPIInt            size, rank, color;
6986   PetscInt              *xadj, *adjncy;
6987   PetscInt              *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
6988   PetscInt               im_active, active_procs, N, n, i, j, threshold = 2;
6989   PetscInt               void_procs, *procs_candidates = NULL;
6990   PetscInt               xadj_count, *count;
6991   PetscBool              ismatis, use_vwgt = PETSC_FALSE;
6992   PetscSubcomm           psubcomm;
6993   MPI_Comm               subcomm;
6994 
6995   PetscFunctionBegin;
6996   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
6997   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
6998   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
6999   PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7000   PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7001   PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7002 
7003   if (have_void) *have_void = PETSC_FALSE;
7004   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7005   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7006   PetscCall(MatISGetLocalMat(mat, &A));
7007   PetscCall(MatGetLocalSize(A, &n, NULL));
7008   im_active = !!n;
7009   PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7010   void_procs = size - active_procs;
7011   /* get ranks of of non-active processes in mat communicator */
7012   if (void_procs) {
7013     PetscInt ncand;
7014 
7015     if (have_void) *have_void = PETSC_TRUE;
7016     PetscCall(PetscMalloc1(size, &procs_candidates));
7017     PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7018     for (i = 0, ncand = 0; i < size; i++) {
7019       if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7020     }
7021     /* force n_subdomains to be not greater that the number of non-active processes */
7022     *n_subdomains = PetscMin(void_procs, *n_subdomains);
7023   }
7024 
7025   /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7026      number of subdomains requested 1 -> send to rank-0 or first candidate in voids  */
7027   PetscCall(MatGetSize(mat, &N, NULL));
7028   if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7029     PetscInt issize, isidx, dest;
7030     if (*n_subdomains == 1) dest = 0;
7031     else dest = rank;
7032     if (im_active) {
7033       issize = 1;
7034       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7035         isidx = procs_candidates[dest];
7036       } else {
7037         isidx = dest;
7038       }
7039     } else {
7040       issize = 0;
7041       isidx  = -1;
7042     }
7043     if (*n_subdomains != 1) *n_subdomains = active_procs;
7044     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7045     PetscCall(PetscFree(procs_candidates));
7046     PetscFunctionReturn(0);
7047   }
7048   PetscCall(PetscOptionsGetBool(NULL, NULL, "-matis_partitioning_use_vwgt", &use_vwgt, NULL));
7049   PetscCall(PetscOptionsGetInt(NULL, NULL, "-matis_partitioning_threshold", &threshold, NULL));
7050   threshold = PetscMax(threshold, 2);
7051 
7052   /* Get info on mapping */
7053   PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7054   PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7055 
7056   /* build local CSR graph of subdomains' connectivity */
7057   PetscCall(PetscMalloc1(2, &xadj));
7058   xadj[0] = 0;
7059   xadj[1] = PetscMax(n_neighs - 1, 0);
7060   PetscCall(PetscMalloc1(xadj[1], &adjncy));
7061   PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7062   PetscCall(PetscCalloc1(n, &count));
7063   for (i = 1; i < n_neighs; i++)
7064     for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7065 
7066   xadj_count = 0;
7067   for (i = 1; i < n_neighs; i++) {
7068     for (j = 0; j < n_shared[i]; j++) {
7069       if (count[shared[i][j]] < threshold) {
7070         adjncy[xadj_count]     = neighs[i];
7071         adjncy_wgt[xadj_count] = n_shared[i];
7072         xadj_count++;
7073         break;
7074       }
7075     }
7076   }
7077   xadj[1] = xadj_count;
7078   PetscCall(PetscFree(count));
7079   PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7080   PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7081 
7082   PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7083 
7084   /* Restrict work on active processes only */
7085   PetscCall(PetscMPIIntCast(im_active, &color));
7086   if (void_procs) {
7087     PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7088     PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7089     PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7090     subcomm = PetscSubcommChild(psubcomm);
7091   } else {
7092     psubcomm = NULL;
7093     subcomm  = PetscObjectComm((PetscObject)mat);
7094   }
7095 
7096   v_wgt = NULL;
7097   if (!color) {
7098     PetscCall(PetscFree(xadj));
7099     PetscCall(PetscFree(adjncy));
7100     PetscCall(PetscFree(adjncy_wgt));
7101   } else {
7102     Mat             subdomain_adj;
7103     IS              new_ranks, new_ranks_contig;
7104     MatPartitioning partitioner;
7105     PetscInt        rstart = 0, rend = 0;
7106     PetscInt       *is_indices, *oldranks;
7107     PetscMPIInt     size;
7108     PetscBool       aggregate;
7109 
7110     PetscCallMPI(MPI_Comm_size(subcomm, &size));
7111     if (void_procs) {
7112       PetscInt prank = rank;
7113       PetscCall(PetscMalloc1(size, &oldranks));
7114       PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7115       for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7116       PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7117     } else {
7118       oldranks = NULL;
7119     }
7120     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7121     if (aggregate) { /* TODO: all this part could be made more efficient */
7122       PetscInt     lrows, row, ncols, *cols;
7123       PetscMPIInt  nrank;
7124       PetscScalar *vals;
7125 
7126       PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7127       lrows = 0;
7128       if (nrank < redprocs) {
7129         lrows = size / redprocs;
7130         if (nrank < size % redprocs) lrows++;
7131       }
7132       PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7133       PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7134       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7135       PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7136       row   = nrank;
7137       ncols = xadj[1] - xadj[0];
7138       cols  = adjncy;
7139       PetscCall(PetscMalloc1(ncols, &vals));
7140       for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7141       PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7142       PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7143       PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7144       PetscCall(PetscFree(xadj));
7145       PetscCall(PetscFree(adjncy));
7146       PetscCall(PetscFree(adjncy_wgt));
7147       PetscCall(PetscFree(vals));
7148       if (use_vwgt) {
7149         Vec                v;
7150         const PetscScalar *array;
7151         PetscInt           nl;
7152 
7153         PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7154         PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7155         PetscCall(VecAssemblyBegin(v));
7156         PetscCall(VecAssemblyEnd(v));
7157         PetscCall(VecGetLocalSize(v, &nl));
7158         PetscCall(VecGetArrayRead(v, &array));
7159         PetscCall(PetscMalloc1(nl, &v_wgt));
7160         for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7161         PetscCall(VecRestoreArrayRead(v, &array));
7162         PetscCall(VecDestroy(&v));
7163       }
7164     } else {
7165       PetscCall(MatCreateMPIAdj(subcomm, 1, (PetscInt)size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7166       if (use_vwgt) {
7167         PetscCall(PetscMalloc1(1, &v_wgt));
7168         v_wgt[0] = n;
7169       }
7170     }
7171     /* PetscCall(MatView(subdomain_adj,0)); */
7172 
7173     /* Partition */
7174     PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7175 #if defined(PETSC_HAVE_PTSCOTCH)
7176     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7177 #elif defined(PETSC_HAVE_PARMETIS)
7178     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7179 #else
7180     PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7181 #endif
7182     PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7183     if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7184     *n_subdomains = PetscMin((PetscInt)size, *n_subdomains);
7185     PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7186     PetscCall(MatPartitioningSetFromOptions(partitioner));
7187     PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7188     /* PetscCall(MatPartitioningView(partitioner,0)); */
7189 
7190     /* renumber new_ranks to avoid "holes" in new set of processors */
7191     PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7192     PetscCall(ISDestroy(&new_ranks));
7193     PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7194     if (!aggregate) {
7195       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7196         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7197         ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7198       } else if (oldranks) {
7199         ranks_send_to_idx[0] = oldranks[is_indices[0]];
7200       } else {
7201         ranks_send_to_idx[0] = is_indices[0];
7202       }
7203     } else {
7204       PetscInt     idx = 0;
7205       PetscMPIInt  tag;
7206       MPI_Request *reqs;
7207 
7208       PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7209       PetscCall(PetscMalloc1(rend - rstart, &reqs));
7210       for (i = rstart; i < rend; i++) PetscCallMPI(MPI_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7211       PetscCallMPI(MPI_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7212       PetscCallMPI(MPI_Waitall(rend - rstart, reqs, MPI_STATUSES_IGNORE));
7213       PetscCall(PetscFree(reqs));
7214       if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7215         PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7216         ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7217       } else if (oldranks) {
7218         ranks_send_to_idx[0] = oldranks[idx];
7219       } else {
7220         ranks_send_to_idx[0] = idx;
7221       }
7222     }
7223     PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7224     /* clean up */
7225     PetscCall(PetscFree(oldranks));
7226     PetscCall(ISDestroy(&new_ranks_contig));
7227     PetscCall(MatDestroy(&subdomain_adj));
7228     PetscCall(MatPartitioningDestroy(&partitioner));
7229   }
7230   PetscCall(PetscSubcommDestroy(&psubcomm));
7231   PetscCall(PetscFree(procs_candidates));
7232 
7233   /* assemble parallel IS for sends */
7234   i = 1;
7235   if (!color) i = 0;
7236   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7237   PetscFunctionReturn(0);
7238 }
7239 
7240 typedef enum {
7241   MATDENSE_PRIVATE = 0,
7242   MATAIJ_PRIVATE,
7243   MATBAIJ_PRIVATE,
7244   MATSBAIJ_PRIVATE
7245 } MatTypePrivate;
7246 
7247 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7248 {
7249   Mat                    local_mat;
7250   IS                     is_sends_internal;
7251   PetscInt               rows, cols, new_local_rows;
7252   PetscInt               i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7253   PetscBool              ismatis, isdense, newisdense, destroy_mat;
7254   ISLocalToGlobalMapping l2gmap;
7255   PetscInt              *l2gmap_indices;
7256   const PetscInt        *is_indices;
7257   MatType                new_local_type;
7258   /* buffers */
7259   PetscInt          *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
7260   PetscInt          *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
7261   PetscInt          *recv_buffer_idxs_local;
7262   PetscScalar       *ptr_vals, *recv_buffer_vals;
7263   const PetscScalar *send_buffer_vals;
7264   PetscScalar       *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
7265   /* MPI */
7266   MPI_Comm     comm, comm_n;
7267   PetscSubcomm subcomm;
7268   PetscMPIInt  n_sends, n_recvs, size;
7269   PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
7270   PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
7271   PetscMPIInt  len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
7272   MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
7273   MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
7274 
7275   PetscFunctionBegin;
7276   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7277   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7278   PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7279   PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
7280   PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
7281   PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
7282   PetscValidLogicalCollectiveBool(mat, reuse, 6);
7283   PetscValidLogicalCollectiveInt(mat, nis, 8);
7284   PetscValidLogicalCollectiveInt(mat, nvecs, 10);
7285   if (nvecs) {
7286     PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
7287     PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
7288   }
7289   /* further checks */
7290   PetscCall(MatISGetLocalMat(mat, &local_mat));
7291   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
7292   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7293   PetscCall(MatGetSize(local_mat, &rows, &cols));
7294   PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
7295   if (reuse && *mat_n) {
7296     PetscInt mrows, mcols, mnrows, mncols;
7297     PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
7298     PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
7299     PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
7300     PetscCall(MatGetSize(mat, &mrows, &mcols));
7301     PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
7302     PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
7303     PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
7304   }
7305   PetscCall(MatGetBlockSize(local_mat, &bs));
7306   PetscValidLogicalCollectiveInt(mat, bs, 1);
7307 
7308   /* prepare IS for sending if not provided */
7309   if (!is_sends) {
7310     PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
7311     PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
7312   } else {
7313     PetscCall(PetscObjectReference((PetscObject)is_sends));
7314     is_sends_internal = is_sends;
7315   }
7316 
7317   /* get comm */
7318   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
7319 
7320   /* compute number of sends */
7321   PetscCall(ISGetLocalSize(is_sends_internal, &i));
7322   PetscCall(PetscMPIIntCast(i, &n_sends));
7323 
7324   /* compute number of receives */
7325   PetscCallMPI(MPI_Comm_size(comm, &size));
7326   PetscCall(PetscMalloc1(size, &iflags));
7327   PetscCall(PetscArrayzero(iflags, size));
7328   PetscCall(ISGetIndices(is_sends_internal, &is_indices));
7329   for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
7330   PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
7331   PetscCall(PetscFree(iflags));
7332 
7333   /* restrict comm if requested */
7334   subcomm     = NULL;
7335   destroy_mat = PETSC_FALSE;
7336   if (restrict_comm) {
7337     PetscMPIInt color, subcommsize;
7338 
7339     color = 0;
7340     if (restrict_full) {
7341       if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7342     } else {
7343       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7344     }
7345     PetscCall(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
7346     subcommsize = size - subcommsize;
7347     /* check if reuse has been requested */
7348     if (reuse) {
7349       if (*mat_n) {
7350         PetscMPIInt subcommsize2;
7351         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
7352         PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
7353         comm_n = PetscObjectComm((PetscObject)*mat_n);
7354       } else {
7355         comm_n = PETSC_COMM_SELF;
7356       }
7357     } else { /* MAT_INITIAL_MATRIX */
7358       PetscMPIInt rank;
7359 
7360       PetscCallMPI(MPI_Comm_rank(comm, &rank));
7361       PetscCall(PetscSubcommCreate(comm, &subcomm));
7362       PetscCall(PetscSubcommSetNumber(subcomm, 2));
7363       PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
7364       comm_n = PetscSubcommChild(subcomm);
7365     }
7366     /* flag to destroy *mat_n if not significative */
7367     if (color) destroy_mat = PETSC_TRUE;
7368   } else {
7369     comm_n = comm;
7370   }
7371 
7372   /* prepare send/receive buffers */
7373   PetscCall(PetscMalloc1(size, &ilengths_idxs));
7374   PetscCall(PetscArrayzero(ilengths_idxs, size));
7375   PetscCall(PetscMalloc1(size, &ilengths_vals));
7376   PetscCall(PetscArrayzero(ilengths_vals, size));
7377   if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
7378 
7379   /* Get data from local matrices */
7380   PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
7381   /* TODO: See below some guidelines on how to prepare the local buffers */
7382   /*
7383        send_buffer_vals should contain the raw values of the local matrix
7384        send_buffer_idxs should contain:
7385        - MatType_PRIVATE type
7386        - PetscInt        size_of_l2gmap
7387        - PetscInt        global_row_indices[size_of_l2gmap]
7388        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7389     */
7390   {
7391     ISLocalToGlobalMapping mapping;
7392 
7393     PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7394     PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
7395     PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
7396     PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
7397     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7398     send_buffer_idxs[1] = i;
7399     PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
7400     PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
7401     PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
7402     PetscCall(PetscMPIIntCast(i, &len));
7403     for (i = 0; i < n_sends; i++) {
7404       ilengths_vals[is_indices[i]] = len * len;
7405       ilengths_idxs[is_indices[i]] = len + 2;
7406     }
7407   }
7408   PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
7409   /* additional is (if any) */
7410   if (nis) {
7411     PetscMPIInt psum;
7412     PetscInt    j;
7413     for (j = 0, psum = 0; j < nis; j++) {
7414       PetscInt plen;
7415       PetscCall(ISGetLocalSize(isarray[j], &plen));
7416       PetscCall(PetscMPIIntCast(plen, &len));
7417       psum += len + 1; /* indices + length */
7418     }
7419     PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
7420     for (j = 0, psum = 0; j < nis; j++) {
7421       PetscInt        plen;
7422       const PetscInt *is_array_idxs;
7423       PetscCall(ISGetLocalSize(isarray[j], &plen));
7424       send_buffer_idxs_is[psum] = plen;
7425       PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
7426       PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
7427       PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
7428       psum += plen + 1; /* indices + length */
7429     }
7430     for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
7431     PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
7432   }
7433   PetscCall(MatISRestoreLocalMat(mat, &local_mat));
7434 
7435   buf_size_idxs    = 0;
7436   buf_size_vals    = 0;
7437   buf_size_idxs_is = 0;
7438   buf_size_vecs    = 0;
7439   for (i = 0; i < n_recvs; i++) {
7440     buf_size_idxs += (PetscInt)olengths_idxs[i];
7441     buf_size_vals += (PetscInt)olengths_vals[i];
7442     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
7443     if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7444   }
7445   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
7446   PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
7447   PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
7448   PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
7449 
7450   /* get new tags for clean communications */
7451   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
7452   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
7453   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
7454   PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
7455 
7456   /* allocate for requests */
7457   PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
7458   PetscCall(PetscMalloc1(n_sends, &send_req_vals));
7459   PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
7460   PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
7461   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
7462   PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
7463   PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
7464   PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
7465 
7466   /* communications */
7467   ptr_idxs    = recv_buffer_idxs;
7468   ptr_vals    = recv_buffer_vals;
7469   ptr_idxs_is = recv_buffer_idxs_is;
7470   ptr_vecs    = recv_buffer_vecs;
7471   for (i = 0; i < n_recvs; i++) {
7472     source_dest = onodes[i];
7473     PetscCallMPI(MPI_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, source_dest, tag_idxs, comm, &recv_req_idxs[i]));
7474     PetscCallMPI(MPI_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, source_dest, tag_vals, comm, &recv_req_vals[i]));
7475     ptr_idxs += olengths_idxs[i];
7476     ptr_vals += olengths_vals[i];
7477     if (nis) {
7478       source_dest = onodes_is[i];
7479       PetscCallMPI(MPI_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, source_dest, tag_idxs_is, comm, &recv_req_idxs_is[i]));
7480       ptr_idxs_is += olengths_idxs_is[i];
7481     }
7482     if (nvecs) {
7483       source_dest = onodes[i];
7484       PetscCallMPI(MPI_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &recv_req_vecs[i]));
7485       ptr_vecs += olengths_idxs[i] - 2;
7486     }
7487   }
7488   for (i = 0; i < n_sends; i++) {
7489     PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
7490     PetscCallMPI(MPI_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
7491     PetscCallMPI(MPI_Isend((PetscScalar *)send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
7492     if (nis) PetscCallMPI(MPI_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
7493     if (nvecs) {
7494       PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
7495       PetscCallMPI(MPI_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
7496     }
7497   }
7498   PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
7499   PetscCall(ISDestroy(&is_sends_internal));
7500 
7501   /* assemble new l2g map */
7502   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
7503   ptr_idxs       = recv_buffer_idxs;
7504   new_local_rows = 0;
7505   for (i = 0; i < n_recvs; i++) {
7506     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7507     ptr_idxs += olengths_idxs[i];
7508   }
7509   PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
7510   ptr_idxs       = recv_buffer_idxs;
7511   new_local_rows = 0;
7512   for (i = 0; i < n_recvs; i++) {
7513     PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
7514     new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
7515     ptr_idxs += olengths_idxs[i];
7516   }
7517   PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
7518   PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
7519   PetscCall(PetscFree(l2gmap_indices));
7520 
7521   /* infer new local matrix type from received local matrices type */
7522   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7523   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
7524   if (n_recvs) {
7525     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7526     ptr_idxs                              = recv_buffer_idxs;
7527     for (i = 0; i < n_recvs; i++) {
7528       if ((PetscInt)new_local_type_private != *ptr_idxs) {
7529         new_local_type_private = MATAIJ_PRIVATE;
7530         break;
7531       }
7532       ptr_idxs += olengths_idxs[i];
7533     }
7534     switch (new_local_type_private) {
7535     case MATDENSE_PRIVATE:
7536       new_local_type = MATSEQAIJ;
7537       bs             = 1;
7538       break;
7539     case MATAIJ_PRIVATE:
7540       new_local_type = MATSEQAIJ;
7541       bs             = 1;
7542       break;
7543     case MATBAIJ_PRIVATE:
7544       new_local_type = MATSEQBAIJ;
7545       break;
7546     case MATSBAIJ_PRIVATE:
7547       new_local_type = MATSEQSBAIJ;
7548       break;
7549     default:
7550       SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
7551     }
7552   } else { /* by default, new_local_type is seqaij */
7553     new_local_type = MATSEQAIJ;
7554     bs             = 1;
7555   }
7556 
7557   /* create MATIS object if needed */
7558   if (!reuse) {
7559     PetscCall(MatGetSize(mat, &rows, &cols));
7560     PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
7561   } else {
7562     /* it also destroys the local matrices */
7563     if (*mat_n) {
7564       PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
7565     } else { /* this is a fake object */
7566       PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
7567     }
7568   }
7569   PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
7570   PetscCall(MatSetType(local_mat, new_local_type));
7571 
7572   PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
7573 
7574   /* Global to local map of received indices */
7575   PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
7576   PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
7577   PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
7578 
7579   /* restore attributes -> type of incoming data and its size */
7580   buf_size_idxs = 0;
7581   for (i = 0; i < n_recvs; i++) {
7582     recv_buffer_idxs_local[buf_size_idxs]     = recv_buffer_idxs[buf_size_idxs];
7583     recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
7584     buf_size_idxs += (PetscInt)olengths_idxs[i];
7585   }
7586   PetscCall(PetscFree(recv_buffer_idxs));
7587 
7588   /* set preallocation */
7589   PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
7590   if (!newisdense) {
7591     PetscInt *new_local_nnz = NULL;
7592 
7593     ptr_idxs = recv_buffer_idxs_local;
7594     if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
7595     for (i = 0; i < n_recvs; i++) {
7596       PetscInt j;
7597       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7598         for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
7599       } else {
7600         /* TODO */
7601       }
7602       ptr_idxs += olengths_idxs[i];
7603     }
7604     if (new_local_nnz) {
7605       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
7606       PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
7607       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
7608       PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
7609       for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
7610       PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
7611     } else {
7612       PetscCall(MatSetUp(local_mat));
7613     }
7614     PetscCall(PetscFree(new_local_nnz));
7615   } else {
7616     PetscCall(MatSetUp(local_mat));
7617   }
7618 
7619   /* set values */
7620   ptr_vals = recv_buffer_vals;
7621   ptr_idxs = recv_buffer_idxs_local;
7622   for (i = 0; i < n_recvs; i++) {
7623     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
7624       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
7625       PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
7626       PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
7627       PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
7628       PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
7629     } else {
7630       /* TODO */
7631     }
7632     ptr_idxs += olengths_idxs[i];
7633     ptr_vals += olengths_vals[i];
7634   }
7635   PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
7636   PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
7637   PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
7638   PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
7639   PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
7640   PetscCall(PetscFree(recv_buffer_vals));
7641 
7642 #if 0
7643   if (!restrict_comm) { /* check */
7644     Vec       lvec,rvec;
7645     PetscReal infty_error;
7646 
7647     PetscCall(MatCreateVecs(mat,&rvec,&lvec));
7648     PetscCall(VecSetRandom(rvec,NULL));
7649     PetscCall(MatMult(mat,rvec,lvec));
7650     PetscCall(VecScale(lvec,-1.0));
7651     PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
7652     PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
7653     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
7654     PetscCall(VecDestroy(&rvec));
7655     PetscCall(VecDestroy(&lvec));
7656   }
7657 #endif
7658 
7659   /* assemble new additional is (if any) */
7660   if (nis) {
7661     PetscInt **temp_idxs, *count_is, j, psum;
7662 
7663     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
7664     PetscCall(PetscCalloc1(nis, &count_is));
7665     ptr_idxs = recv_buffer_idxs_is;
7666     psum     = 0;
7667     for (i = 0; i < n_recvs; i++) {
7668       for (j = 0; j < nis; j++) {
7669         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7670         count_is[j] += plen;         /* increment counting of buffer for j-th IS */
7671         psum += plen;
7672         ptr_idxs += plen + 1; /* shift pointer to received data */
7673       }
7674     }
7675     PetscCall(PetscMalloc1(nis, &temp_idxs));
7676     PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
7677     for (i = 1; i < nis; i++) temp_idxs[i] = temp_idxs[i - 1] + count_is[i - 1];
7678     PetscCall(PetscArrayzero(count_is, nis));
7679     ptr_idxs = recv_buffer_idxs_is;
7680     for (i = 0; i < n_recvs; i++) {
7681       for (j = 0; j < nis; j++) {
7682         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
7683         PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
7684         count_is[j] += plen;  /* increment starting point of buffer for j-th IS */
7685         ptr_idxs += plen + 1; /* shift pointer to received data */
7686       }
7687     }
7688     for (i = 0; i < nis; i++) {
7689       PetscCall(ISDestroy(&isarray[i]));
7690       PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
7691       PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
7692     }
7693     PetscCall(PetscFree(count_is));
7694     PetscCall(PetscFree(temp_idxs[0]));
7695     PetscCall(PetscFree(temp_idxs));
7696   }
7697   /* free workspace */
7698   PetscCall(PetscFree(recv_buffer_idxs_is));
7699   PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
7700   PetscCall(PetscFree(send_buffer_idxs));
7701   PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
7702   if (isdense) {
7703     PetscCall(MatISGetLocalMat(mat, &local_mat));
7704     PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
7705     PetscCall(MatISRestoreLocalMat(mat, &local_mat));
7706   } else {
7707     /* PetscCall(PetscFree(send_buffer_vals)); */
7708   }
7709   if (nis) {
7710     PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
7711     PetscCall(PetscFree(send_buffer_idxs_is));
7712   }
7713 
7714   if (nvecs) {
7715     PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
7716     PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
7717     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
7718     PetscCall(VecDestroy(&nnsp_vec[0]));
7719     PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
7720     PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
7721     PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
7722     /* set values */
7723     ptr_vals = recv_buffer_vecs;
7724     ptr_idxs = recv_buffer_idxs_local;
7725     PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
7726     for (i = 0; i < n_recvs; i++) {
7727       PetscInt j;
7728       for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
7729       ptr_idxs += olengths_idxs[i];
7730       ptr_vals += olengths_idxs[i] - 2;
7731     }
7732     PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
7733     PetscCall(VecAssemblyBegin(nnsp_vec[0]));
7734     PetscCall(VecAssemblyEnd(nnsp_vec[0]));
7735   }
7736 
7737   PetscCall(PetscFree(recv_buffer_vecs));
7738   PetscCall(PetscFree(recv_buffer_idxs_local));
7739   PetscCall(PetscFree(recv_req_idxs));
7740   PetscCall(PetscFree(recv_req_vals));
7741   PetscCall(PetscFree(recv_req_vecs));
7742   PetscCall(PetscFree(recv_req_idxs_is));
7743   PetscCall(PetscFree(send_req_idxs));
7744   PetscCall(PetscFree(send_req_vals));
7745   PetscCall(PetscFree(send_req_vecs));
7746   PetscCall(PetscFree(send_req_idxs_is));
7747   PetscCall(PetscFree(ilengths_vals));
7748   PetscCall(PetscFree(ilengths_idxs));
7749   PetscCall(PetscFree(olengths_vals));
7750   PetscCall(PetscFree(olengths_idxs));
7751   PetscCall(PetscFree(onodes));
7752   if (nis) {
7753     PetscCall(PetscFree(ilengths_idxs_is));
7754     PetscCall(PetscFree(olengths_idxs_is));
7755     PetscCall(PetscFree(onodes_is));
7756   }
7757   PetscCall(PetscSubcommDestroy(&subcomm));
7758   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
7759     PetscCall(MatDestroy(mat_n));
7760     for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
7761     if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
7762       PetscCall(VecDestroy(&nnsp_vec[0]));
7763     }
7764     *mat_n = NULL;
7765   }
7766   PetscFunctionReturn(0);
7767 }
7768 
7769 /* temporary hack into ksp private data structure */
7770 #include <petsc/private/kspimpl.h>
7771 
7772 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, PetscScalar *coarse_submat_vals)
7773 {
7774   PC_BDDC               *pcbddc = (PC_BDDC *)pc->data;
7775   PC_IS                 *pcis   = (PC_IS *)pc->data;
7776   Mat                    coarse_mat, coarse_mat_is, coarse_submat_dense;
7777   Mat                    coarsedivudotp = NULL;
7778   Mat                    coarseG, t_coarse_mat_is;
7779   MatNullSpace           CoarseNullSpace = NULL;
7780   ISLocalToGlobalMapping coarse_islg;
7781   IS                     coarse_is, *isarray, corners;
7782   PetscInt               i, im_active = -1, active_procs = -1;
7783   PetscInt               nis, nisdofs, nisneu, nisvert;
7784   PetscInt               coarse_eqs_per_proc;
7785   PC                     pc_temp;
7786   PCType                 coarse_pc_type;
7787   KSPType                coarse_ksp_type;
7788   PetscBool              multilevel_requested, multilevel_allowed;
7789   PetscBool              coarse_reuse;
7790   PetscInt               ncoarse, nedcfield;
7791   PetscBool              compute_vecs = PETSC_FALSE;
7792   PetscScalar           *array;
7793   MatReuse               coarse_mat_reuse;
7794   PetscBool              restr, full_restr, have_void;
7795   PetscMPIInt            size;
7796 
7797   PetscFunctionBegin;
7798   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
7799   /* Assign global numbering to coarse dofs */
7800   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
7801     PetscInt ocoarse_size;
7802     compute_vecs = PETSC_TRUE;
7803 
7804     pcbddc->new_primal_space = PETSC_TRUE;
7805     ocoarse_size             = pcbddc->coarse_size;
7806     PetscCall(PetscFree(pcbddc->global_primal_indices));
7807     PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
7808     /* see if we can avoid some work */
7809     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
7810       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
7811       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
7812         PetscCall(KSPReset(pcbddc->coarse_ksp));
7813         coarse_reuse = PETSC_FALSE;
7814       } else { /* we can safely reuse already computed coarse matrix */
7815         coarse_reuse = PETSC_TRUE;
7816       }
7817     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
7818       coarse_reuse = PETSC_FALSE;
7819     }
7820     /* reset any subassembling information */
7821     if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
7822   } else { /* primal space is unchanged, so we can reuse coarse matrix */
7823     coarse_reuse = PETSC_TRUE;
7824   }
7825   if (coarse_reuse && pcbddc->coarse_ksp) {
7826     PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
7827     PetscCall(PetscObjectReference((PetscObject)coarse_mat));
7828     coarse_mat_reuse = MAT_REUSE_MATRIX;
7829   } else {
7830     coarse_mat       = NULL;
7831     coarse_mat_reuse = MAT_INITIAL_MATRIX;
7832   }
7833 
7834   /* creates temporary l2gmap and IS for coarse indexes */
7835   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
7836   PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
7837 
7838   /* creates temporary MATIS object for coarse matrix */
7839   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, coarse_submat_vals, &coarse_submat_dense));
7840   PetscCall(MatCreateIS(PetscObjectComm((PetscObject)pc), 1, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size, coarse_islg, coarse_islg, &t_coarse_mat_is));
7841   PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat_dense));
7842   PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
7843   PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
7844   PetscCall(MatDestroy(&coarse_submat_dense));
7845 
7846   /* count "active" (i.e. with positive local size) and "void" processes */
7847   im_active = !!(pcis->n);
7848   PetscCall(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
7849 
7850   /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
7851   /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
7852   /* full_restr : just use the receivers from the subassembling pattern */
7853   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
7854   coarse_mat_is        = NULL;
7855   multilevel_allowed   = PETSC_FALSE;
7856   multilevel_requested = PETSC_FALSE;
7857   coarse_eqs_per_proc  = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
7858   if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
7859   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
7860   if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
7861   if (multilevel_requested) {
7862     ncoarse    = active_procs / pcbddc->coarsening_ratio;
7863     restr      = PETSC_FALSE;
7864     full_restr = PETSC_FALSE;
7865   } else {
7866     ncoarse    = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
7867     restr      = PETSC_TRUE;
7868     full_restr = PETSC_TRUE;
7869   }
7870   if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
7871   ncoarse = PetscMax(1, ncoarse);
7872   if (!pcbddc->coarse_subassembling) {
7873     if (pcbddc->coarsening_ratio > 1) {
7874       if (multilevel_requested) {
7875         PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
7876       } else {
7877         PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
7878       }
7879     } else {
7880       PetscMPIInt rank;
7881 
7882       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
7883       have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
7884       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
7885     }
7886   } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
7887     PetscInt psum;
7888     if (pcbddc->coarse_ksp) psum = 1;
7889     else psum = 0;
7890     PetscCall(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
7891     have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
7892   }
7893   /* determine if we can go multilevel */
7894   if (multilevel_requested) {
7895     if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
7896     else restr = full_restr = PETSC_TRUE;             /* 1 subdomain, use a direct solver */
7897   }
7898   if (multilevel_allowed && have_void) restr = PETSC_TRUE;
7899 
7900   /* dump subassembling pattern */
7901   if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
7902   /* compute dofs splitting and neumann boundaries for coarse dofs */
7903   nedcfield = -1;
7904   corners   = NULL;
7905   if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
7906     PetscInt              *tidxs, *tidxs2, nout, tsize, i;
7907     const PetscInt        *idxs;
7908     ISLocalToGlobalMapping tmap;
7909 
7910     /* create map between primal indices (in local representative ordering) and local primal numbering */
7911     PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
7912     /* allocate space for temporary storage */
7913     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
7914     PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
7915     /* allocate for IS array */
7916     nisdofs = pcbddc->n_ISForDofsLocal;
7917     if (pcbddc->nedclocal) {
7918       if (pcbddc->nedfield > -1) {
7919         nedcfield = pcbddc->nedfield;
7920       } else {
7921         nedcfield = 0;
7922         PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
7923         nisdofs = 1;
7924       }
7925     }
7926     nisneu  = !!pcbddc->NeumannBoundariesLocal;
7927     nisvert = 0; /* nisvert is not used */
7928     nis     = nisdofs + nisneu + nisvert;
7929     PetscCall(PetscMalloc1(nis, &isarray));
7930     /* dofs splitting */
7931     for (i = 0; i < nisdofs; i++) {
7932       /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
7933       if (nedcfield != i) {
7934         PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
7935         PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
7936         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7937         PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
7938       } else {
7939         PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
7940         PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
7941         PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7942         PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
7943         PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
7944       }
7945       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
7946       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
7947       /* PetscCall(ISView(isarray[i],0)); */
7948     }
7949     /* neumann boundaries */
7950     if (pcbddc->NeumannBoundariesLocal) {
7951       /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
7952       PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
7953       PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
7954       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7955       PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
7956       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
7957       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
7958       /* PetscCall(ISView(isarray[nisdofs],0)); */
7959     }
7960     /* coordinates */
7961     if (pcbddc->corner_selected) {
7962       PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
7963       PetscCall(ISGetLocalSize(corners, &tsize));
7964       PetscCall(ISGetIndices(corners, &idxs));
7965       PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
7966       PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
7967       PetscCall(ISRestoreIndices(corners, &idxs));
7968       PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
7969       PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
7970       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
7971     }
7972     PetscCall(PetscFree(tidxs));
7973     PetscCall(PetscFree(tidxs2));
7974     PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
7975   } else {
7976     nis     = 0;
7977     nisdofs = 0;
7978     nisneu  = 0;
7979     nisvert = 0;
7980     isarray = NULL;
7981   }
7982   /* destroy no longer needed map */
7983   PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
7984 
7985   /* subassemble */
7986   if (multilevel_allowed) {
7987     Vec       vp[1];
7988     PetscInt  nvecs = 0;
7989     PetscBool reuse, reuser;
7990 
7991     if (coarse_mat) reuse = PETSC_TRUE;
7992     else reuse = PETSC_FALSE;
7993     PetscCall(MPIU_Allreduce(&reuse, &reuser, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7994     vp[0] = NULL;
7995     if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
7996       PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
7997       PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
7998       PetscCall(VecSetType(vp[0], VECSTANDARD));
7999       nvecs = 1;
8000 
8001       if (pcbddc->divudotp) {
8002         Mat      B, loc_divudotp;
8003         Vec      v, p;
8004         IS       dummy;
8005         PetscInt np;
8006 
8007         PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8008         PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8009         PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8010         PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8011         PetscCall(MatCreateVecs(B, &v, &p));
8012         PetscCall(VecSet(p, 1.));
8013         PetscCall(MatMultTranspose(B, p, v));
8014         PetscCall(VecDestroy(&p));
8015         PetscCall(MatDestroy(&B));
8016         PetscCall(VecGetArray(vp[0], &array));
8017         PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8018         PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8019         PetscCall(VecResetArray(pcbddc->vec1_P));
8020         PetscCall(VecRestoreArray(vp[0], &array));
8021         PetscCall(ISDestroy(&dummy));
8022         PetscCall(VecDestroy(&v));
8023       }
8024     }
8025     if (reuser) {
8026       PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8027     } else {
8028       PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8029     }
8030     if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8031       PetscScalar       *arraym;
8032       const PetscScalar *arrayv;
8033       PetscInt           nl;
8034       PetscCall(VecGetLocalSize(vp[0], &nl));
8035       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8036       PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8037       PetscCall(VecGetArrayRead(vp[0], &arrayv));
8038       PetscCall(PetscArraycpy(arraym, arrayv, nl));
8039       PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8040       PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8041       PetscCall(VecDestroy(&vp[0]));
8042     } else {
8043       PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8044     }
8045   } else {
8046     PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8047   }
8048   if (coarse_mat_is || coarse_mat) {
8049     if (!multilevel_allowed) {
8050       PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8051     } else {
8052       /* if this matrix is present, it means we are not reusing the coarse matrix */
8053       if (coarse_mat_is) {
8054         PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8055         PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8056         coarse_mat = coarse_mat_is;
8057       }
8058     }
8059   }
8060   PetscCall(MatDestroy(&t_coarse_mat_is));
8061   PetscCall(MatDestroy(&coarse_mat_is));
8062 
8063   /* create local to global scatters for coarse problem */
8064   if (compute_vecs) {
8065     PetscInt lrows;
8066     PetscCall(VecDestroy(&pcbddc->coarse_vec));
8067     if (coarse_mat) {
8068       PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8069     } else {
8070       lrows = 0;
8071     }
8072     PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8073     PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8074     PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8075     PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8076     PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8077   }
8078   PetscCall(ISDestroy(&coarse_is));
8079 
8080   /* set defaults for coarse KSP and PC */
8081   if (multilevel_allowed) {
8082     coarse_ksp_type = KSPRICHARDSON;
8083     coarse_pc_type  = PCBDDC;
8084   } else {
8085     coarse_ksp_type = KSPPREONLY;
8086     coarse_pc_type  = PCREDUNDANT;
8087   }
8088 
8089   /* print some info if requested */
8090   if (pcbddc->dbg_flag) {
8091     if (!multilevel_allowed) {
8092       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8093       if (multilevel_requested) {
8094         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, pcbddc->coarsening_ratio));
8095       } else if (pcbddc->max_levels) {
8096         PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8097       }
8098       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8099     }
8100   }
8101 
8102   /* communicate coarse discrete gradient */
8103   coarseG = NULL;
8104   if (pcbddc->nedcG && multilevel_allowed) {
8105     MPI_Comm ccomm;
8106     if (coarse_mat) {
8107       ccomm = PetscObjectComm((PetscObject)coarse_mat);
8108     } else {
8109       ccomm = MPI_COMM_NULL;
8110     }
8111     PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8112   }
8113 
8114   /* create the coarse KSP object only once with defaults */
8115   if (coarse_mat) {
8116     PetscBool   isredundant, isbddc, force, valid;
8117     PetscViewer dbg_viewer = NULL;
8118     PetscBool   isset, issym, isher, isspd;
8119 
8120     if (pcbddc->dbg_flag) {
8121       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8122       PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8123     }
8124     if (!pcbddc->coarse_ksp) {
8125       char   prefix[256], str_level[16];
8126       size_t len;
8127 
8128       PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8129       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8130       PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8131       PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, 1));
8132       PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8133       PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8134       PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8135       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8136       /* TODO is this logic correct? should check for coarse_mat type */
8137       PetscCall(PCSetType(pc_temp, coarse_pc_type));
8138       /* prefix */
8139       PetscCall(PetscStrcpy(prefix, ""));
8140       PetscCall(PetscStrcpy(str_level, ""));
8141       if (!pcbddc->current_level) {
8142         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8143         PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8144       } else {
8145         PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8146         if (pcbddc->current_level > 1) len -= 3;  /* remove "lX_" with X level number */
8147         if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8148         /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8149         PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8150         PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%d_", (int)(pcbddc->current_level)));
8151         PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8152       }
8153       PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8154       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8155       PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8156       PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8157       PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8158       /* allow user customization */
8159       PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8160       /* get some info after set from options */
8161       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8162       /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8163       force = PETSC_FALSE;
8164       PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8165       PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8166       PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8167       if (multilevel_allowed && !force && !valid) {
8168         isbddc = PETSC_TRUE;
8169         PetscCall(PCSetType(pc_temp, PCBDDC));
8170         PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8171         PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8172         PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8173         if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8174           PetscObjectOptionsBegin((PetscObject)pc_temp);
8175           PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8176           PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8177           PetscOptionsEnd();
8178           pc_temp->setfromoptionscalled++;
8179         }
8180       }
8181     }
8182     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8183     PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8184     if (nisdofs) {
8185       PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8186       for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8187     }
8188     if (nisneu) {
8189       PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8190       PetscCall(ISDestroy(&isarray[nisdofs]));
8191     }
8192     if (nisvert) {
8193       PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8194       PetscCall(ISDestroy(&isarray[nis - 1]));
8195     }
8196     if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8197 
8198     /* get some info after set from options */
8199     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8200 
8201     /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8202     if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8203     /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8204     force = PETSC_FALSE;
8205     PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8206     PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8207     if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8208     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8209     if (isredundant) {
8210       KSP inner_ksp;
8211       PC  inner_pc;
8212 
8213       PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8214       PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8215     }
8216 
8217     /* parameters which miss an API */
8218     PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8219     if (isbddc) {
8220       PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8221 
8222       pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8223       pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8224       pcbddc_coarse->coarse_eqs_limit    = pcbddc->coarse_eqs_limit;
8225       pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8226       if (pcbddc_coarse->benign_saddle_point) {
8227         Mat                    coarsedivudotp_is;
8228         ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8229         IS                     row, col;
8230         const PetscInt        *gidxs;
8231         PetscInt               n, st, M, N;
8232 
8233         PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8234         PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8235         st = st - n;
8236         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8237         PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8238         PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8239         PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
8240         PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
8241         PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
8242         PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
8243         PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
8244         PetscCall(ISGetSize(row, &M));
8245         PetscCall(MatGetSize(coarse_mat, &N, NULL));
8246         PetscCall(ISDestroy(&row));
8247         PetscCall(ISDestroy(&col));
8248         PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
8249         PetscCall(MatSetType(coarsedivudotp_is, MATIS));
8250         PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
8251         PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
8252         PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
8253         PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
8254         PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
8255         PetscCall(MatDestroy(&coarsedivudotp));
8256         PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
8257         PetscCall(MatDestroy(&coarsedivudotp_is));
8258         pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8259         if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8260       }
8261     }
8262 
8263     /* propagate symmetry info of coarse matrix */
8264     PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
8265     PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
8266     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
8267     PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
8268     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
8269     PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
8270     if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
8271 
8272     if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
8273     /* set operators */
8274     PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
8275     PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
8276     PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8277     if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
8278   }
8279   PetscCall(MatDestroy(&coarseG));
8280   PetscCall(PetscFree(isarray));
8281 #if 0
8282   {
8283     PetscViewer viewer;
8284     char filename[256];
8285     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8286     PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
8287     PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
8288     PetscCall(MatView(coarse_mat,viewer));
8289     PetscCall(PetscViewerPopFormat(viewer));
8290     PetscCall(PetscViewerDestroy(&viewer));
8291   }
8292 #endif
8293 
8294   if (corners) {
8295     Vec             gv;
8296     IS              is;
8297     const PetscInt *idxs;
8298     PetscInt        i, d, N, n, cdim = pcbddc->mat_graph->cdim;
8299     PetscScalar    *coords;
8300 
8301     PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
8302     PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
8303     PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
8304     PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
8305     PetscCall(VecSetBlockSize(gv, cdim));
8306     PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
8307     PetscCall(VecSetType(gv, VECSTANDARD));
8308     PetscCall(VecSetFromOptions(gv));
8309     PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
8310 
8311     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8312     PetscCall(ISGetLocalSize(is, &n));
8313     PetscCall(ISGetIndices(is, &idxs));
8314     PetscCall(PetscMalloc1(n * cdim, &coords));
8315     for (i = 0; i < n; i++) {
8316       for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
8317     }
8318     PetscCall(ISRestoreIndices(is, &idxs));
8319     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
8320 
8321     PetscCall(ISGetLocalSize(corners, &n));
8322     PetscCall(ISGetIndices(corners, &idxs));
8323     PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
8324     PetscCall(ISRestoreIndices(corners, &idxs));
8325     PetscCall(PetscFree(coords));
8326     PetscCall(VecAssemblyBegin(gv));
8327     PetscCall(VecAssemblyEnd(gv));
8328     PetscCall(VecGetArray(gv, &coords));
8329     if (pcbddc->coarse_ksp) {
8330       PC        coarse_pc;
8331       PetscBool isbddc;
8332 
8333       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
8334       PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
8335       if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8336         PetscReal *realcoords;
8337 
8338         PetscCall(VecGetLocalSize(gv, &n));
8339 #if defined(PETSC_USE_COMPLEX)
8340         PetscCall(PetscMalloc1(n, &realcoords));
8341         for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
8342 #else
8343         realcoords = coords;
8344 #endif
8345         PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
8346 #if defined(PETSC_USE_COMPLEX)
8347         PetscCall(PetscFree(realcoords));
8348 #endif
8349       }
8350     }
8351     PetscCall(VecRestoreArray(gv, &coords));
8352     PetscCall(VecDestroy(&gv));
8353   }
8354   PetscCall(ISDestroy(&corners));
8355 
8356   if (pcbddc->coarse_ksp) {
8357     Vec crhs, csol;
8358 
8359     PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
8360     PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
8361     if (!csol) PetscCall(MatCreateVecs(coarse_mat, &((pcbddc->coarse_ksp)->vec_sol), NULL));
8362     if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &((pcbddc->coarse_ksp)->vec_rhs)));
8363   }
8364   PetscCall(MatDestroy(&coarsedivudotp));
8365 
8366   /* compute null space for coarse solver if the benign trick has been requested */
8367   if (pcbddc->benign_null) {
8368     PetscCall(VecSet(pcbddc->vec1_P, 0.));
8369     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
8370     PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
8371     PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
8372     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
8373     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
8374     if (coarse_mat) {
8375       Vec          nullv;
8376       PetscScalar *array, *array2;
8377       PetscInt     nl;
8378 
8379       PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
8380       PetscCall(VecGetLocalSize(nullv, &nl));
8381       PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
8382       PetscCall(VecGetArray(nullv, &array2));
8383       PetscCall(PetscArraycpy(array2, array, nl));
8384       PetscCall(VecRestoreArray(nullv, &array2));
8385       PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
8386       PetscCall(VecNormalize(nullv, NULL));
8387       PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
8388       PetscCall(VecDestroy(&nullv));
8389     }
8390   }
8391   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8392 
8393   PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
8394   if (pcbddc->coarse_ksp) {
8395     PetscBool ispreonly;
8396 
8397     if (CoarseNullSpace) {
8398       PetscBool isnull;
8399 
8400       PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
8401       if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
8402       /* TODO: add local nullspaces (if any) */
8403     }
8404     /* setup coarse ksp */
8405     PetscCall(KSPSetUp(pcbddc->coarse_ksp));
8406     /* Check coarse problem if in debug mode or if solving with an iterative method */
8407     PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
8408     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8409       KSP         check_ksp;
8410       KSPType     check_ksp_type;
8411       PC          check_pc;
8412       Vec         check_vec, coarse_vec;
8413       PetscReal   abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
8414       PetscInt    its;
8415       PetscBool   compute_eigs;
8416       PetscReal  *eigs_r, *eigs_c;
8417       PetscInt    neigs;
8418       const char *prefix;
8419 
8420       /* Create ksp object suitable for estimation of extreme eigenvalues */
8421       PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
8422       PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
8423       PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
8424       PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
8425       PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_DEFAULT, pcbddc->coarse_size));
8426       /* prevent from setup unneeded object */
8427       PetscCall(KSPGetPC(check_ksp, &check_pc));
8428       PetscCall(PCSetType(check_pc, PCNONE));
8429       if (ispreonly) {
8430         check_ksp_type = KSPPREONLY;
8431         compute_eigs   = PETSC_FALSE;
8432       } else {
8433         check_ksp_type = KSPGMRES;
8434         compute_eigs   = PETSC_TRUE;
8435       }
8436       PetscCall(KSPSetType(check_ksp, check_ksp_type));
8437       PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
8438       PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
8439       PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
8440       PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
8441       PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
8442       PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
8443       PetscCall(KSPSetFromOptions(check_ksp));
8444       PetscCall(KSPSetUp(check_ksp));
8445       PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
8446       PetscCall(KSPSetPC(check_ksp, check_pc));
8447       /* create random vec */
8448       PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
8449       PetscCall(VecSetRandom(check_vec, NULL));
8450       PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
8451       /* solve coarse problem */
8452       PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
8453       PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
8454       /* set eigenvalue estimation if preonly has not been requested */
8455       if (compute_eigs) {
8456         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
8457         PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
8458         PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
8459         if (neigs) {
8460           lambda_max = eigs_r[neigs - 1];
8461           lambda_min = eigs_r[0];
8462           if (pcbddc->use_coarse_estimates) {
8463             if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8464               PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
8465               PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
8466             }
8467           }
8468         }
8469       }
8470 
8471       /* check coarse problem residual error */
8472       if (pcbddc->dbg_flag) {
8473         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8474         PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
8475         PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
8476         PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
8477         PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
8478         PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
8479         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
8480         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp), dbg_viewer));
8481         PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc), dbg_viewer));
8482         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error   : %1.6e\n", (double)infty_error));
8483         PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
8484         if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
8485         if (compute_eigs) {
8486           PetscReal          lambda_max_s, lambda_min_s;
8487           KSPConvergedReason reason;
8488           PetscCall(KSPGetType(check_ksp, &check_ksp_type));
8489           PetscCall(KSPGetIterationNumber(check_ksp, &its));
8490           PetscCall(KSPGetConvergedReason(check_ksp, &reason));
8491           PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
8492           PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
8493           for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
8494         }
8495         PetscCall(PetscViewerFlush(dbg_viewer));
8496         PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
8497       }
8498       PetscCall(VecDestroy(&check_vec));
8499       PetscCall(VecDestroy(&coarse_vec));
8500       PetscCall(KSPDestroy(&check_ksp));
8501       if (compute_eigs) {
8502         PetscCall(PetscFree(eigs_r));
8503         PetscCall(PetscFree(eigs_c));
8504       }
8505     }
8506   }
8507   PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
8508   /* print additional info */
8509   if (pcbddc->dbg_flag) {
8510     /* waits until all processes reaches this point */
8511     PetscCall(PetscBarrier((PetscObject)pc));
8512     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
8513     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8514   }
8515 
8516   /* free memory */
8517   PetscCall(MatDestroy(&coarse_mat));
8518   PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
8519   PetscFunctionReturn(0);
8520 }
8521 
8522 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
8523 {
8524   PC_BDDC        *pcbddc = (PC_BDDC *)pc->data;
8525   PC_IS          *pcis   = (PC_IS *)pc->data;
8526   Mat_IS         *matis  = (Mat_IS *)pc->pmat->data;
8527   IS              subset, subset_mult, subset_n;
8528   PetscInt        local_size, coarse_size = 0;
8529   PetscInt       *local_primal_indices = NULL;
8530   const PetscInt *t_local_primal_indices;
8531 
8532   PetscFunctionBegin;
8533   /* Compute global number of coarse dofs */
8534   PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
8535   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
8536   PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
8537   PetscCall(ISDestroy(&subset_n));
8538   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
8539   PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
8540   PetscCall(ISDestroy(&subset));
8541   PetscCall(ISDestroy(&subset_mult));
8542   PetscCall(ISGetLocalSize(subset_n, &local_size));
8543   PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
8544   PetscCall(PetscMalloc1(local_size, &local_primal_indices));
8545   PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
8546   PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
8547   PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
8548   PetscCall(ISDestroy(&subset_n));
8549 
8550   /* check numbering */
8551   if (pcbddc->dbg_flag) {
8552     PetscScalar coarsesum, *array, *array2;
8553     PetscInt    i;
8554     PetscBool   set_error = PETSC_FALSE, set_error_reduced = PETSC_FALSE;
8555 
8556     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8557     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8558     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse indices\n"));
8559     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8560     /* counter */
8561     PetscCall(VecSet(pcis->vec1_global, 0.0));
8562     PetscCall(VecSet(pcis->vec1_N, 1.0));
8563     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8564     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8565     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD));
8566     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec2_N, INSERT_VALUES, SCATTER_FORWARD));
8567     PetscCall(VecSet(pcis->vec1_N, 0.0));
8568     for (i = 0; i < pcbddc->local_primal_size; i++) PetscCall(VecSetValue(pcis->vec1_N, pcbddc->primal_indices_local_idxs[i], 1.0, INSERT_VALUES));
8569     PetscCall(VecAssemblyBegin(pcis->vec1_N));
8570     PetscCall(VecAssemblyEnd(pcis->vec1_N));
8571     PetscCall(VecSet(pcis->vec1_global, 0.0));
8572     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8573     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8574     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
8575     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_global, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
8576     PetscCall(VecGetArray(pcis->vec1_N, &array));
8577     PetscCall(VecGetArray(pcis->vec2_N, &array2));
8578     for (i = 0; i < pcis->n; i++) {
8579       if (array[i] != 0.0 && array[i] != array2[i]) {
8580         PetscInt owned = (PetscInt)PetscRealPart(array[i]), gi;
8581         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8582         set_error      = PETSC_TRUE;
8583         PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, 1, &i, &gi));
8584         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d: local index %" PetscInt_FMT " (gid %" PetscInt_FMT ") owned by %" PetscInt_FMT " processes instead of %" PetscInt_FMT "!\n", PetscGlobalRank, i, gi, owned, neigh));
8585       }
8586     }
8587     PetscCall(VecRestoreArray(pcis->vec2_N, &array2));
8588     PetscCall(MPIU_Allreduce(&set_error, &set_error_reduced, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8589     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8590     for (i = 0; i < pcis->n; i++) {
8591       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0 / PetscRealPart(array[i]);
8592     }
8593     PetscCall(VecRestoreArray(pcis->vec1_N, &array));
8594     PetscCall(VecSet(pcis->vec1_global, 0.0));
8595     PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8596     PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
8597     PetscCall(VecSum(pcis->vec1_global, &coarsesum));
8598     PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT " (%lf)\n", coarse_size, (double)PetscRealPart(coarsesum)));
8599     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8600       PetscInt *gidxs;
8601 
8602       PetscCall(PetscMalloc1(pcbddc->local_primal_size, &gidxs));
8603       PetscCall(ISLocalToGlobalMappingApply(pcis->mapping, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, gidxs));
8604       PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Distribution of local primal indices\n"));
8605       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8606       PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d\n", PetscGlobalRank));
8607       for (i = 0; i < pcbddc->local_primal_size; i++) {
8608         PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_primal_indices[%" PetscInt_FMT "]=%" PetscInt_FMT " (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, local_primal_indices[i], pcbddc->primal_indices_local_idxs[i], gidxs[i]));
8609       }
8610       PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8611       PetscCall(PetscFree(gidxs));
8612     }
8613     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8614     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8615     PetscCheck(!set_error_reduced, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "BDDC Numbering of coarse dofs failed");
8616   }
8617 
8618   /* get back data */
8619   *coarse_size_n          = coarse_size;
8620   *local_primal_indices_n = local_primal_indices;
8621   PetscFunctionReturn(0);
8622 }
8623 
8624 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
8625 {
8626   IS           localis_t;
8627   PetscInt     i, lsize, *idxs, n;
8628   PetscScalar *vals;
8629 
8630   PetscFunctionBegin;
8631   /* get indices in local ordering exploiting local to global map */
8632   PetscCall(ISGetLocalSize(globalis, &lsize));
8633   PetscCall(PetscMalloc1(lsize, &vals));
8634   for (i = 0; i < lsize; i++) vals[i] = 1.0;
8635   PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
8636   PetscCall(VecSet(gwork, 0.0));
8637   PetscCall(VecSet(lwork, 0.0));
8638   if (idxs) { /* multilevel guard */
8639     PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
8640     PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
8641   }
8642   PetscCall(VecAssemblyBegin(gwork));
8643   PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
8644   PetscCall(PetscFree(vals));
8645   PetscCall(VecAssemblyEnd(gwork));
8646   /* now compute set in local ordering */
8647   PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
8648   PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
8649   PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
8650   PetscCall(VecGetSize(lwork, &n));
8651   for (i = 0, lsize = 0; i < n; i++) {
8652     if (PetscRealPart(vals[i]) > 0.5) lsize++;
8653   }
8654   PetscCall(PetscMalloc1(lsize, &idxs));
8655   for (i = 0, lsize = 0; i < n; i++) {
8656     if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
8657   }
8658   PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
8659   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
8660   *localis = localis_t;
8661   PetscFunctionReturn(0);
8662 }
8663 
8664 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
8665 {
8666   PC_IS   *pcis   = (PC_IS *)pc->data;
8667   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8668   PC_IS   *pcisf;
8669   PC_BDDC *pcbddcf;
8670   PC       pcf;
8671 
8672   PetscFunctionBegin;
8673   PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
8674   PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
8675   PetscCall(PCSetType(pcf, PCBDDC));
8676 
8677   pcisf   = (PC_IS *)pcf->data;
8678   pcbddcf = (PC_BDDC *)pcf->data;
8679 
8680   pcisf->is_B_local = pcis->is_B_local;
8681   pcisf->vec1_N     = pcis->vec1_N;
8682   pcisf->BtoNmap    = pcis->BtoNmap;
8683   pcisf->n          = pcis->n;
8684   pcisf->n_B        = pcis->n_B;
8685 
8686   PetscCall(PetscFree(pcbddcf->mat_graph));
8687   PetscCall(PetscFree(pcbddcf->sub_schurs));
8688   pcbddcf->mat_graph             = graph ? graph : pcbddc->mat_graph;
8689   pcbddcf->sub_schurs            = schurs;
8690   pcbddcf->adaptive_selection    = schurs ? PETSC_TRUE : PETSC_FALSE;
8691   pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
8692   pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
8693   pcbddcf->adaptive_nmin         = pcbddc->adaptive_nmin;
8694   pcbddcf->adaptive_nmax         = pcbddc->adaptive_nmax;
8695   pcbddcf->use_faces             = PETSC_TRUE;
8696   pcbddcf->use_change_of_basis   = (PetscBool)!constraints;
8697   pcbddcf->use_change_on_faces   = (PetscBool)!constraints;
8698   pcbddcf->use_qr_single         = (PetscBool)!constraints;
8699   pcbddcf->fake_change           = PETSC_TRUE;
8700   pcbddcf->dbg_flag              = pcbddc->dbg_flag;
8701 
8702   PetscCall(PCBDDCAdaptiveSelection(pcf));
8703   PetscCall(PCBDDCConstraintsSetUp(pcf));
8704 
8705   *change = pcbddcf->ConstraintMatrix;
8706   if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
8707   if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
8708   if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
8709 
8710   if (schurs) pcbddcf->sub_schurs = NULL;
8711   pcbddcf->ConstraintMatrix = NULL;
8712   pcbddcf->mat_graph        = NULL;
8713   pcisf->is_B_local         = NULL;
8714   pcisf->vec1_N             = NULL;
8715   pcisf->BtoNmap            = NULL;
8716   PetscCall(PCDestroy(&pcf));
8717   PetscFunctionReturn(0);
8718 }
8719 
8720 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
8721 {
8722   PC_IS          *pcis       = (PC_IS *)pc->data;
8723   PC_BDDC        *pcbddc     = (PC_BDDC *)pc->data;
8724   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
8725   Mat             S_j;
8726   PetscInt       *used_xadj, *used_adjncy;
8727   PetscBool       free_used_adj;
8728 
8729   PetscFunctionBegin;
8730   PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
8731   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
8732   free_used_adj = PETSC_FALSE;
8733   if (pcbddc->sub_schurs_layers == -1) {
8734     used_xadj   = NULL;
8735     used_adjncy = NULL;
8736   } else {
8737     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
8738       used_xadj   = pcbddc->mat_graph->xadj;
8739       used_adjncy = pcbddc->mat_graph->adjncy;
8740     } else if (pcbddc->computed_rowadj) {
8741       used_xadj   = pcbddc->mat_graph->xadj;
8742       used_adjncy = pcbddc->mat_graph->adjncy;
8743     } else {
8744       PetscBool       flg_row = PETSC_FALSE;
8745       const PetscInt *xadj, *adjncy;
8746       PetscInt        nvtxs;
8747 
8748       PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
8749       if (flg_row) {
8750         PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
8751         PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
8752         PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
8753         free_used_adj = PETSC_TRUE;
8754       } else {
8755         pcbddc->sub_schurs_layers = -1;
8756         used_xadj                 = NULL;
8757         used_adjncy               = NULL;
8758       }
8759       PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
8760     }
8761   }
8762 
8763   /* setup sub_schurs data */
8764   PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
8765   if (!sub_schurs->schur_explicit) {
8766     /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
8767     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
8768     PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
8769   } else {
8770     Mat       change        = NULL;
8771     Vec       scaling       = NULL;
8772     IS        change_primal = NULL, iP;
8773     PetscInt  benign_n;
8774     PetscBool reuse_solvers     = (PetscBool)!pcbddc->use_change_of_basis;
8775     PetscBool need_change       = PETSC_FALSE;
8776     PetscBool discrete_harmonic = PETSC_FALSE;
8777 
8778     if (!pcbddc->use_vertices && reuse_solvers) {
8779       PetscInt n_vertices;
8780 
8781       PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
8782       reuse_solvers = (PetscBool)!n_vertices;
8783     }
8784     if (!pcbddc->benign_change_explicit) {
8785       benign_n = pcbddc->benign_n;
8786     } else {
8787       benign_n = 0;
8788     }
8789     /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
8790        We need a global reduction to avoid possible deadlocks.
8791        We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
8792     if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
8793       PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
8794       PetscCall(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8795       need_change = (PetscBool)(!need_change);
8796     }
8797     /* If the user defines additional constraints, we import them here */
8798     if (need_change) {
8799       PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
8800       PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
8801     }
8802     if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
8803 
8804     PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
8805     if (iP) {
8806       PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
8807       PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
8808       PetscOptionsEnd();
8809     }
8810     if (discrete_harmonic) {
8811       Mat A;
8812       PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
8813       PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
8814       PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
8815       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
8816                                      pcbddc->benign_zerodiag_subs, change, change_primal));
8817       PetscCall(MatDestroy(&A));
8818     } else {
8819       PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
8820                                      pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
8821     }
8822     PetscCall(MatDestroy(&change));
8823     PetscCall(ISDestroy(&change_primal));
8824   }
8825   PetscCall(MatDestroy(&S_j));
8826 
8827   /* free adjacency */
8828   if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
8829   PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
8830   PetscFunctionReturn(0);
8831 }
8832 
8833 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
8834 {
8835   PC_IS      *pcis   = (PC_IS *)pc->data;
8836   PC_BDDC    *pcbddc = (PC_BDDC *)pc->data;
8837   PCBDDCGraph graph;
8838 
8839   PetscFunctionBegin;
8840   /* attach interface graph for determining subsets */
8841   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
8842     IS       verticesIS, verticescomm;
8843     PetscInt vsize, *idxs;
8844 
8845     PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
8846     PetscCall(ISGetSize(verticesIS, &vsize));
8847     PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
8848     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
8849     PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
8850     PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
8851     PetscCall(PCBDDCGraphCreate(&graph));
8852     PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
8853     PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
8854     PetscCall(ISDestroy(&verticescomm));
8855     PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
8856   } else {
8857     graph = pcbddc->mat_graph;
8858   }
8859   /* print some info */
8860   if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
8861     IS       vertices;
8862     PetscInt nv, nedges, nfaces;
8863     PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
8864     PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
8865     PetscCall(ISGetSize(vertices, &nv));
8866     PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
8867     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
8868     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
8869     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges    (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
8870     PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces    (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
8871     PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8872     PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
8873     PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
8874   }
8875 
8876   /* sub_schurs init */
8877   if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
8878   PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
8879 
8880   /* free graph struct */
8881   if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
8882   PetscFunctionReturn(0);
8883 }
8884 
8885 PetscErrorCode PCBDDCCheckOperator(PC pc)
8886 {
8887   PC_IS   *pcis   = (PC_IS *)pc->data;
8888   PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8889 
8890   PetscFunctionBegin;
8891   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
8892     IS           zerodiag = NULL;
8893     Mat          S_j, B0_B = NULL;
8894     Vec          dummy_vec = NULL, vec_check_B, vec_scale_P;
8895     PetscScalar *p0_check, *array, *array2;
8896     PetscReal    norm;
8897     PetscInt     i;
8898 
8899     /* B0 and B0_B */
8900     if (zerodiag) {
8901       IS dummy;
8902 
8903       PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &dummy));
8904       PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
8905       PetscCall(MatCreateVecs(B0_B, NULL, &dummy_vec));
8906       PetscCall(ISDestroy(&dummy));
8907     }
8908     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
8909     PetscCall(VecDuplicate(pcbddc->vec1_P, &vec_scale_P));
8910     PetscCall(VecSet(pcbddc->vec1_P, 1.0));
8911     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD));
8912     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD));
8913     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE));
8914     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, vec_scale_P, INSERT_VALUES, SCATTER_REVERSE));
8915     PetscCall(VecReciprocal(vec_scale_P));
8916     /* S_j */
8917     PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
8918     PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
8919 
8920     /* mimic vector in \widetilde{W}_\Gamma */
8921     PetscCall(VecSetRandom(pcis->vec1_N, NULL));
8922     /* continuous in primal space */
8923     PetscCall(VecSetRandom(pcbddc->coarse_vec, NULL));
8924     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE));
8925     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE));
8926     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
8927     PetscCall(PetscCalloc1(pcbddc->benign_n, &p0_check));
8928     for (i = 0; i < pcbddc->benign_n; i++) p0_check[i] = array[pcbddc->local_primal_size - pcbddc->benign_n + i];
8929     PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES));
8930     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
8931     PetscCall(VecAssemblyBegin(pcis->vec1_N));
8932     PetscCall(VecAssemblyEnd(pcis->vec1_N));
8933     PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD));
8934     PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec2_B, INSERT_VALUES, SCATTER_FORWARD));
8935     PetscCall(VecDuplicate(pcis->vec2_B, &vec_check_B));
8936     PetscCall(VecCopy(pcis->vec2_B, vec_check_B));
8937 
8938     /* assemble rhs for coarse problem */
8939     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
8940     /* local with Schur */
8941     PetscCall(MatMult(S_j, pcis->vec2_B, pcis->vec1_B));
8942     if (zerodiag) {
8943       PetscCall(VecGetArray(dummy_vec, &array));
8944       for (i = 0; i < pcbddc->benign_n; i++) array[i] = p0_check[i];
8945       PetscCall(VecRestoreArray(dummy_vec, &array));
8946       PetscCall(MatMultTransposeAdd(B0_B, dummy_vec, pcis->vec1_B, pcis->vec1_B));
8947     }
8948     /* sum on primal nodes the local contributions */
8949     PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE));
8950     PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_B, pcis->vec1_N, INSERT_VALUES, SCATTER_REVERSE));
8951     PetscCall(VecGetArray(pcis->vec1_N, &array));
8952     PetscCall(VecGetArray(pcbddc->vec1_P, &array2));
8953     for (i = 0; i < pcbddc->local_primal_size; i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
8954     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array2));
8955     PetscCall(VecRestoreArray(pcis->vec1_N, &array));
8956     PetscCall(VecSet(pcbddc->coarse_vec, 0.));
8957     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD));
8958     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, ADD_VALUES, SCATTER_FORWARD));
8959     PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE));
8960     PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->coarse_vec, pcbddc->vec1_P, INSERT_VALUES, SCATTER_REVERSE));
8961     PetscCall(VecGetArray(pcbddc->vec1_P, &array));
8962     /* scale primal nodes (BDDC sums contibutions) */
8963     PetscCall(VecPointwiseMult(pcbddc->vec1_P, vec_scale_P, pcbddc->vec1_P));
8964     PetscCall(VecSetValues(pcis->vec1_N, pcbddc->local_primal_size, pcbddc->local_primal_ref_node, array, INSERT_VALUES));
8965     PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
8966     PetscCall(VecAssemblyBegin(pcis->vec1_N));
8967     PetscCall(VecAssemblyEnd(pcis->vec1_N));
8968     PetscCall(VecScatterBegin(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
8969     PetscCall(VecScatterEnd(pcis->N_to_B, pcis->vec1_N, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
8970     /* global: \widetilde{B0}_B w_\Gamma */
8971     if (zerodiag) {
8972       PetscCall(MatMult(B0_B, pcis->vec2_B, dummy_vec));
8973       PetscCall(VecGetArray(dummy_vec, &array));
8974       for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = array[i];
8975       PetscCall(VecRestoreArray(dummy_vec, &array));
8976     }
8977     /* BDDC */
8978     PetscCall(VecSet(pcis->vec1_D, 0.));
8979     PetscCall(PCBDDCApplyInterfacePreconditioner(pc, PETSC_FALSE));
8980 
8981     PetscCall(VecCopy(pcis->vec1_B, pcis->vec2_B));
8982     PetscCall(VecAXPY(pcis->vec1_B, -1.0, vec_check_B));
8983     PetscCall(VecNorm(pcis->vec1_B, NORM_INFINITY, &norm));
8984     PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC local error is %1.4e\n", PetscGlobalRank, (double)norm));
8985     for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "[%d] BDDC p0[%" PetscInt_FMT "] error is %1.4e\n", PetscGlobalRank, i, (double)PetscAbsScalar(pcbddc->benign_p0[i] - p0_check[i])));
8986     PetscCall(PetscFree(p0_check));
8987     PetscCall(VecDestroy(&vec_scale_P));
8988     PetscCall(VecDestroy(&vec_check_B));
8989     PetscCall(VecDestroy(&dummy_vec));
8990     PetscCall(MatDestroy(&S_j));
8991     PetscCall(MatDestroy(&B0_B));
8992   }
8993   PetscFunctionReturn(0);
8994 }
8995 
8996 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8997 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
8998 {
8999   Mat         At;
9000   IS          rows;
9001   PetscInt    rst, ren;
9002   PetscLayout rmap;
9003 
9004   PetscFunctionBegin;
9005   rst = ren = 0;
9006   if (ccomm != MPI_COMM_NULL) {
9007     PetscCall(PetscLayoutCreate(ccomm, &rmap));
9008     PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9009     PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9010     PetscCall(PetscLayoutSetUp(rmap));
9011     PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9012   }
9013   PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9014   PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9015   PetscCall(ISDestroy(&rows));
9016 
9017   if (ccomm != MPI_COMM_NULL) {
9018     Mat_MPIAIJ *a, *b;
9019     IS          from, to;
9020     Vec         gvec;
9021     PetscInt    lsize;
9022 
9023     PetscCall(MatCreate(ccomm, B));
9024     PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9025     PetscCall(MatSetType(*B, MATAIJ));
9026     PetscCall(PetscLayoutDestroy(&((*B)->rmap)));
9027     PetscCall(PetscLayoutSetUp((*B)->cmap));
9028     a = (Mat_MPIAIJ *)At->data;
9029     b = (Mat_MPIAIJ *)(*B)->data;
9030     PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9031     PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9032     PetscCall(PetscObjectReference((PetscObject)a->A));
9033     PetscCall(PetscObjectReference((PetscObject)a->B));
9034     b->A = a->A;
9035     b->B = a->B;
9036 
9037     b->donotstash   = a->donotstash;
9038     b->roworiented  = a->roworiented;
9039     b->rowindices   = NULL;
9040     b->rowvalues    = NULL;
9041     b->getrowactive = PETSC_FALSE;
9042 
9043     (*B)->rmap         = rmap;
9044     (*B)->factortype   = A->factortype;
9045     (*B)->assembled    = PETSC_TRUE;
9046     (*B)->insertmode   = NOT_SET_VALUES;
9047     (*B)->preallocated = PETSC_TRUE;
9048 
9049     if (a->colmap) {
9050 #if defined(PETSC_USE_CTABLE)
9051       PetscCall(PetscTableCreateCopy(a->colmap, &b->colmap));
9052 #else
9053       PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9054       PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9055 #endif
9056     } else b->colmap = NULL;
9057     if (a->garray) {
9058       PetscInt len;
9059       len = a->B->cmap->n;
9060       PetscCall(PetscMalloc1(len + 1, &b->garray));
9061       if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9062     } else b->garray = NULL;
9063 
9064     PetscCall(PetscObjectReference((PetscObject)a->lvec));
9065     b->lvec = a->lvec;
9066 
9067     /* cannot use VecScatterCopy */
9068     PetscCall(VecGetLocalSize(b->lvec, &lsize));
9069     PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9070     PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9071     PetscCall(MatCreateVecs(*B, &gvec, NULL));
9072     PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9073     PetscCall(ISDestroy(&from));
9074     PetscCall(ISDestroy(&to));
9075     PetscCall(VecDestroy(&gvec));
9076   }
9077   PetscCall(MatDestroy(&At));
9078   PetscFunctionReturn(0);
9079 }
9080