1 #include <../src/mat/impls/aij/seq/aij.h>
2 #include <petsc/private/pcbddcimpl.h>
3 #include <petsc/private/pcbddcprivateimpl.h>
4 #include <petsc/private/kernels/blockinvert.h>
5 #include <../src/mat/impls/dense/seq/dense.h>
6 #include <petscdmplex.h>
7 #include <petscblaslapack.h>
8 #include <petsc/private/sfimpl.h>
9 #include <petsc/private/dmpleximpl.h>
10 #include <petscdmda.h>
11
12 static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
13
14 /* if range is true, it returns B s.t. span{B} = range(A)
15 if range is false, it returns B s.t. range(B) _|_ range(A) */
MatDenseOrthogonalRangeOrComplement(Mat A,PetscBool range,PetscInt lw,PetscScalar * work,PetscReal * rwork,Mat * B)16 static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17 {
18 PetscScalar *uwork, *data, *U, ds = 0.;
19 PetscReal *sing;
20 PetscBLASInt bM, bN, lwork, lierr, di = 1;
21 PetscInt ulw, i, nr, nc, n;
22 #if defined(PETSC_USE_COMPLEX)
23 PetscReal *rwork2;
24 #endif
25
26 PetscFunctionBegin;
27 PetscCall(MatGetSize(A, &nr, &nc));
28 if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
29
30 /* workspace */
31 if (!work) {
32 ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33 PetscCall(PetscMalloc1(ulw, &uwork));
34 } else {
35 ulw = lw;
36 uwork = work;
37 }
38 n = PetscMin(nr, nc);
39 if (!rwork) {
40 PetscCall(PetscMalloc1(n, &sing));
41 } else {
42 sing = rwork;
43 }
44
45 /* SVD */
46 PetscCall(PetscMalloc1(nr * nr, &U));
47 PetscCall(PetscBLASIntCast(nr, &bM));
48 PetscCall(PetscBLASIntCast(nc, &bN));
49 PetscCall(PetscBLASIntCast(ulw, &lwork));
50 PetscCall(MatDenseGetArray(A, &data));
51 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52 #if !defined(PETSC_USE_COMPLEX)
53 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54 #else
55 PetscCall(PetscMalloc1(5 * n, &rwork2));
56 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57 PetscCall(PetscFree(rwork2));
58 #endif
59 PetscCall(PetscFPTrapPop());
60 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
61 PetscCall(MatDenseRestoreArray(A, &data));
62 for (i = 0; i < n; i++)
63 if (sing[i] < PETSC_SMALL) break;
64 if (!rwork) PetscCall(PetscFree(sing));
65 if (!work) PetscCall(PetscFree(uwork));
66 /* create B */
67 if (!range) {
68 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69 PetscCall(MatDenseGetArray(*B, &data));
70 PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71 } else {
72 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73 PetscCall(MatDenseGetArray(*B, &data));
74 PetscCall(PetscArraycpy(data, U, i * nr));
75 }
76 PetscCall(MatDenseRestoreArray(*B, &data));
77 PetscCall(PetscFree(U));
78 PetscFunctionReturn(PETSC_SUCCESS);
79 }
80
81 /* TODO REMOVE */
82 #if defined(PRINT_GDET)
83 static int inc = 0;
84 static int lev = 0;
85 #endif
86
PCBDDCComputeNedelecChangeEdge(Mat lG,IS edge,IS extrow,IS extcol,IS corners,Mat * Gins,Mat * GKins,PetscScalar cvals[2],PetscScalar * work,PetscReal * rwork)87 static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88 {
89 Mat GE, GEd;
90 PetscInt rsize, csize, esize;
91 PetscScalar *ptr;
92
93 PetscFunctionBegin;
94 PetscCall(ISGetSize(edge, &esize));
95 if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96 PetscCall(ISGetSize(extrow, &rsize));
97 PetscCall(ISGetSize(extcol, &csize));
98
99 /* gradients */
100 ptr = work + 5 * esize;
101 PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104 PetscCall(MatDestroy(&GE));
105
106 /* constants */
107 ptr += rsize * csize;
108 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109 PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110 PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111 PetscCall(MatDestroy(&GE));
112 PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113 PetscCall(MatDestroy(&GEd));
114
115 if (corners) {
116 Mat GEc;
117 const PetscScalar *vals;
118 PetscScalar v;
119
120 PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121 PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122 PetscCall(MatDenseGetArrayRead(GEd, &vals));
123 /* v = PetscAbsScalar(vals[0]); */
124 v = 1.;
125 cvals[0] = vals[0] / v;
126 cvals[1] = vals[1] / v;
127 PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128 PetscCall(MatScale(*GKins, 1. / v));
129 #if defined(PRINT_GDET)
130 {
131 PetscViewer viewer;
132 char filename[256];
133 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136 PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137 PetscCall(MatView(GEc, viewer));
138 PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139 PetscCall(MatView(*GKins, viewer));
140 PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141 PetscCall(MatView(GEd, viewer));
142 PetscCall(PetscViewerDestroy(&viewer));
143 }
144 #endif
145 PetscCall(MatDestroy(&GEd));
146 PetscCall(MatDestroy(&GEc));
147 }
148 PetscFunctionReturn(PETSC_SUCCESS);
149 }
150
151 static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
152
PCBDDCNedelecSupport(PC pc)153 PetscErrorCode PCBDDCNedelecSupport(PC pc)
154 {
155 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
156 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
157 Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158 PetscSF sfv;
159 ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160 MPI_Comm comm;
161 IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162 IS *eedges, *extrows, *extcols, *alleedges;
163 PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164 PetscScalar *vals, *work;
165 PetscReal *rwork;
166 const PetscInt *idxs, *ii, *jj, *iit, *jjt;
167 PetscInt ne, nv, Lv, order, n, field;
168 PetscInt i, j, extmem, cum, maxsize, nee;
169 PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170 PetscInt *sfvleaves, *sfvroots;
171 PetscInt *corners, *cedges;
172 PetscInt *ecount, **eneighs, *vcount, **vneighs;
173 PetscInt *emarks;
174 PetscBool print, eerr, done, lrc[2], conforming, global, setprimal;
175
176 PetscFunctionBegin;
177 /* If the discrete gradient is defined for a subset of dofs and global is true,
178 it assumes G is given in global ordering for all the dofs.
179 Otherwise, the ordering is global for the Nedelec field */
180 order = pcbddc->nedorder;
181 conforming = pcbddc->conforming;
182 field = pcbddc->nedfield;
183 global = pcbddc->nedglobal;
184 setprimal = PETSC_FALSE;
185 print = PETSC_FALSE;
186
187 /* Command line customization */
188 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190 /* print debug info and adaptive order TODO: to be removed */
191 PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192 PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193 PetscOptionsEnd();
194
195 /* Return if there are no edges in the decomposition */
196 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197 PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198 PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200 lrc[0] = PETSC_FALSE;
201 for (i = 0; i < n; i++) {
202 if (PetscRealPart(vals[i]) > 2.) {
203 lrc[0] = PETSC_TRUE;
204 break;
205 }
206 }
207 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208 PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPI_C_BOOL, MPI_LOR, comm));
209 if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
210
211 /* Get Nedelec field */
212 PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213 if (pcbddc->n_ISForDofsLocal && field >= 0) {
214 PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215 nedfieldlocal = pcbddc->ISForDofsLocal[field];
216 PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218 ne = n;
219 nedfieldlocal = NULL;
220 global = PETSC_TRUE;
221 } else if (field == PETSC_DECIDE) {
222 PetscInt rst, ren, *idx;
223
224 PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226 PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227 for (i = rst; i < ren; i++) {
228 PetscInt nc;
229
230 PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231 if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232 PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233 }
234 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236 PetscCall(PetscMalloc1(n, &idx));
237 for (i = 0, ne = 0; i < n; i++)
238 if (matis->sf_leafdata[i]) idx[ne++] = i;
239 PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240 } else {
241 SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242 }
243
244 /* Sanity checks */
245 PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246 PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247 PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
248
249 /* Just set primal dofs and return */
250 if (setprimal) {
251 IS enedfieldlocal;
252 PetscInt *eidxs;
253
254 PetscCall(PetscMalloc1(ne, &eidxs));
255 PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256 if (nedfieldlocal) {
257 PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258 for (i = 0, cum = 0; i < ne; i++) {
259 if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260 }
261 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262 } else {
263 for (i = 0, cum = 0; i < ne; i++) {
264 if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265 }
266 }
267 PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268 PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270 PetscCall(PetscFree(eidxs));
271 PetscCall(ISDestroy(&nedfieldlocal));
272 PetscCall(ISDestroy(&enedfieldlocal));
273 PetscFunctionReturn(PETSC_SUCCESS);
274 }
275
276 /* Compute some l2g maps */
277 if (nedfieldlocal) {
278 IS is;
279
280 /* need to map from the local Nedelec field to local numbering */
281 PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282 /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283 PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284 PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286 if (global) {
287 PetscCall(PetscObjectReference((PetscObject)al2g));
288 el2g = al2g;
289 } else {
290 IS gis;
291
292 PetscCall(ISRenumber(is, NULL, NULL, &gis));
293 PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294 PetscCall(ISDestroy(&gis));
295 }
296 PetscCall(ISDestroy(&is));
297 } else {
298 /* one ref for the destruction of al2g, one for el2g */
299 PetscCall(PetscObjectReference((PetscObject)al2g));
300 PetscCall(PetscObjectReference((PetscObject)al2g));
301 el2g = al2g;
302 fl2g = NULL;
303 }
304
305 /* Start communication to drop connections for interior edges (for cc analysis only) */
306 PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308 if (nedfieldlocal) {
309 PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310 for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312 } else {
313 for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314 }
315 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
317
318 /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319 Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320 if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
321
322 /* drop connections with interior edges to avoid unneeded communications and memory movements */
323 PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324 PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325 PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326 if (global) {
327 PetscInt rst;
328
329 PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331 if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332 }
333 PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334 PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335 } else {
336 PetscInt *tbz;
337
338 PetscCall(PetscMalloc1(ne, &tbz));
339 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341 PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342 for (i = 0, cum = 0; i < ne; i++)
343 if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345 PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346 PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347 PetscCall(PetscFree(tbz));
348 }
349
350 /* Extract subdomain relevant rows of G */
351 PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352 PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353 PetscCall(MatAIJExtractRows(G, lned, &lGall));
354 /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355 PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356 PetscCall(ISDestroy(&lned));
357 PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358 PetscCall(MatDestroy(&lGall));
359 PetscCall(MatISGetLocalMat(lGis, &lG));
360 if (matis->allow_repeated) { /* multi-element support */
361 Mat *lGn, B;
362 IS *is_rows, *tcols, tmap, nmap;
363 PetscInt subnv;
364 const PetscInt *subvidxs;
365 ISLocalToGlobalMapping mapn;
366
367 PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369 PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371 if (fl2g) {
372 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373 } else {
374 PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375 is_rows[i] = pcbddc->local_subs[i];
376 }
377 PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379 PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380 PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383 PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384 }
385
386 /* Create new MATIS with repeated vertices */
387 PetscCall(MatCreate(comm, &B));
388 PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389 PetscCall(MatSetType(B, MATIS));
390 PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391 PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392 PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393 PetscCall(ISDestroy(&tmap));
394 PetscCall(ISGetLocalSize(nmap, &subnv));
395 PetscCall(ISGetIndices(nmap, &subvidxs));
396 PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397 PetscCall(ISRestoreIndices(nmap, &subvidxs));
398 PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399 PetscCall(ISDestroy(&tmap));
400 PetscCall(ISDestroy(&nmap));
401 PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402 PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403 PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405 PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406 PetscCall(ISDestroy(&is_rows[i]));
407 PetscCall(ISDestroy(&tcols[i]));
408 }
409 PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410 PetscCall(PetscFree(lGn));
411 PetscCall(PetscFree(is_rows));
412 PetscCall(PetscFree(tcols));
413 PetscCall(MatISSetLocalMat(B, lG));
414 PetscCall(MatDestroy(&lG));
415
416 PetscCall(MatDestroy(&lGis));
417 lGis = B;
418
419 lGis->assembled = PETSC_TRUE;
420 }
421 PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
422
423 /* SF for nodal dofs communications */
424 PetscCall(MatGetLocalSize(G, NULL, &Lv));
425 PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426 PetscCall(PetscObjectReference((PetscObject)vl2g));
427 PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428 PetscCall(PetscSFCreate(comm, &sfv));
429 PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430 PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431 PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
432
433 if (elements_corners) {
434 IS tmp;
435 Vec global, local;
436 Mat_IS *tGis = (Mat_IS *)lGis->data;
437
438 PetscCall(MatCreateVecs(lGis, &global, NULL));
439 PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440 PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441 PetscCall(VecDestroy(&global));
442 PetscCall(VecDestroy(&local));
443 elements_corners = tmp;
444 }
445
446 /* Destroy temporary G */
447 PetscCall(MatISGetLocalMat(lGis, &lG));
448 PetscCall(PetscObjectReference((PetscObject)lG));
449 PetscCall(MatDestroy(&G));
450 PetscCall(MatDestroy(&lGis));
451
452 if (print) {
453 PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454 PetscCall(MatView(lG, NULL));
455 }
456
457 /* Save lG for values insertion in change of basis */
458 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
459
460 /* Analyze the edge-nodes connections (duplicate lG) */
461 PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462 PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463 PetscCall(PetscBTCreate(nv, &btv));
464 PetscCall(PetscBTCreate(ne, &bte));
465 PetscCall(PetscBTCreate(ne, &btb));
466 PetscCall(PetscBTCreate(ne, &btbd));
467 /* need to import the boundary specification to ensure the
468 proper detection of coarse edges' endpoints */
469 if (pcbddc->DirichletBoundariesLocal) {
470 IS is;
471
472 if (fl2g) {
473 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474 } else {
475 is = pcbddc->DirichletBoundariesLocal;
476 }
477 PetscCall(ISGetLocalSize(is, &cum));
478 PetscCall(ISGetIndices(is, &idxs));
479 for (i = 0; i < cum; i++) {
480 if (idxs[i] >= 0 && idxs[i] < ne) {
481 PetscCall(PetscBTSet(btb, idxs[i]));
482 PetscCall(PetscBTSet(btbd, idxs[i]));
483 }
484 }
485 PetscCall(ISRestoreIndices(is, &idxs));
486 if (fl2g) PetscCall(ISDestroy(&is));
487 }
488 if (pcbddc->NeumannBoundariesLocal) {
489 IS is;
490
491 if (fl2g) {
492 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493 } else {
494 is = pcbddc->NeumannBoundariesLocal;
495 }
496 PetscCall(ISGetLocalSize(is, &cum));
497 PetscCall(ISGetIndices(is, &idxs));
498 for (i = 0; i < cum; i++) {
499 if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500 }
501 PetscCall(ISRestoreIndices(is, &idxs));
502 if (fl2g) PetscCall(ISDestroy(&is));
503 }
504
505 /* Count neighs per dof */
506 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
508
509 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510 for proper detection of coarse edges' endpoints */
511 PetscCall(PetscBTCreate(ne, &btee));
512 for (i = 0; i < ne; i++) {
513 if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514 }
515 PetscCall(PetscMalloc1(ne, &marks));
516 if (!conforming) {
517 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519 }
520 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521 PetscCall(MatSeqAIJGetArray(lGe, &vals));
522 cum = 0;
523 for (i = 0; i < ne; i++) {
524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525 if (!PetscBTLookup(btee, i)) {
526 marks[cum++] = i;
527 continue;
528 }
529 /* set badly connected edge dofs as primal */
530 if (!conforming) {
531 if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532 marks[cum++] = i;
533 PetscCall(PetscBTSet(bte, i));
534 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535 } else {
536 /* every edge dofs should be connected through a certain number of nodal dofs
537 to other edge dofs belonging to coarse edges
538 - at most 2 endpoints
539 - order-1 interior nodal dofs
540 - no undefined nodal dofs (nconn < order)
541 */
542 PetscInt ends = 0, ints = 0, undef = 0;
543 for (j = ii[i]; j < ii[i + 1]; j++) {
544 PetscInt v = jj[j], k;
545 PetscInt nconn = iit[v + 1] - iit[v];
546 for (k = iit[v]; k < iit[v + 1]; k++)
547 if (!PetscBTLookup(btee, jjt[k])) nconn--;
548 if (nconn > order) ends++;
549 else if (nconn == order) ints++;
550 else undef++;
551 }
552 if (undef || ends > 2 || ints != order - 1) {
553 marks[cum++] = i;
554 PetscCall(PetscBTSet(bte, i));
555 for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556 }
557 }
558 }
559 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560 if (!order && ii[i + 1] != ii[i]) {
561 PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562 for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563 }
564 }
565 PetscCall(PetscBTDestroy(&btee));
566 PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568 if (!conforming) {
569 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570 PetscCall(MatDestroy(&lGt));
571 }
572 PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
573
574 /* identify splitpoints and corner candidates */
575 PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576 PetscCall(PetscBTCreate(nv, &btvcand));
577 if (elements_corners) {
578 PetscCall(ISGetLocalSize(elements_corners, &cum));
579 PetscCall(ISGetIndices(elements_corners, &idxs));
580 for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581 PetscCall(ISRestoreIndices(elements_corners, &idxs));
582 }
583
584 if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585 PetscSF emlsf, vmlsf;
586 PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587 PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
588
589 PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591 PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592 PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
593
594 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595 PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
596
597 PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598 for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599 PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600 PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
601
602 PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603 for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604 PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605 PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
606
607 PetscCall(PetscMalloc1(ne, &eleaves));
608 PetscCall(PetscMalloc1(nv, &vleaves));
609 for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610 for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611 PetscCall(PetscMalloc1(emnl, &meleaves));
612 PetscCall(PetscMalloc1(vmnl, &mvleaves));
613
614 PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615 PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616 for (i = 0; i < n_subs; i++) {
617 const PetscInt *idxs;
618 const PetscInt subid = cum_subs + i;
619 PetscInt ns;
620
621 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623 for (j = 0; j < ns; j++) {
624 const PetscInt e = idxs[j];
625
626 eleaves[e] = subid;
627 for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628 }
629 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630 }
631 PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632 PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633 PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634 PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635 PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636 PetscCall(PetscFree(eleaves));
637 PetscCall(PetscFree(vleaves));
638
639 PetscCall(PetscMalloc1(ne + 1, &eneighs));
640 eneighs[0] = meleaves;
641 for (i = 0; i < ne; i++) {
642 PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643 eneighs[i + 1] = eneighs[i] + ecount[i];
644 }
645 PetscCall(PetscMalloc1(nv + 1, &vneighs));
646 vneighs[0] = mvleaves;
647 for (i = 0; i < nv; i++) {
648 PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649 vneighs[i + 1] = vneighs[i] + vcount[i];
650 }
651 } else {
652 PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653 PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654 }
655
656 PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657 if (print) {
658 PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659 PetscCall(MatView(lGe, NULL));
660 PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661 PetscCall(MatView(lGt, NULL));
662 }
663 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664 PetscCall(MatSeqAIJGetArray(lGt, &vals));
665 for (i = 0; i < nv; i++) {
666 PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668 if (!order) { /* variable order */
669 PetscReal vorder = 0.;
670
671 for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672 test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673 PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674 ord = 1;
675 }
676 for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677 const PetscInt e = jj[j];
678
679 if (PetscBTLookup(btbd, e)) {
680 bdir = PETSC_TRUE;
681 break;
682 }
683 if (vc != ecount[e]) {
684 sneighs = PETSC_FALSE;
685 } else {
686 const PetscInt *vn = vneighs[i], *en = eneighs[e];
687
688 for (PetscInt k = 0; k < vc; k++) {
689 if (vn[k] != en[k]) {
690 sneighs = PETSC_FALSE;
691 break;
692 }
693 }
694 }
695 }
696 if (elements_corners) test = 0;
697 if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699 PetscCall(PetscBTSet(btv, i));
700 } else if (test == ord) {
701 if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703 PetscCall(PetscBTSet(btv, i));
704 } else if (!elements_corners) {
705 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706 PetscCall(PetscBTSet(btvcand, i));
707 }
708 }
709 }
710 PetscCall(PetscBTDestroy(&btbd));
711
712 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713 if (order != 1) {
714 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715 PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716 for (i = 0; i < nv; i++) {
717 if (PetscBTLookup(btvcand, i)) {
718 PetscBool found = PETSC_FALSE;
719 for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720 PetscInt k, e = jj[j];
721 if (PetscBTLookup(bte, e)) continue;
722 for (k = iit[e]; k < iit[e + 1]; k++) {
723 PetscInt v = jjt[k];
724 if (v != i && PetscBTLookup(btvcand, v)) {
725 found = PETSC_TRUE;
726 break;
727 }
728 }
729 }
730 if (!found) {
731 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732 PetscCall(PetscBTClear(btvcand, i));
733 } else {
734 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735 }
736 }
737 }
738 PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739 }
740 PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742 PetscCall(MatDestroy(&lGe));
743
744 /* Get the local G^T explicitly */
745 PetscCall(MatDestroy(&lGt));
746 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747 PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
748
749 /* Mark shared nodal dofs */
750 PetscCall(PetscBTCreate(nv, &btvi));
751 for (i = 0; i < nv; i++) {
752 if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753 }
754
755 if (matis->allow_repeated) {
756 PetscCall(PetscFree(eneighs[0]));
757 PetscCall(PetscFree(vneighs[0]));
758 PetscCall(PetscFree(eneighs));
759 PetscCall(PetscFree(vneighs));
760 }
761 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
763
764 /* communicate corners and splitpoints */
765 PetscCall(PetscMalloc1(nv, &vmarks));
766 PetscCall(PetscArrayzero(sfvleaves, nv));
767 PetscCall(PetscArrayzero(sfvroots, Lv));
768 for (i = 0; i < nv; i++)
769 if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
770
771 if (print) {
772 IS tbz;
773
774 cum = 0;
775 for (i = 0; i < nv; i++)
776 if (sfvleaves[i]) vmarks[cum++] = i;
777
778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780 PetscCall(ISView(tbz, NULL));
781 PetscCall(ISDestroy(&tbz));
782 }
783
784 PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785 PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786 PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787 PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
788
789 /* Zero rows of lGt corresponding to identified corners
790 and interior nodal dofs */
791 cum = 0;
792 for (i = 0; i < nv; i++) {
793 if (sfvleaves[i]) {
794 vmarks[cum++] = i;
795 PetscCall(PetscBTSet(btv, i));
796 } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797 }
798 PetscCall(PetscBTDestroy(&btvi));
799 if (print) {
800 IS tbz;
801
802 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803 PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804 PetscCall(ISView(tbz, NULL));
805 PetscCall(ISDestroy(&tbz));
806 }
807 PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808 PetscCall(PetscFree(vmarks));
809 PetscCall(PetscSFDestroy(&sfv));
810 PetscCall(PetscFree2(sfvleaves, sfvroots));
811
812 /* Recompute G */
813 PetscCall(MatDestroy(&lG));
814 PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815 if (print) {
816 PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817 PetscCall(MatView(lG, NULL));
818 PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819 PetscCall(MatView(lGt, NULL));
820 }
821
822 /* Get primal dofs (if any) */
823 cum = 0;
824 for (i = 0; i < ne; i++) {
825 if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826 }
827 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828 PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829 if (print) {
830 PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831 PetscCall(ISView(primals, NULL));
832 }
833 PetscCall(PetscBTDestroy(&bte));
834 /* TODO: what if the user passed in some of them ? */
835 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836 PetscCall(ISDestroy(&primals));
837
838 /* Compute edge connectivity */
839 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
840
841 /* Symbolic conn = lG*lGt */
842 if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843 PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844 PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845 PetscCall(MatProductSetAlgorithm(conn, "default"));
846 PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848 PetscCall(MatProductSetFromOptions(conn));
849 PetscCall(MatProductSymbolic(conn));
850 PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851 if (fl2g) {
852 PetscBT btf;
853 PetscInt *iia, *jja, *iiu, *jju;
854 PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
855
856 /* create CSR for all local dofs */
857 PetscCall(PetscMalloc1(n + 1, &iia));
858 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859 PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860 iiu = pcbddc->mat_graph->xadj;
861 jju = pcbddc->mat_graph->adjncy;
862 } else if (pcbddc->use_local_adj) {
863 rest = PETSC_TRUE;
864 PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865 } else {
866 free = PETSC_TRUE;
867 PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868 iiu[0] = 0;
869 for (i = 0; i < n; i++) {
870 iiu[i + 1] = i + 1;
871 jju[i] = -1;
872 }
873 }
874
875 /* import sizes of CSR */
876 iia[0] = 0;
877 for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
878
879 /* overwrite entries corresponding to the Nedelec field */
880 PetscCall(PetscBTCreate(n, &btf));
881 PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882 for (i = 0; i < ne; i++) {
883 PetscCall(PetscBTSet(btf, idxs[i]));
884 iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885 }
886
887 /* iia in CSR */
888 for (i = 0; i < n; i++) iia[i + 1] += iia[i];
889
890 /* jja in CSR */
891 PetscCall(PetscMalloc1(iia[n], &jja));
892 for (i = 0; i < n; i++)
893 if (!PetscBTLookup(btf, i))
894 for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
895
896 /* map edge dofs connectivity */
897 if (jj) {
898 PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899 for (i = 0; i < ne; i++) {
900 PetscInt e = idxs[i];
901 for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902 }
903 }
904 PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906 if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907 if (free) PetscCall(PetscFree2(iiu, jju));
908 PetscCall(PetscBTDestroy(&btf));
909 } else {
910 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911 }
912 PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913 PetscCall(MatDestroy(&conn));
914 }
915
916 /* Analyze interface for edge dofs */
917 PetscCall(PCBDDCAnalyzeInterface(pc));
918 pcbddc->mat_graph->twodim = PETSC_FALSE;
919
920 /* Get coarse edges in the edge space */
921 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
922
923 if (fl2g) {
924 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925 PetscCall(PetscMalloc1(nee, &eedges));
926 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927 } else {
928 eedges = alleedges;
929 primals = allprimals;
930 }
931
932 /* Mark fine edge dofs with their coarse edge id */
933 PetscCall(PetscArrayzero(marks, ne));
934 PetscCall(ISGetLocalSize(primals, &cum));
935 PetscCall(ISGetIndices(primals, &idxs));
936 for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937 PetscCall(ISRestoreIndices(primals, &idxs));
938 if (print) {
939 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940 PetscCall(ISView(primals, NULL));
941 }
942
943 maxsize = 0;
944 for (i = 0; i < nee; i++) {
945 PetscInt size, mark = i + 1;
946
947 PetscCall(ISGetLocalSize(eedges[i], &size));
948 PetscCall(ISGetIndices(eedges[i], &idxs));
949 for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950 PetscCall(ISRestoreIndices(eedges[i], &idxs));
951 maxsize = PetscMax(maxsize, size);
952 }
953
954 /* Find coarse edge endpoints */
955 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957 for (i = 0; i < nee; i++) {
958 PetscInt mark = i + 1, size;
959
960 PetscCall(ISGetLocalSize(eedges[i], &size));
961 if (!size && nedfieldlocal) continue;
962 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963 PetscCall(ISGetIndices(eedges[i], &idxs));
964 if (print) {
965 PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966 PetscCall(ISView(eedges[i], NULL));
967 }
968 for (j = 0; j < size; j++) {
969 PetscInt k, ee = idxs[j];
970 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee));
971 for (k = ii[ee]; k < ii[ee + 1]; k++) {
972 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k]));
973 if (PetscBTLookup(btv, jj[k])) {
974 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975 } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976 PetscInt k2;
977 PetscBool corner = PETSC_FALSE;
978 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980 /* it's a corner if either is connected with an edge dof belonging to a different cc or
981 if the edge dof lie on the natural part of the boundary */
982 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983 corner = PETSC_TRUE;
984 break;
985 }
986 }
987 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k]));
989 PetscCall(PetscBTSet(btv, jj[k]));
990 } else {
991 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n"));
992 }
993 }
994 }
995 }
996 PetscCall(ISRestoreIndices(eedges[i], &idxs));
997 }
998 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000 PetscCall(PetscBTDestroy(&btb));
1001
1002 /* Reset marked primal dofs */
1003 PetscCall(ISGetLocalSize(primals, &cum));
1004 PetscCall(ISGetIndices(primals, &idxs));
1005 for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006 PetscCall(ISRestoreIndices(primals, &idxs));
1007
1008 /* Now use the initial lG */
1009 PetscCall(MatDestroy(&lG));
1010 PetscCall(MatDestroy(&lGt));
1011 lG = lGinit;
1012 PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1013
1014 /* Compute extended cols indices */
1015 PetscCall(PetscBTCreate(nv, &btvc));
1016 PetscCall(PetscBTCreate(nee, &bter));
1017 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018 PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019 i *= maxsize;
1020 PetscCall(PetscCalloc1(nee, &extcols));
1021 PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022 eerr = PETSC_FALSE;
1023 for (i = 0; i < nee; i++) {
1024 PetscInt size, found = 0;
1025
1026 cum = 0;
1027 PetscCall(ISGetLocalSize(eedges[i], &size));
1028 if (!size && nedfieldlocal) continue;
1029 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030 PetscCall(ISGetIndices(eedges[i], &idxs));
1031 PetscCall(PetscBTMemzero(nv, btvc));
1032 for (j = 0; j < size; j++) {
1033 PetscInt k, ee = idxs[j];
1034 for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035 PetscInt vv = jj[k];
1036 if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037 else if (!PetscBTLookupSet(btvc, vv)) found++;
1038 }
1039 }
1040 PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041 PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045 /* it may happen that endpoints are not defined at this point
1046 if it is the case, mark this edge for a second pass */
1047 if (cum != size - 1 || found != 2) {
1048 PetscCall(PetscBTSet(bter, i));
1049 if (print) {
1050 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051 PetscCall(ISView(eedges[i], NULL));
1052 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053 PetscCall(ISView(extcols[i], NULL));
1054 }
1055 eerr = PETSC_TRUE;
1056 }
1057 }
1058 /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059 PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPI_C_BOOL, MPI_LOR, comm));
1060 if (done) {
1061 PetscInt *newprimals;
1062
1063 PetscCall(PetscMalloc1(ne, &newprimals));
1064 PetscCall(ISGetLocalSize(primals, &cum));
1065 PetscCall(ISGetIndices(primals, &idxs));
1066 PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067 PetscCall(ISRestoreIndices(primals, &idxs));
1068 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070 for (i = 0; i < nee; i++) {
1071 PetscBool has_candidates = PETSC_FALSE;
1072 if (PetscBTLookup(bter, i)) {
1073 PetscInt size, mark = i + 1;
1074
1075 PetscCall(ISGetLocalSize(eedges[i], &size));
1076 PetscCall(ISGetIndices(eedges[i], &idxs));
1077 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078 for (j = 0; j < size; j++) {
1079 PetscInt k, ee = idxs[j];
1080 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081 for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082 /* set all candidates located on the edge as corners */
1083 if (PetscBTLookup(btvcand, jj[k])) {
1084 PetscInt k2, vv = jj[k];
1085 has_candidates = PETSC_TRUE;
1086 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087 PetscCall(PetscBTSet(btv, vv));
1088 /* set all edge dofs connected to candidate as primals */
1089 for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090 if (marks[jjt[k2]] == mark) {
1091 PetscInt k3, ee2 = jjt[k2];
1092 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093 newprimals[cum++] = ee2;
1094 /* finally set the new corners */
1095 for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097 PetscCall(PetscBTSet(btv, jj[k3]));
1098 }
1099 }
1100 }
1101 } else {
1102 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103 }
1104 }
1105 }
1106 if (!has_candidates) { /* circular edge */
1107 PetscInt k, ee = idxs[0], *tmarks;
1108
1109 PetscCall(PetscCalloc1(ne, &tmarks));
1110 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i));
1111 for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112 PetscInt k2;
1113 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k]));
1114 PetscCall(PetscBTSet(btv, jj[k]));
1115 for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116 }
1117 for (j = 0; j < size; j++) {
1118 if (tmarks[idxs[j]] > 1) {
1119 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120 newprimals[cum++] = idxs[j];
1121 }
1122 }
1123 PetscCall(PetscFree(tmarks));
1124 }
1125 PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126 }
1127 PetscCall(ISDestroy(&extcols[i]));
1128 }
1129 PetscCall(PetscFree(extcols));
1130 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131 PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132 if (fl2g) {
1133 PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134 PetscCall(ISDestroy(&primals));
1135 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136 PetscCall(PetscFree(eedges));
1137 }
1138 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139 PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140 PetscCall(PetscFree(newprimals));
1141 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142 PetscCall(ISDestroy(&primals));
1143 PetscCall(PCBDDCAnalyzeInterface(pc));
1144 pcbddc->mat_graph->twodim = PETSC_FALSE;
1145 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146 if (fl2g) {
1147 PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148 PetscCall(PetscMalloc1(nee, &eedges));
1149 for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150 } else {
1151 eedges = alleedges;
1152 primals = allprimals;
1153 }
1154 PetscCall(PetscCalloc1(nee, &extcols));
1155
1156 /* Mark again */
1157 PetscCall(PetscArrayzero(marks, ne));
1158 for (i = 0; i < nee; i++) {
1159 PetscInt size, mark = i + 1;
1160
1161 PetscCall(ISGetLocalSize(eedges[i], &size));
1162 PetscCall(ISGetIndices(eedges[i], &idxs));
1163 for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164 PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165 }
1166 if (print) {
1167 PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168 PetscCall(ISView(primals, NULL));
1169 }
1170
1171 /* Recompute extended cols */
1172 eerr = PETSC_FALSE;
1173 for (i = 0; i < nee; i++) {
1174 PetscInt size;
1175
1176 cum = 0;
1177 PetscCall(ISGetLocalSize(eedges[i], &size));
1178 if (!size && nedfieldlocal) continue;
1179 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180 PetscCall(ISGetIndices(eedges[i], &idxs));
1181 for (j = 0; j < size; j++) {
1182 PetscInt k, ee = idxs[j];
1183 for (k = ii[ee]; k < ii[ee + 1]; k++)
1184 if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185 }
1186 PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187 PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188 PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189 PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191 if (cum != size - 1) {
1192 if (print) {
1193 PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194 PetscCall(ISView(eedges[i], NULL));
1195 PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196 PetscCall(ISView(extcols[i], NULL));
1197 }
1198 eerr = PETSC_TRUE;
1199 }
1200 }
1201 }
1202 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203 PetscCall(PetscFree2(extrow, gidxs));
1204 PetscCall(PetscBTDestroy(&bter));
1205 if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206 /* an error should not occur at this point */
1207 PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1208
1209 /* Check the number of endpoints */
1210 PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211 PetscCall(PetscMalloc1(2 * nee, &corners));
1212 PetscCall(PetscMalloc1(nee, &cedges));
1213 for (i = 0; i < nee; i++) {
1214 PetscInt size, found = 0, gc[2];
1215
1216 /* init with defaults */
1217 cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218 PetscCall(ISGetLocalSize(eedges[i], &size));
1219 if (!size && nedfieldlocal) continue;
1220 PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221 PetscCall(ISGetIndices(eedges[i], &idxs));
1222 PetscCall(PetscBTMemzero(nv, btvc));
1223 for (j = 0; j < size; j++) {
1224 PetscInt k, ee = idxs[j];
1225 for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226 PetscInt vv = jj[k];
1227 if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228 PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229 corners[i * 2 + found++] = vv;
1230 }
1231 }
1232 }
1233 if (found != 2) {
1234 PetscInt e;
1235 if (fl2g) {
1236 PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237 } else {
1238 e = idxs[0];
1239 }
1240 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241 }
1242
1243 /* get primal dof index on this coarse edge */
1244 PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245 if (gc[0] > gc[1]) {
1246 PetscInt swap = corners[2 * i];
1247 corners[2 * i] = corners[2 * i + 1];
1248 corners[2 * i + 1] = swap;
1249 }
1250 cedges[i] = idxs[size - 1];
1251 PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252 if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253 }
1254 PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255 PetscCall(PetscBTDestroy(&btvc));
1256
1257 if (PetscDefined(USE_DEBUG)) {
1258 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259 not interfere with neighbouring coarse edges */
1260 PetscCall(PetscMalloc1(nee + 1, &emarks));
1261 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262 for (i = 0; i < nv; i++) {
1263 PetscInt emax = 0, eemax = 0;
1264
1265 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266 PetscCall(PetscArrayzero(emarks, nee + 1));
1267 for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268 for (j = 1; j < nee + 1; j++) {
1269 if (emax < emarks[j]) {
1270 emax = emarks[j];
1271 eemax = j;
1272 }
1273 }
1274 /* not relevant for edges */
1275 if (!eemax) continue;
1276
1277 for (j = ii[i]; j < ii[i + 1]; j++) {
1278 PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279 }
1280 }
1281 PetscCall(PetscFree(emarks));
1282 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283 }
1284
1285 /* Compute extended rows indices for edge blocks of the change of basis */
1286 PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287 PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288 extmem *= maxsize;
1289 PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290 PetscCall(PetscMalloc1(nee, &extrows));
1291 PetscCall(PetscCalloc1(nee, &extrowcum));
1292 for (i = 0; i < nv; i++) {
1293 PetscInt mark = 0, size, start;
1294
1295 if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296 for (j = ii[i]; j < ii[i + 1]; j++)
1297 if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1298
1299 /* not relevant */
1300 if (!mark) continue;
1301
1302 /* import extended row */
1303 mark--;
1304 start = mark * extmem + extrowcum[mark];
1305 size = ii[i + 1] - ii[i];
1306 PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307 PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308 extrowcum[mark] += size;
1309 }
1310 PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311 PetscCall(MatDestroy(&lGt));
1312 PetscCall(PetscFree(marks));
1313
1314 /* Compress extrows */
1315 cum = 0;
1316 for (i = 0; i < nee; i++) {
1317 PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318 PetscCall(PetscSortRemoveDupsInt(&size, start));
1319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320 cum = PetscMax(cum, size);
1321 }
1322 PetscCall(PetscFree(extrowcum));
1323 PetscCall(PetscBTDestroy(&btv));
1324 PetscCall(PetscBTDestroy(&btvcand));
1325
1326 /* Workspace for lapack inner calls and VecSetValues */
1327 PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1328
1329 /* Create change of basis matrix (no preallocation) */
1330 PetscCall(MatCreate(comm, &T));
1331 PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332 PetscCall(MatSetType(T, MATAIJ));
1333 PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1334 PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1335 PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
1336 //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1337 //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1338 //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1339
1340 /* Defaults to identity */
1341 {
1342 Vec w;
1343 const PetscScalar *wa;
1344
1345 PetscCall(MatCreateVecs(T, &w, NULL));
1346 PetscCall(VecSetLocalToGlobalMapping(w, al2g));
1347 PetscCall(VecSet(w, 1.0));
1348 for (i = 0; i < nee; i++) {
1349 const PetscInt *idxs;
1350 PetscInt nl;
1351
1352 PetscCall(ISGetLocalSize(eedges[i], &nl));
1353 PetscCall(ISGetIndices(eedges[i], &idxs));
1354 PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES));
1355 PetscCall(ISRestoreIndices(eedges[i], &idxs));
1356 }
1357 PetscCall(VecAssemblyBegin(w));
1358 PetscCall(VecAssemblyEnd(w));
1359 PetscCall(VecGetArrayRead(w, &wa));
1360 for (i = T->rmap->rstart; i < T->rmap->rend; i++)
1361 if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1362 PetscCall(VecRestoreArrayRead(w, &wa));
1363 PetscCall(VecDestroy(&w));
1364 }
1365
1366 /* Create discrete gradient for the coarser level if needed */
1367 PetscCall(MatDestroy(&pcbddc->nedcG));
1368 PetscCall(ISDestroy(&pcbddc->nedclocal));
1369 if (pcbddc->current_level < pcbddc->max_levels) {
1370 ISLocalToGlobalMapping cel2g, cvl2g;
1371 IS wis, gwis;
1372 PetscInt cnv, cne;
1373
1374 PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1375 if (fl2g) {
1376 PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1377 } else {
1378 PetscCall(PetscObjectReference((PetscObject)wis));
1379 pcbddc->nedclocal = wis;
1380 }
1381 PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1382 PetscCall(ISDestroy(&wis));
1383 PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1384 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1385 PetscCall(ISDestroy(&wis));
1386 PetscCall(ISDestroy(&gwis));
1387
1388 PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1389 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1390 PetscCall(ISDestroy(&wis));
1391 PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1392 PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1393 PetscCall(ISDestroy(&wis));
1394 PetscCall(ISDestroy(&gwis));
1395
1396 PetscCall(MatCreate(comm, &pcbddc->nedcG));
1397 PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1398 PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1399 PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1400 PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1401 PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1402 PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1403 PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1404 }
1405
1406 MatNullSpace nnsp;
1407 PetscBool nnsp_has_const = PETSC_FALSE;
1408 const Vec *nnsp_vecs = NULL;
1409 PetscInt nnsp_nvecs = 0;
1410 VecScatter nnsp_vscat = NULL;
1411 PetscCall(MatGetNullSpace(pcbddc->discretegradient, &nnsp));
1412 if (nnsp) PetscCall(MatNullSpaceGetVecs(nnsp, &nnsp_has_const, &nnsp_nvecs, &nnsp_vecs));
1413 if (nnsp_has_const || nnsp_nvecs) { /* create scatter to import edge constraints */
1414 IS allextcols, gallextcols, galleedges, is_E_to_zero;
1415 Vec E, V;
1416 PetscInt *eedgesidxs;
1417 const PetscScalar *evals;
1418
1419 PetscCall(MatCreateVecs(pc->pmat, &E, NULL));
1420 PetscCall(MatCreateVecs(pcbddc->discretegradient, &V, NULL));
1421 PetscCall(ISConcatenate(PETSC_COMM_SELF, nee, extcols, &allextcols));
1422 cum = 0;
1423 for (i = 0; i < nee; i++) {
1424 PetscInt j;
1425
1426 PetscCall(ISGetLocalSize(eedges[i], &j));
1427 PetscCheck(j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Zero sized edge %" PetscInt_FMT, i);
1428 cum += j - 1;
1429 }
1430 PetscCall(PetscMalloc1(PetscMax(cum, pc->pmat->rmap->n), &eedgesidxs));
1431 cum = 0;
1432 for (i = 0; i < nee; i++) {
1433 const PetscInt *idxs;
1434 PetscInt j;
1435
1436 PetscCall(ISGetLocalSize(eedges[i], &j));
1437 PetscCall(ISGetIndices(eedges[i], &idxs));
1438 PetscCall(PetscArraycpy(eedgesidxs + cum, idxs, j - 1)); /* last on the edge is primal */
1439 PetscCall(ISRestoreIndices(eedges[i], &idxs));
1440 cum += j - 1;
1441 }
1442 PetscCall(ISLocalToGlobalMappingApply(al2g, cum, eedgesidxs, eedgesidxs));
1443 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_USE_POINTER, &galleedges));
1444 PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, allextcols, &gallextcols));
1445 PetscCall(VecScatterCreate(V, gallextcols, E, galleedges, &nnsp_vscat));
1446 PetscCall(ISDestroy(&allextcols));
1447 PetscCall(ISDestroy(&gallextcols));
1448 PetscCall(ISDestroy(&galleedges));
1449
1450 /* identify dofs we must zero if importing user-defined near nullspace from pmat */
1451 PetscCall(VecSet(E, 1.0));
1452 PetscCall(VecSetValues(E, cum, eedgesidxs, NULL, INSERT_VALUES));
1453 PetscCall(VecAssemblyBegin(E));
1454 PetscCall(VecAssemblyEnd(E));
1455 PetscCall(VecGetArrayRead(E, &evals));
1456 for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++)
1457 if (evals[i] == 0.0) eedgesidxs[cum++] = i + pc->pmat->rmap->rstart;
1458 PetscCall(VecRestoreArrayRead(E, &evals));
1459 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_COPY_VALUES, &is_E_to_zero));
1460 PetscCall(PetscFree(eedgesidxs));
1461
1462 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject)V));
1463 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject)E));
1464 PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_zero", (PetscObject)is_E_to_zero));
1465 PetscCall(ISDestroy(&is_E_to_zero));
1466 PetscCall(VecDestroy(&V));
1467 PetscCall(VecDestroy(&E));
1468 }
1469 #if defined(PRINT_GDET)
1470 inc = 0;
1471 lev = pcbddc->current_level;
1472 #endif
1473
1474 /* Insert values in the change of basis matrix */
1475 for (i = 0; i < nee; i++) {
1476 Mat Gins = NULL, GKins = NULL;
1477 IS cornersis = NULL;
1478 PetscScalar cvals[2];
1479
1480 if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1481 PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1482 if (Gins && GKins) {
1483 const PetscScalar *data;
1484 const PetscInt *rows, *cols;
1485 PetscInt nrh, nch, nrc, ncc;
1486
1487 PetscCall(ISGetIndices(eedges[i], &cols));
1488 /* H1 */
1489 PetscCall(ISGetIndices(extrows[i], &rows));
1490 PetscCall(MatGetSize(Gins, &nrh, &nch));
1491 PetscCall(MatDenseGetArrayRead(Gins, &data));
1492 PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1493 PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1494 PetscCall(ISRestoreIndices(extrows[i], &rows));
1495 /* complement */
1496 PetscCall(MatGetSize(GKins, &nrc, &ncc));
1497 PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1498 PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1499 PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1500 PetscCall(MatDenseGetArrayRead(GKins, &data));
1501 PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1502 PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1503
1504 /* coarse discrete gradient */
1505 if (pcbddc->nedcG) {
1506 PetscInt cols[2];
1507
1508 cols[0] = 2 * i;
1509 cols[1] = 2 * i + 1;
1510 PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1511 }
1512 PetscCall(ISRestoreIndices(eedges[i], &cols));
1513 }
1514 PetscCall(ISDestroy(&extrows[i]));
1515 PetscCall(ISDestroy(&extcols[i]));
1516 PetscCall(ISDestroy(&cornersis));
1517 PetscCall(MatDestroy(&Gins));
1518 PetscCall(MatDestroy(&GKins));
1519 }
1520
1521 /* import edge constraints */
1522 if (nnsp_vscat) {
1523 Vec V, E, *quadvecs;
1524 PetscInt nvecs, nvecs_orth;
1525 MatNullSpace onnsp = NULL;
1526 PetscBool onnsp_has_const = PETSC_FALSE;
1527 const Vec *onnsp_vecs = NULL;
1528 PetscInt onnsp_nvecs = 0, new_nnsp_nvecs, old_nnsp_nvecs;
1529 IS is_E_to_zero;
1530
1531 /* import nearnullspace from preconditioning matrix if user-defined */
1532 PetscCall(MatGetNearNullSpace(pc->pmat, &onnsp));
1533 if (onnsp) {
1534 PetscBool isinternal;
1535
1536 PetscCall(PetscStrcmp("_internal_BDDC_nedelec_nnsp", ((PetscObject)onnsp)->name, &isinternal));
1537 if (!isinternal) PetscCall(MatNullSpaceGetVecs(onnsp, &onnsp_has_const, &onnsp_nvecs, &onnsp_vecs));
1538 }
1539 new_nnsp_nvecs = nnsp_nvecs + (nnsp_has_const ? 1 : 0);
1540 old_nnsp_nvecs = onnsp_nvecs + (onnsp_has_const ? 1 : 0);
1541 nvecs = old_nnsp_nvecs + new_nnsp_nvecs;
1542 PetscCall(PetscMalloc1(nvecs, &quadvecs));
1543
1544 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject *)&V));
1545 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject *)&E));
1546 PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_zero", (PetscObject *)&is_E_to_zero));
1547 for (i = 0; i < nvecs; i++) PetscCall(VecDuplicate(E, &quadvecs[i]));
1548 cum = 0;
1549 if (nnsp_has_const) {
1550 PetscCall(VecSet(V, 1.0));
1551 PetscCall(VecScatterBegin(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD));
1552 PetscCall(VecScatterEnd(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD));
1553 cum = 1;
1554 }
1555 for (i = 0; i < nnsp_nvecs; i++) {
1556 PetscCall(VecScatterBegin(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD));
1557 PetscCall(VecScatterEnd(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD));
1558 }
1559
1560 /* Now add old nnsp if present */
1561 cum = 0;
1562 if (onnsp_has_const) {
1563 PetscCall(VecSet(quadvecs[new_nnsp_nvecs], 1.0));
1564 PetscCall(VecISSet(quadvecs[new_nnsp_nvecs], is_E_to_zero, 0));
1565 cum = 1;
1566 }
1567 for (i = 0; i < onnsp_nvecs; i++) {
1568 PetscCall(VecCopy(onnsp_vecs[i], quadvecs[i + cum + new_nnsp_nvecs]));
1569 PetscCall(VecISSet(quadvecs[i + cum + new_nnsp_nvecs], is_E_to_zero, 0));
1570 }
1571 nvecs_orth = nvecs;
1572 PetscCall(PCBDDCOrthonormalizeVecs(&nvecs_orth, quadvecs));
1573 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, nvecs_orth, quadvecs, &nnsp));
1574 for (i = 0; i < nvecs; i++) PetscCall(VecDestroy(&quadvecs[i]));
1575 PetscCall(PetscFree(quadvecs));
1576 PetscCall(PetscObjectSetName((PetscObject)nnsp, "_internal_BDDC_nedelec_nnsp"));
1577 PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1578 PetscCall(MatNullSpaceDestroy(&nnsp));
1579 }
1580 PetscCall(VecScatterDestroy(&nnsp_vscat));
1581 PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1582 PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1583 PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1584
1585 /* Start assembling */
1586 PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1587 if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1588
1589 /* Free */
1590 if (fl2g) {
1591 PetscCall(ISDestroy(&primals));
1592 for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1593 PetscCall(PetscFree(eedges));
1594 }
1595
1596 /* hack mat_graph with primal dofs on the coarse edges */
1597 {
1598 PCBDDCGraph graph = pcbddc->mat_graph;
1599 PetscInt *oqueue = graph->queue;
1600 PetscInt *ocptr = graph->cptr;
1601 PetscInt ncc, *idxs;
1602
1603 /* find first primal edge */
1604 if (pcbddc->nedclocal) {
1605 PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1606 } else {
1607 if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1608 idxs = cedges;
1609 }
1610 cum = 0;
1611 while (cum < nee && cedges[cum] < 0) cum++;
1612
1613 /* adapt connected components */
1614 PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1615 graph->cptr[0] = 0;
1616 for (i = 0, ncc = 0; i < graph->ncc; i++) {
1617 PetscInt lc = ocptr[i + 1] - ocptr[i];
1618 if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1619 graph->cptr[ncc + 1] = graph->cptr[ncc] + 1;
1620 graph->queue[graph->cptr[ncc]] = cedges[cum];
1621 ncc++;
1622 lc--;
1623 cum++;
1624 while (cum < nee && cedges[cum] < 0) cum++;
1625 }
1626 graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1627 for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1628 ncc++;
1629 }
1630 graph->ncc = ncc;
1631 if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1632 PetscCall(PetscFree2(ocptr, oqueue));
1633 }
1634 PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1635 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1636 PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1637
1638 PetscCall(ISDestroy(&nedfieldlocal));
1639 PetscCall(PetscFree(extrow));
1640 PetscCall(PetscFree2(work, rwork));
1641 PetscCall(PetscFree(corners));
1642 PetscCall(PetscFree(cedges));
1643 PetscCall(PetscFree(extrows));
1644 PetscCall(PetscFree(extcols));
1645 PetscCall(MatDestroy(&lG));
1646
1647 /* Complete assembling */
1648 PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1649 PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1650 if (pcbddc->nedcG) {
1651 PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1652 PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1653 }
1654
1655 PetscCall(ISDestroy(&elements_corners));
1656
1657 /* set change of basis */
1658 PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1659 PetscCall(MatDestroy(&T));
1660 PetscFunctionReturn(PETSC_SUCCESS);
1661 }
1662
1663 /* the near-null space of BDDC carries information on quadrature weights,
1664 and these can be collinear -> so cheat with MatNullSpaceCreate
1665 and create a suitable set of basis vectors first */
PCBDDCNullSpaceCreate(MPI_Comm comm,PetscBool has_const,PetscInt nvecs,Vec quad_vecs[],MatNullSpace * nnsp)1666 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1667 {
1668 PetscInt i;
1669
1670 PetscFunctionBegin;
1671 for (i = 0; i < nvecs; i++) {
1672 PetscInt first, last;
1673
1674 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1675 PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1676 if (i >= first && i < last) {
1677 PetscScalar *data;
1678 PetscCall(VecGetArray(quad_vecs[i], &data));
1679 if (!has_const) {
1680 data[i - first] = 1.;
1681 } else {
1682 data[2 * i - first] = 1. / PetscSqrtReal(2.);
1683 data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1684 }
1685 PetscCall(VecRestoreArray(quad_vecs[i], &data));
1686 }
1687 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1688 }
1689 PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1690 for (i = 0; i < nvecs; i++) { /* reset vectors */
1691 PetscInt first, last;
1692 PetscCall(VecLockReadPop(quad_vecs[i]));
1693 PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1694 if (i >= first && i < last) {
1695 PetscScalar *data;
1696 PetscCall(VecGetArray(quad_vecs[i], &data));
1697 if (!has_const) {
1698 data[i - first] = 0.;
1699 } else {
1700 data[2 * i - first] = 0.;
1701 data[2 * i - first + 1] = 0.;
1702 }
1703 PetscCall(VecRestoreArray(quad_vecs[i], &data));
1704 }
1705 PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1706 PetscCall(VecLockReadPush(quad_vecs[i]));
1707 }
1708 PetscFunctionReturn(PETSC_SUCCESS);
1709 }
1710
PCBDDCComputeNoNetFlux(Mat A,Mat divudotp,PetscBool transpose,IS vl2l,PCBDDCGraph graph,MatNullSpace * nnsp)1711 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1712 {
1713 Mat loc_divudotp;
1714 Vec p, v, quad_vec;
1715 ISLocalToGlobalMapping map;
1716 PetscScalar *array;
1717
1718 PetscFunctionBegin;
1719 PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1720 if (!transpose) {
1721 PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1722 } else {
1723 PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1724 }
1725 PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1726 PetscCall(VecLockReadPop(quad_vec));
1727 PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1728
1729 /* compute local quad vec */
1730 PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1731 if (!transpose) {
1732 PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1733 } else {
1734 PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1735 }
1736 /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1737 PetscCall(VecSet(p, 1.));
1738 if (!transpose) {
1739 PetscCall(MatMultTranspose(loc_divudotp, p, v));
1740 } else {
1741 PetscCall(MatMult(loc_divudotp, p, v));
1742 }
1743 PetscCall(VecDestroy(&p));
1744 if (vl2l) {
1745 Mat lA;
1746 VecScatter sc;
1747 Vec vins;
1748
1749 PetscCall(MatISGetLocalMat(A, &lA));
1750 PetscCall(MatCreateVecs(lA, &vins, NULL));
1751 PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1752 PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1753 PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1754 PetscCall(VecScatterDestroy(&sc));
1755 PetscCall(VecDestroy(&v));
1756 v = vins;
1757 }
1758
1759 /* mask summation of interface values */
1760 PetscInt n, *mmask, *mask, *idxs, nmr, nr;
1761 const PetscInt *degree;
1762 PetscSF msf;
1763
1764 PetscCall(VecGetLocalSize(v, &n));
1765 PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1766 PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1767 PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1768 PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1769 PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree));
1770 PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree));
1771 for (PetscInt i = 0, c = 0; i < nr; i++) {
1772 mmask[c] = 1;
1773 c += degree[i];
1774 }
1775 PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1776 PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1777 PetscCall(VecGetArray(v, &array));
1778 for (PetscInt i = 0; i < n; i++) {
1779 array[i] *= mask[i];
1780 idxs[i] = i;
1781 }
1782 PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1783 PetscCall(VecRestoreArray(v, &array));
1784 PetscCall(PetscFree3(mmask, mask, idxs));
1785 PetscCall(VecDestroy(&v));
1786 PetscCall(VecAssemblyBegin(quad_vec));
1787 PetscCall(VecAssemblyEnd(quad_vec));
1788 PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1789 PetscCall(VecLockReadPush(quad_vec));
1790 PetscCall(VecDestroy(&quad_vec));
1791 PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793
PCBDDCAddPrimalVerticesLocalIS(PC pc,IS primalv)1794 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1795 {
1796 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1797
1798 PetscFunctionBegin;
1799 if (primalv) {
1800 if (pcbddc->user_primal_vertices_local) {
1801 IS list[2], newp;
1802
1803 list[0] = primalv;
1804 list[1] = pcbddc->user_primal_vertices_local;
1805 PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1806 PetscCall(ISSortRemoveDups(newp));
1807 PetscCall(ISDestroy(&list[1]));
1808 pcbddc->user_primal_vertices_local = newp;
1809 } else {
1810 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1811 }
1812 }
1813 PetscFunctionReturn(PETSC_SUCCESS);
1814 }
1815
func_coords_private(PetscInt dim,PetscReal t,const PetscReal X[],PetscInt Nf,PetscScalar * out,PetscCtx ctx)1816 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, PetscCtx ctx)
1817 {
1818 PetscInt f, *comp = (PetscInt *)ctx;
1819
1820 PetscFunctionBegin;
1821 for (f = 0; f < Nf; f++) out[f] = X[*comp];
1822 PetscFunctionReturn(PETSC_SUCCESS);
1823 }
1824
PCBDDCComputeLocalTopologyInfo(PC pc)1825 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1826 {
1827 Vec local, global;
1828 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1829 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
1830 PetscBool monolithic = PETSC_FALSE;
1831
1832 PetscFunctionBegin;
1833 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1834 PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1835 PetscOptionsEnd();
1836 /* need to convert from global to local topology information and remove references to information in global ordering */
1837 PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1838 PetscCall(MatCreateVecs(matis->A, &local, NULL));
1839 PetscCall(VecBindToCPU(global, PETSC_TRUE));
1840 PetscCall(VecBindToCPU(local, PETSC_TRUE));
1841 if (monolithic) { /* just get block size to properly compute vertices */
1842 if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1843 goto boundary;
1844 }
1845
1846 if (pcbddc->user_provided_isfordofs) {
1847 if (pcbddc->n_ISForDofs) {
1848 PetscInt i;
1849
1850 PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1851 for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1852 PetscInt bs;
1853
1854 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1855 PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1856 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1857 PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1858 }
1859 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1860 pcbddc->n_ISForDofs = 0;
1861 PetscCall(PetscFree(pcbddc->ISForDofs));
1862 }
1863 } else {
1864 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1865 DM dm;
1866
1867 PetscCall(MatGetDM(pc->pmat, &dm));
1868 if (!dm) PetscCall(PCGetDM(pc, &dm));
1869 if (dm) {
1870 IS *fields;
1871 PetscInt nf, i;
1872
1873 PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1874 PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1875 for (i = 0; i < nf; i++) {
1876 PetscInt bs;
1877
1878 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1879 PetscCall(ISGetBlockSize(fields[i], &bs));
1880 PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1881 PetscCall(ISDestroy(&fields[i]));
1882 }
1883 PetscCall(PetscFree(fields));
1884 pcbddc->n_ISForDofsLocal = nf;
1885 } else { /* See if MATIS has fields attached by the conversion from MatNest */
1886 PetscContainer c;
1887
1888 PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1889 if (c) {
1890 MatISLocalFields lf;
1891 PetscCall(PetscContainerGetPointer(c, &lf));
1892 PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1893 } else { /* fallback, create the default fields if bs > 1 */
1894 PetscInt i, n = matis->A->rmap->n;
1895 PetscCall(MatGetBlockSize(pc->pmat, &i));
1896 if (i > 1) {
1897 pcbddc->n_ISForDofsLocal = i;
1898 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1899 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1900 }
1901 }
1902 }
1903 } else {
1904 PetscInt i;
1905 for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1906 }
1907 }
1908
1909 boundary:
1910 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1911 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1912 } else if (pcbddc->DirichletBoundariesLocal) {
1913 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1914 }
1915 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1916 PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1917 } else if (pcbddc->NeumannBoundariesLocal) {
1918 PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1919 }
1920 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1921 PetscCall(VecDestroy(&global));
1922 PetscCall(VecDestroy(&local));
1923 /* detect local disconnected subdomains if requested or needed */
1924 if (pcbddc->detect_disconnected || matis->allow_repeated) {
1925 IS primalv = NULL;
1926 PetscInt nel;
1927 PetscBool filter = pcbddc->detect_disconnected_filter;
1928
1929 for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1930 PetscCall(PetscFree(pcbddc->local_subs));
1931 PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1932 if (matis->allow_repeated && nel) {
1933 const PetscInt *elsizes;
1934
1935 pcbddc->n_local_subs = nel;
1936 PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1937 PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1938 for (PetscInt i = 0, c = 0; i < nel; i++) {
1939 PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1940 c += elsizes[i];
1941 }
1942 } else {
1943 PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1944 }
1945 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1946 PetscCall(ISDestroy(&primalv));
1947 }
1948 /* early stage corner detection */
1949 {
1950 DM dm;
1951
1952 PetscCall(MatGetDM(pc->pmat, &dm));
1953 if (!dm) PetscCall(PCGetDM(pc, &dm));
1954 if (dm) {
1955 PetscBool isda;
1956
1957 PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1958 if (isda) {
1959 ISLocalToGlobalMapping l2l;
1960 IS corners;
1961 Mat lA;
1962 PetscBool gl, lo;
1963
1964 {
1965 Vec cvec;
1966 const PetscScalar *coords;
1967 PetscInt dof, n, cdim;
1968 PetscBool memc = PetscDefined(USE_COMPLEX) ? PETSC_FALSE : PETSC_TRUE;
1969
1970 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1971 PetscCall(DMGetCoordinates(dm, &cvec));
1972 PetscCall(VecGetLocalSize(cvec, &n));
1973 PetscCall(VecGetBlockSize(cvec, &cdim));
1974 n /= cdim;
1975 PetscCall(PetscFree(pcbddc->mat_graph->coords));
1976 PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1977 PetscCall(VecGetArrayRead(cvec, &coords));
1978 if (dof != 1) memc = PETSC_FALSE;
1979 if (memc) {
1980 PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1981 } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1982 PetscReal *bcoords = pcbddc->mat_graph->coords;
1983 PetscInt i, b, d;
1984
1985 for (i = 0; i < n; i++) {
1986 for (b = 0; b < dof; b++) {
1987 for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1988 }
1989 }
1990 }
1991 PetscCall(VecRestoreArrayRead(cvec, &coords));
1992 pcbddc->mat_graph->cdim = cdim;
1993 pcbddc->mat_graph->cnloc = dof * n;
1994 pcbddc->mat_graph->cloc = PETSC_FALSE;
1995 }
1996 PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
1997 PetscCall(MatISGetLocalMat(pc->pmat, &lA));
1998 PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
1999 PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
2000 lo = (PetscBool)(l2l && corners);
2001 PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2002 if (gl) { /* From PETSc's DMDA */
2003 const PetscInt *idx;
2004 PetscInt dof, bs, *idxout, n;
2005
2006 PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
2007 PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
2008 PetscCall(ISGetLocalSize(corners, &n));
2009 PetscCall(ISGetIndices(corners, &idx));
2010 if (bs == dof) {
2011 PetscCall(PetscMalloc1(n, &idxout));
2012 PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
2013 } else { /* the original DMDA local-to-local map have been modified */
2014 PetscInt i, d;
2015
2016 PetscCall(PetscMalloc1(dof * n, &idxout));
2017 for (i = 0; i < n; i++)
2018 for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
2019 PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
2020
2021 bs = 1;
2022 n *= dof;
2023 }
2024 PetscCall(ISRestoreIndices(corners, &idx));
2025 PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
2026 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
2027 PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
2028 PetscCall(ISDestroy(&corners));
2029 pcbddc->corner_selected = PETSC_TRUE;
2030 pcbddc->corner_selection = PETSC_TRUE;
2031 }
2032 if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
2033 }
2034 }
2035 }
2036 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
2037 DM dm;
2038
2039 PetscCall(MatGetDM(pc->pmat, &dm));
2040 if (!dm) PetscCall(PCGetDM(pc, &dm));
2041 if (dm) { /* this can get very expensive, I need to find a faster alternative */
2042 Vec vcoords;
2043 PetscSection section;
2044 PetscReal *coords;
2045 PetscInt d, cdim, nl, nf, **ctxs;
2046 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
2047 /* debug coordinates */
2048 PetscViewer viewer;
2049 PetscBool flg;
2050 PetscViewerFormat format;
2051 const char *prefix;
2052
2053 PetscCall(DMGetCoordinateDim(dm, &cdim));
2054 PetscCall(DMGetLocalSection(dm, §ion));
2055 PetscCall(PetscSectionGetNumFields(section, &nf));
2056 PetscCall(DMCreateGlobalVector(dm, &vcoords));
2057 PetscCall(VecGetLocalSize(vcoords, &nl));
2058 PetscCall(PetscMalloc1(nl * cdim, &coords));
2059 PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
2060 PetscCall(PetscMalloc1(nf, &ctxs[0]));
2061 for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
2062 for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
2063
2064 /* debug coordinates */
2065 PetscCall(PCGetOptionsPrefix(pc, &prefix));
2066 PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
2067 if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
2068 for (d = 0; d < cdim; d++) {
2069 PetscInt i;
2070 const PetscScalar *v;
2071 char name[16];
2072
2073 for (i = 0; i < nf; i++) ctxs[i][0] = d;
2074 PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d));
2075 PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
2076 PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
2077 if (flg) PetscCall(VecView(vcoords, viewer));
2078 PetscCall(VecGetArrayRead(vcoords, &v));
2079 for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
2080 PetscCall(VecRestoreArrayRead(vcoords, &v));
2081 }
2082 PetscCall(VecDestroy(&vcoords));
2083 PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
2084 PetscCall(PetscFree(coords));
2085 PetscCall(PetscFree(ctxs[0]));
2086 PetscCall(PetscFree2(funcs, ctxs));
2087 if (flg) {
2088 PetscCall(PetscViewerPopFormat(viewer));
2089 PetscCall(PetscViewerDestroy(&viewer));
2090 }
2091 }
2092 }
2093 PetscFunctionReturn(PETSC_SUCCESS);
2094 }
2095
PCBDDCConsistencyCheckIS(PC pc,MPI_Op mop,IS * is)2096 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
2097 {
2098 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2099 IS nis;
2100 const PetscInt *idxs;
2101 PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd;
2102
2103 PetscFunctionBegin;
2104 PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
2105 if (mop == MPI_LAND) {
2106 /* init rootdata with true */
2107 for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
2108 } else {
2109 PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
2110 }
2111 PetscCall(PetscArrayzero(matis->sf_leafdata, n));
2112 PetscCall(ISGetLocalSize(*is, &nd));
2113 PetscCall(ISGetIndices(*is, &idxs));
2114 for (i = 0; i < nd; i++)
2115 if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
2116 PetscCall(ISRestoreIndices(*is, &idxs));
2117 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2118 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2119 PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2120 PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2121 if (mop == MPI_LAND) {
2122 PetscCall(PetscMalloc1(nd, &nidxs));
2123 } else {
2124 PetscCall(PetscMalloc1(n, &nidxs));
2125 }
2126 for (i = 0, nnd = 0; i < n; i++)
2127 if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2128 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2129 PetscCall(ISDestroy(is));
2130 *is = nis;
2131 PetscFunctionReturn(PETSC_SUCCESS);
2132 }
2133
PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)2134 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2135 {
2136 PC_IS *pcis = (PC_IS *)pc->data;
2137 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2138
2139 PetscFunctionBegin;
2140 if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2141 if (pcbddc->ChangeOfBasisMatrix) {
2142 Vec swap;
2143
2144 PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2145 swap = pcbddc->work_change;
2146 pcbddc->work_change = r;
2147 r = swap;
2148 }
2149 PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2150 PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2151 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2152 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2153 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2154 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2155 PetscCall(VecSet(z, 0.));
2156 PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2157 PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2158 if (pcbddc->ChangeOfBasisMatrix) {
2159 pcbddc->work_change = r;
2160 PetscCall(VecCopy(z, pcbddc->work_change));
2161 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2162 }
2163 PetscFunctionReturn(PETSC_SUCCESS);
2164 }
2165
PCBDDCBenignMatMult_Private_Private(Mat A,Vec x,Vec y,PetscBool transpose)2166 static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2167 {
2168 PCBDDCBenignMatMult_ctx ctx;
2169 PetscBool apply_right, apply_left, reset_x;
2170
2171 PetscFunctionBegin;
2172 PetscCall(MatShellGetContext(A, &ctx));
2173 if (transpose) {
2174 apply_right = ctx->apply_left;
2175 apply_left = ctx->apply_right;
2176 } else {
2177 apply_right = ctx->apply_right;
2178 apply_left = ctx->apply_left;
2179 }
2180 reset_x = PETSC_FALSE;
2181 if (apply_right) {
2182 const PetscScalar *ax;
2183 PetscInt nl, i;
2184
2185 PetscCall(VecGetLocalSize(x, &nl));
2186 PetscCall(VecGetArrayRead(x, &ax));
2187 PetscCall(PetscArraycpy(ctx->work, ax, nl));
2188 PetscCall(VecRestoreArrayRead(x, &ax));
2189 for (i = 0; i < ctx->benign_n; i++) {
2190 PetscScalar sum, val;
2191 const PetscInt *idxs;
2192 PetscInt nz, j;
2193 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2194 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2195 sum = 0.;
2196 if (ctx->apply_p0) {
2197 val = ctx->work[idxs[nz - 1]];
2198 for (j = 0; j < nz - 1; j++) {
2199 sum += ctx->work[idxs[j]];
2200 ctx->work[idxs[j]] += val;
2201 }
2202 } else {
2203 for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2204 }
2205 ctx->work[idxs[nz - 1]] -= sum;
2206 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2207 }
2208 PetscCall(VecPlaceArray(x, ctx->work));
2209 reset_x = PETSC_TRUE;
2210 }
2211 if (transpose) {
2212 PetscCall(MatMultTranspose(ctx->A, x, y));
2213 } else {
2214 PetscCall(MatMult(ctx->A, x, y));
2215 }
2216 if (reset_x) PetscCall(VecResetArray(x));
2217 if (apply_left) {
2218 PetscScalar *ay;
2219 PetscInt i;
2220
2221 PetscCall(VecGetArray(y, &ay));
2222 for (i = 0; i < ctx->benign_n; i++) {
2223 PetscScalar sum, val;
2224 const PetscInt *idxs;
2225 PetscInt nz, j;
2226 PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2227 PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2228 val = -ay[idxs[nz - 1]];
2229 if (ctx->apply_p0) {
2230 sum = 0.;
2231 for (j = 0; j < nz - 1; j++) {
2232 sum += ay[idxs[j]];
2233 ay[idxs[j]] += val;
2234 }
2235 ay[idxs[nz - 1]] += sum;
2236 } else {
2237 for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2238 ay[idxs[nz - 1]] = 0.;
2239 }
2240 PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2241 }
2242 PetscCall(VecRestoreArray(y, &ay));
2243 }
2244 PetscFunctionReturn(PETSC_SUCCESS);
2245 }
2246
PCBDDCBenignMatMultTranspose_Private(Mat A,Vec x,Vec y)2247 static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2248 {
2249 PetscFunctionBegin;
2250 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2251 PetscFunctionReturn(PETSC_SUCCESS);
2252 }
2253
PCBDDCBenignMatMult_Private(Mat A,Vec x,Vec y)2254 static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2255 {
2256 PetscFunctionBegin;
2257 PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2258 PetscFunctionReturn(PETSC_SUCCESS);
2259 }
2260
PCBDDCBenignShellMat(PC pc,PetscBool restore)2261 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2262 {
2263 PC_IS *pcis = (PC_IS *)pc->data;
2264 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2265 PCBDDCBenignMatMult_ctx ctx;
2266
2267 PetscFunctionBegin;
2268 if (!restore) {
2269 Mat A_IB, A_BI;
2270 PetscScalar *work;
2271 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2272
2273 PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2274 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2275 PetscCall(PetscMalloc1(pcis->n, &work));
2276 PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2277 PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2278 PetscCall(MatSetType(A_IB, MATSHELL));
2279 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (PetscErrorCodeFn *)PCBDDCBenignMatMult_Private));
2280 PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)PCBDDCBenignMatMultTranspose_Private));
2281 PetscCall(PetscNew(&ctx));
2282 PetscCall(MatShellSetContext(A_IB, ctx));
2283 ctx->apply_left = PETSC_TRUE;
2284 ctx->apply_right = PETSC_FALSE;
2285 ctx->apply_p0 = PETSC_FALSE;
2286 ctx->benign_n = pcbddc->benign_n;
2287 if (reuse) {
2288 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2289 ctx->free = PETSC_FALSE;
2290 } else { /* TODO: could be optimized for successive solves */
2291 ISLocalToGlobalMapping N_to_D;
2292 PetscInt i;
2293
2294 PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2295 PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2296 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2297 PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2298 ctx->free = PETSC_TRUE;
2299 }
2300 ctx->A = pcis->A_IB;
2301 ctx->work = work;
2302 PetscCall(MatSetUp(A_IB));
2303 PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2304 PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2305 pcis->A_IB = A_IB;
2306
2307 /* A_BI as A_IB^T */
2308 PetscCall(MatCreateTranspose(A_IB, &A_BI));
2309 pcbddc->benign_original_mat = pcis->A_BI;
2310 pcis->A_BI = A_BI;
2311 } else {
2312 if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2313 PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2314 PetscCall(MatDestroy(&pcis->A_IB));
2315 pcis->A_IB = ctx->A;
2316 ctx->A = NULL;
2317 PetscCall(MatDestroy(&pcis->A_BI));
2318 pcis->A_BI = pcbddc->benign_original_mat;
2319 pcbddc->benign_original_mat = NULL;
2320 if (ctx->free) {
2321 PetscInt i;
2322 for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2323 PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2324 }
2325 PetscCall(PetscFree(ctx->work));
2326 PetscCall(PetscFree(ctx));
2327 }
2328 PetscFunctionReturn(PETSC_SUCCESS);
2329 }
2330
2331 /* used just in bddc debug mode */
PCBDDCBenignProject(PC pc,IS is1,IS is2,Mat * B)2332 static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2333 {
2334 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2335 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2336 Mat An;
2337
2338 PetscFunctionBegin;
2339 PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2340 PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2341 if (is1) {
2342 PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2343 PetscCall(MatDestroy(&An));
2344 } else {
2345 *B = An;
2346 }
2347 PetscFunctionReturn(PETSC_SUCCESS);
2348 }
2349
2350 /* TODO: add reuse flag */
MatSeqAIJCompress(Mat A,Mat * B)2351 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2352 {
2353 Mat Bt;
2354 PetscScalar *a, *bdata;
2355 const PetscInt *ii, *ij;
2356 PetscInt m, n, i, nnz, *bii, *bij;
2357 PetscBool flg_row;
2358
2359 PetscFunctionBegin;
2360 PetscCall(MatGetSize(A, &n, &m));
2361 PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2362 PetscCall(MatSeqAIJGetArray(A, &a));
2363 nnz = n;
2364 for (i = 0; i < ii[n]; i++) {
2365 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2366 }
2367 PetscCall(PetscMalloc1(n + 1, &bii));
2368 PetscCall(PetscMalloc1(nnz, &bij));
2369 PetscCall(PetscMalloc1(nnz, &bdata));
2370 nnz = 0;
2371 bii[0] = 0;
2372 for (i = 0; i < n; i++) {
2373 PetscInt j;
2374 for (j = ii[i]; j < ii[i + 1]; j++) {
2375 PetscScalar entry = a[j];
2376 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2377 bij[nnz] = ij[j];
2378 bdata[nnz] = entry;
2379 nnz++;
2380 }
2381 }
2382 bii[i + 1] = nnz;
2383 }
2384 PetscCall(MatSeqAIJRestoreArray(A, &a));
2385 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2386 PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2387 {
2388 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2389 b->free_a = PETSC_TRUE;
2390 b->free_ij = PETSC_TRUE;
2391 }
2392 if (*B == A) PetscCall(MatDestroy(&A));
2393 *B = Bt;
2394 PetscFunctionReturn(PETSC_SUCCESS);
2395 }
2396
PCBDDCDetectDisconnectedComponents(PC pc,PetscBool filter,PetscInt * ncc,IS * cc[],IS * primalv)2397 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2398 {
2399 Mat B = NULL;
2400 DM dm;
2401 IS is_dummy, *cc_n;
2402 ISLocalToGlobalMapping l2gmap_dummy;
2403 PCBDDCGraph graph;
2404 PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL;
2405 PetscInt i, n;
2406 PetscInt *xadj, *adjncy;
2407 PetscBool isplex = PETSC_FALSE;
2408
2409 PetscFunctionBegin;
2410 if (ncc) *ncc = 0;
2411 if (cc) *cc = NULL;
2412 if (primalv) *primalv = NULL;
2413 PetscCall(PCBDDCGraphCreate(&graph));
2414 PetscCall(MatGetDM(pc->pmat, &dm));
2415 if (!dm) PetscCall(PCGetDM(pc, &dm));
2416 if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2417 if (filter) isplex = PETSC_FALSE;
2418
2419 if (isplex) { /* this code has been modified from plexpartition.c */
2420 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2421 PetscInt *adj = NULL;
2422 IS cellNumbering;
2423 const PetscInt *cellNum;
2424 PetscBool useCone, useClosure;
2425 PetscSection section;
2426 PetscSegBuffer adjBuffer;
2427 PetscSF sfPoint;
2428
2429 PetscCall(DMConvert(dm, DMPLEX, &dm));
2430 PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2431 PetscCall(DMGetPointSF(dm, &sfPoint));
2432 PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2433 /* Build adjacency graph via a section/segbuffer */
2434 PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion));
2435 PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2436 PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2437 /* Always use FVM adjacency to create partitioner graph */
2438 PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2439 PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2440 PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2441 PetscCall(ISGetIndices(cellNumbering, &cellNum));
2442 for (n = 0, p = pStart; p < pEnd; p++) {
2443 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2444 if (nroots > 0) {
2445 if (cellNum[p] < 0) continue;
2446 }
2447 adjSize = PETSC_DETERMINE;
2448 PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2449 for (a = 0; a < adjSize; ++a) {
2450 const PetscInt point = adj[a];
2451 if (pStart <= point && point < pEnd) {
2452 PetscInt *PETSC_RESTRICT pBuf;
2453 PetscCall(PetscSectionAddDof(section, p, 1));
2454 PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2455 *pBuf = point;
2456 }
2457 }
2458 n++;
2459 }
2460 PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2461 /* Derive CSR graph from section/segbuffer */
2462 PetscCall(PetscSectionSetUp(section));
2463 PetscCall(PetscSectionGetStorageSize(section, &size));
2464 PetscCall(PetscMalloc1(n + 1, &xadj));
2465 for (idx = 0, p = pStart; p < pEnd; p++) {
2466 if (nroots > 0) {
2467 if (cellNum[p] < 0) continue;
2468 }
2469 PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2470 }
2471 xadj[n] = size;
2472 PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2473 /* Clean up */
2474 PetscCall(PetscSegBufferDestroy(&adjBuffer));
2475 PetscCall(PetscSectionDestroy(§ion));
2476 PetscCall(PetscFree(adj));
2477 graph->xadj = xadj;
2478 graph->adjncy = adjncy;
2479 } else {
2480 Mat A;
2481 PetscBool isseqaij, flg_row;
2482
2483 PetscCall(MatISGetLocalMat(pc->pmat, &A));
2484 if (!A->rmap->N || !A->cmap->N) {
2485 PetscCall(PCBDDCGraphDestroy(&graph));
2486 PetscFunctionReturn(PETSC_SUCCESS);
2487 }
2488 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2489 if (!isseqaij && filter) {
2490 PetscBool isseqdense;
2491
2492 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2493 if (!isseqdense) {
2494 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2495 } else { /* TODO: rectangular case and LDA */
2496 PetscScalar *array;
2497 PetscReal chop = 1.e-6;
2498
2499 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2500 PetscCall(MatDenseGetArray(B, &array));
2501 PetscCall(MatGetSize(B, &n, NULL));
2502 for (i = 0; i < n; i++) {
2503 PetscInt j;
2504 for (j = i + 1; j < n; j++) {
2505 PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2506 if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2507 if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2508 }
2509 }
2510 PetscCall(MatDenseRestoreArray(B, &array));
2511 PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2512 }
2513 } else {
2514 PetscCall(PetscObjectReference((PetscObject)A));
2515 B = A;
2516 }
2517 PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2518
2519 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2520 if (filter) {
2521 PetscScalar *data;
2522 PetscInt j, cum;
2523
2524 PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2525 PetscCall(MatSeqAIJGetArray(B, &data));
2526 cum = 0;
2527 for (i = 0; i < n; i++) {
2528 PetscInt t;
2529
2530 for (j = xadj[i]; j < xadj[i + 1]; j++) {
2531 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2532 adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2533 }
2534 t = xadj_filtered[i];
2535 xadj_filtered[i] = cum;
2536 cum += t;
2537 }
2538 PetscCall(MatSeqAIJRestoreArray(B, &data));
2539 graph->xadj = xadj_filtered;
2540 graph->adjncy = adjncy_filtered;
2541 } else {
2542 graph->xadj = xadj;
2543 graph->adjncy = adjncy;
2544 }
2545 }
2546 /* compute local connected components using PCBDDCGraph */
2547 graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2548 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2549 PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2550 PetscCall(ISDestroy(&is_dummy));
2551 PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2552 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2553 PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2554 PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2555
2556 /* partial clean up */
2557 PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2558 if (B) {
2559 PetscBool flg_row;
2560 PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2561 PetscCall(MatDestroy(&B));
2562 }
2563 if (isplex) {
2564 PetscCall(PetscFree(xadj));
2565 PetscCall(PetscFree(adjncy));
2566 }
2567
2568 /* get back data */
2569 if (isplex) {
2570 if (ncc) *ncc = graph->ncc;
2571 if (cc || primalv) {
2572 Mat A;
2573 PetscBT btv, btvt, btvc;
2574 PetscSection subSection;
2575 PetscInt *ids, cum, cump, *cids, *pids;
2576 PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2577
2578 PetscCall(DMGetDimension(dm, &dim));
2579 PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2580 PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2581 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2582 PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2583 PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2584 PetscCall(MatISGetLocalMat(pc->pmat, &A));
2585 PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2586 PetscCall(PetscBTCreate(A->rmap->n, &btv));
2587 PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2588 PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2589
2590 /* First see if we find corners for the subdomains, i.e. a vertex
2591 shared by at least dim subdomain boundary faces. This does not
2592 cover all the possible cases with simplices but it is enough
2593 for tensor cells */
2594 if (vStart != fStart && dim <= 3) {
2595 for (PetscInt c = cStart; c < cEnd; c++) {
2596 PetscInt nf, cnt = 0, mcnt = dim, *cfaces;
2597 const PetscInt *faces;
2598
2599 PetscCall(DMPlexGetConeSize(dm, c, &nf));
2600 PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2601 PetscCall(DMPlexGetCone(dm, c, &faces));
2602 for (PetscInt f = 0; f < nf; f++) {
2603 PetscInt nc, ff;
2604
2605 PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2606 PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2607 if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2608 }
2609 if (cnt >= mcnt) {
2610 PetscInt size, *closure = NULL;
2611
2612 PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2613 for (PetscInt k = 0; k < 2 * size; k += 2) {
2614 PetscInt v = closure[k];
2615 if (v >= vStart && v < vEnd) {
2616 PetscInt vsize, *vclosure = NULL;
2617
2618 cnt = 0;
2619 PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2620 for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2621 PetscInt f = vclosure[vk];
2622 if (f >= fStart && f < fEnd) {
2623 PetscInt nc, ff;
2624 PetscBool valid = PETSC_FALSE;
2625
2626 for (PetscInt fk = 0; fk < nf; fk++)
2627 if (f == cfaces[fk]) valid = PETSC_TRUE;
2628 if (!valid) continue;
2629 PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2630 PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2631 if (nc == 1 && f == ff) cnt++;
2632 }
2633 }
2634 if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2635 PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2636 }
2637 }
2638 PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2639 }
2640 PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2641 }
2642 }
2643
2644 cids[0] = 0;
2645 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2646 PetscInt j;
2647
2648 PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2649 for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2650 PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2651
2652 PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2653 for (k = 0; k < 2 * size; k += 2) {
2654 PetscInt s, pp, p = closure[k], off, dof, cdof;
2655
2656 PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2657 PetscCall(PetscSectionGetOffset(subSection, p, &off));
2658 PetscCall(PetscSectionGetDof(subSection, p, &dof));
2659 for (s = 0; s < dof - cdof; s++) {
2660 if (PetscBTLookupSet(btvt, off + s)) continue;
2661 if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2662 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2663 else pids[cump++] = off + s; /* cross-vertex */
2664 }
2665 PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2666 if (pp != p) {
2667 PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2668 PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2669 PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2670 for (s = 0; s < dof - cdof; s++) {
2671 if (PetscBTLookupSet(btvt, off + s)) continue;
2672 if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2673 else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2674 else pids[cump++] = off + s; /* cross-vertex */
2675 }
2676 }
2677 }
2678 PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2679 }
2680 cids[i + 1] = cum;
2681 /* mark dofs as already assigned */
2682 for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2683 }
2684 if (cc) {
2685 PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2686 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2687 *cc = cc_n;
2688 }
2689 if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2690 PetscCall(PetscFree3(ids, cids, pids));
2691 PetscCall(PetscBTDestroy(&btv));
2692 PetscCall(PetscBTDestroy(&btvt));
2693 PetscCall(PetscBTDestroy(&btvc));
2694 PetscCall(DMDestroy(&dm));
2695 }
2696 } else {
2697 if (ncc) *ncc = graph->ncc;
2698 if (cc) {
2699 PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2700 for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2701 *cc = cc_n;
2702 }
2703 }
2704 /* clean up graph */
2705 graph->xadj = NULL;
2706 graph->adjncy = NULL;
2707 PetscCall(PCBDDCGraphDestroy(&graph));
2708 PetscFunctionReturn(PETSC_SUCCESS);
2709 }
2710
PCBDDCBenignCheck(PC pc,IS zerodiag)2711 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2712 {
2713 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2714 PC_IS *pcis = (PC_IS *)pc->data;
2715 IS dirIS = NULL;
2716 PetscInt i;
2717
2718 PetscFunctionBegin;
2719 PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2720 if (zerodiag) {
2721 Mat A;
2722 Vec vec3_N;
2723 PetscScalar *vals;
2724 const PetscInt *idxs;
2725 PetscInt nz, *count;
2726
2727 /* p0 */
2728 PetscCall(VecSet(pcis->vec1_N, 0.));
2729 PetscCall(PetscMalloc1(pcis->n, &vals));
2730 PetscCall(ISGetLocalSize(zerodiag, &nz));
2731 PetscCall(ISGetIndices(zerodiag, &idxs));
2732 for (i = 0; i < nz; i++) vals[i] = 1.;
2733 PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2734 PetscCall(VecAssemblyBegin(pcis->vec1_N));
2735 PetscCall(VecAssemblyEnd(pcis->vec1_N));
2736 /* v_I */
2737 PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2738 for (i = 0; i < nz; i++) vals[i] = 0.;
2739 PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2740 PetscCall(ISRestoreIndices(zerodiag, &idxs));
2741 PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2742 for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2743 PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2744 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2745 if (dirIS) {
2746 PetscInt n;
2747
2748 PetscCall(ISGetLocalSize(dirIS, &n));
2749 PetscCall(ISGetIndices(dirIS, &idxs));
2750 for (i = 0; i < n; i++) vals[i] = 0.;
2751 PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2752 PetscCall(ISRestoreIndices(dirIS, &idxs));
2753 }
2754 PetscCall(VecAssemblyBegin(pcis->vec2_N));
2755 PetscCall(VecAssemblyEnd(pcis->vec2_N));
2756 PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2757 PetscCall(VecSet(vec3_N, 0.));
2758 PetscCall(MatISGetLocalMat(pc->pmat, &A));
2759 PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2760 PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2761 PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2762 PetscCall(PetscFree(vals));
2763 PetscCall(VecDestroy(&vec3_N));
2764
2765 /* there should not be any pressure dofs lying on the interface */
2766 PetscCall(PetscCalloc1(pcis->n, &count));
2767 PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2768 for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2769 PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2770 PetscCall(ISGetIndices(zerodiag, &idxs));
2771 for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2772 PetscCall(ISRestoreIndices(zerodiag, &idxs));
2773 PetscCall(PetscFree(count));
2774 }
2775 PetscCall(ISDestroy(&dirIS));
2776
2777 /* check PCBDDCBenignGetOrSetP0 */
2778 PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2779 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2780 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2781 for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2782 PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2783 for (i = 0; i < pcbddc->benign_n; i++) {
2784 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2785 PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2786 }
2787 PetscFunctionReturn(PETSC_SUCCESS);
2788 }
2789
PCBDDCBenignDetectSaddlePoint(PC pc,PetscBool reuse,IS * zerodiaglocal)2790 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2791 {
2792 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2793 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2794 IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2795 PetscInt nz, n, benign_n, bsp = 1;
2796 PetscInt *interior_dofs, n_interior_dofs, nneu;
2797 PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2798
2799 PetscFunctionBegin;
2800 if (reuse) goto project_b0;
2801 PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2802 PetscCall(MatDestroy(&pcbddc->benign_B0));
2803 for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2804 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2805 has_null_pressures = PETSC_TRUE;
2806 have_null = PETSC_TRUE;
2807 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2808 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2809 Checks if all the pressure dofs in each subdomain have a zero diagonal
2810 If not, a change of basis on pressures is not needed
2811 since the local Schur complements are already SPD
2812 */
2813 if (pcbddc->n_ISForDofsLocal) {
2814 IS iP = NULL;
2815 PetscInt p, *pp;
2816 PetscBool flg, blocked = PETSC_FALSE;
2817
2818 PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2819 n = pcbddc->n_ISForDofsLocal;
2820 PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2821 PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2822 PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL));
2823 PetscOptionsEnd();
2824 if (!flg) {
2825 n = 1;
2826 pp[0] = pcbddc->n_ISForDofsLocal - 1;
2827 }
2828
2829 bsp = 0;
2830 for (p = 0; p < n; p++) {
2831 PetscInt bs = 1;
2832
2833 PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2834 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2835 bsp += bs;
2836 }
2837 PetscCall(PetscMalloc1(bsp, &bzerodiag));
2838 bsp = 0;
2839 for (p = 0; p < n; p++) {
2840 const PetscInt *idxs;
2841 PetscInt b, bs = 1, npl, *bidxs;
2842
2843 if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2844 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2845 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2846 PetscCall(PetscMalloc1(npl / bs, &bidxs));
2847 for (b = 0; b < bs; b++) {
2848 PetscInt i;
2849
2850 for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2851 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2852 bsp++;
2853 }
2854 PetscCall(PetscFree(bidxs));
2855 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2856 }
2857 PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2858
2859 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2860 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2861 if (iP) {
2862 IS newpressures;
2863
2864 PetscCall(ISDifference(pressures, iP, &newpressures));
2865 PetscCall(ISDestroy(&pressures));
2866 pressures = newpressures;
2867 }
2868 PetscCall(ISSorted(pressures, &sorted));
2869 if (!sorted) PetscCall(ISSort(pressures));
2870 PetscCall(PetscFree(pp));
2871 }
2872
2873 /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2874 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2875 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2876 PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2877 PetscCall(ISSorted(zerodiag, &sorted));
2878 if (!sorted) PetscCall(ISSort(zerodiag));
2879 PetscCall(PetscObjectReference((PetscObject)zerodiag));
2880 zerodiag_save = zerodiag;
2881 PetscCall(ISGetLocalSize(zerodiag, &nz));
2882 if (!nz) {
2883 if (n) have_null = PETSC_FALSE;
2884 has_null_pressures = PETSC_FALSE;
2885 PetscCall(ISDestroy(&zerodiag));
2886 }
2887 recompute_zerodiag = PETSC_FALSE;
2888
2889 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2890 zerodiag_subs = NULL;
2891 benign_n = 0;
2892 n_interior_dofs = 0;
2893 interior_dofs = NULL;
2894 nneu = 0;
2895 if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2896 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2897 if (checkb) { /* need to compute interior nodes */
2898 PetscInt n, i;
2899 PetscInt *count;
2900 ISLocalToGlobalMapping mapping;
2901
2902 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2903 PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2904 PetscCall(PetscMalloc1(n, &interior_dofs));
2905 for (i = 0; i < n; i++)
2906 if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2907 PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2908 }
2909 if (has_null_pressures) {
2910 IS *subs;
2911 PetscInt nsubs, i, j, nl;
2912 const PetscInt *idxs;
2913 PetscScalar *array;
2914 Vec *work;
2915
2916 subs = pcbddc->local_subs;
2917 nsubs = pcbddc->n_local_subs;
2918 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2919 if (checkb) {
2920 PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2921 PetscCall(ISGetLocalSize(zerodiag, &nl));
2922 PetscCall(ISGetIndices(zerodiag, &idxs));
2923 /* work[0] = 1_p */
2924 PetscCall(VecSet(work[0], 0.));
2925 PetscCall(VecGetArray(work[0], &array));
2926 for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2927 PetscCall(VecRestoreArray(work[0], &array));
2928 /* work[0] = 1_v */
2929 PetscCall(VecSet(work[1], 1.));
2930 PetscCall(VecGetArray(work[1], &array));
2931 for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2932 PetscCall(VecRestoreArray(work[1], &array));
2933 PetscCall(ISRestoreIndices(zerodiag, &idxs));
2934 }
2935
2936 if (nsubs > 1 || bsp > 1) {
2937 IS *is;
2938 PetscInt b, totb;
2939
2940 totb = bsp;
2941 is = bsp > 1 ? bzerodiag : &zerodiag;
2942 nsubs = PetscMax(nsubs, 1);
2943 PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2944 for (b = 0; b < totb; b++) {
2945 for (i = 0; i < nsubs; i++) {
2946 ISLocalToGlobalMapping l2g;
2947 IS t_zerodiag_subs;
2948 PetscInt nl;
2949
2950 if (subs) {
2951 PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2952 } else {
2953 IS tis;
2954
2955 PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2956 PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2957 PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2958 PetscCall(ISDestroy(&tis));
2959 }
2960 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2961 PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2962 if (nl) {
2963 PetscBool valid = PETSC_TRUE;
2964
2965 if (checkb) {
2966 PetscCall(VecSet(matis->x, 0));
2967 PetscCall(ISGetLocalSize(subs[i], &nl));
2968 PetscCall(ISGetIndices(subs[i], &idxs));
2969 PetscCall(VecGetArray(matis->x, &array));
2970 for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2971 PetscCall(VecRestoreArray(matis->x, &array));
2972 PetscCall(ISRestoreIndices(subs[i], &idxs));
2973 PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2974 PetscCall(MatMult(matis->A, matis->x, matis->y));
2975 PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2976 PetscCall(VecGetArray(matis->y, &array));
2977 for (j = 0; j < n_interior_dofs; j++) {
2978 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2979 valid = PETSC_FALSE;
2980 break;
2981 }
2982 }
2983 PetscCall(VecRestoreArray(matis->y, &array));
2984 }
2985 if (valid && nneu) {
2986 const PetscInt *idxs;
2987 PetscInt nzb;
2988
2989 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2990 PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2991 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2992 if (nzb) valid = PETSC_FALSE;
2993 }
2994 if (valid && pressures) {
2995 IS t_pressure_subs, tmp;
2996 PetscInt i1, i2;
2997
2998 PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
2999 PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
3000 PetscCall(ISGetLocalSize(tmp, &i1));
3001 PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
3002 if (i2 != i1) valid = PETSC_FALSE;
3003 PetscCall(ISDestroy(&t_pressure_subs));
3004 PetscCall(ISDestroy(&tmp));
3005 }
3006 if (valid) {
3007 PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
3008 benign_n++;
3009 } else recompute_zerodiag = PETSC_TRUE;
3010 }
3011 PetscCall(ISDestroy(&t_zerodiag_subs));
3012 PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
3013 }
3014 }
3015 } else { /* there's just one subdomain (or zero if they have not been detected */
3016 PetscBool valid = PETSC_TRUE;
3017
3018 if (nneu) valid = PETSC_FALSE;
3019 if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
3020 if (valid && checkb) {
3021 PetscCall(MatMult(matis->A, work[0], matis->x));
3022 PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
3023 PetscCall(VecGetArray(matis->x, &array));
3024 for (j = 0; j < n_interior_dofs; j++) {
3025 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
3026 valid = PETSC_FALSE;
3027 break;
3028 }
3029 }
3030 PetscCall(VecRestoreArray(matis->x, &array));
3031 }
3032 if (valid) {
3033 benign_n = 1;
3034 PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
3035 PetscCall(PetscObjectReference((PetscObject)zerodiag));
3036 zerodiag_subs[0] = zerodiag;
3037 }
3038 }
3039 if (checkb) PetscCall(VecDestroyVecs(2, &work));
3040 }
3041 PetscCall(PetscFree(interior_dofs));
3042
3043 if (!benign_n) {
3044 PetscInt n;
3045
3046 PetscCall(ISDestroy(&zerodiag));
3047 recompute_zerodiag = PETSC_FALSE;
3048 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3049 if (n) have_null = PETSC_FALSE;
3050 }
3051
3052 /* final check for null pressures */
3053 if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
3054
3055 if (recompute_zerodiag) {
3056 PetscCall(ISDestroy(&zerodiag));
3057 if (benign_n == 1) {
3058 PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
3059 zerodiag = zerodiag_subs[0];
3060 } else {
3061 PetscInt i, nzn, *new_idxs;
3062
3063 nzn = 0;
3064 for (i = 0; i < benign_n; i++) {
3065 PetscInt ns;
3066 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
3067 nzn += ns;
3068 }
3069 PetscCall(PetscMalloc1(nzn, &new_idxs));
3070 nzn = 0;
3071 for (i = 0; i < benign_n; i++) {
3072 PetscInt ns, *idxs;
3073 PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
3074 PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
3075 PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
3076 PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
3077 nzn += ns;
3078 }
3079 PetscCall(PetscSortInt(nzn, new_idxs));
3080 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
3081 }
3082 have_null = PETSC_FALSE;
3083 }
3084
3085 /* determines if the coarse solver will be singular or not */
3086 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
3087
3088 /* Prepare matrix to compute no-net-flux */
3089 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
3090 Mat A, loc_divudotp;
3091 ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
3092 IS row, col, isused = NULL;
3093 PetscInt M, N, n, st, n_isused;
3094
3095 if (pressures) {
3096 isused = pressures;
3097 } else {
3098 isused = zerodiag_save;
3099 }
3100 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
3101 PetscCall(MatISGetLocalMat(pc->pmat, &A));
3102 PetscCall(MatGetLocalSize(A, &n, NULL));
3103 PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
3104 n_isused = 0;
3105 if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
3106 PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
3107 st = st - n_isused;
3108 if (n) {
3109 const PetscInt *gidxs;
3110
3111 PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
3112 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
3113 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
3114 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3115 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
3116 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
3117 } else {
3118 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3119 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3120 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3121 }
3122 PetscCall(MatGetSize(pc->pmat, NULL, &N));
3123 PetscCall(ISGetSize(row, &M));
3124 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3125 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3126 PetscCall(ISDestroy(&row));
3127 PetscCall(ISDestroy(&col));
3128 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3129 PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3130 PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3131 PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3132 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3133 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3134 PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3135 PetscCall(MatDestroy(&loc_divudotp));
3136 PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3137 PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3138 }
3139 PetscCall(ISDestroy(&zerodiag_save));
3140 PetscCall(ISDestroy(&pressures));
3141 if (bzerodiag) {
3142 PetscInt i;
3143
3144 for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3145 PetscCall(PetscFree(bzerodiag));
3146 }
3147 pcbddc->benign_n = benign_n;
3148 pcbddc->benign_zerodiag_subs = zerodiag_subs;
3149
3150 /* determines if the problem has subdomains with 0 pressure block */
3151 have_null = (PetscBool)(!!pcbddc->benign_n);
3152 PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3153
3154 project_b0:
3155 PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3156 /* change of basis and p0 dofs */
3157 if (pcbddc->benign_n) {
3158 PetscInt i, s, *nnz;
3159
3160 /* local change of basis for pressures */
3161 PetscCall(MatDestroy(&pcbddc->benign_change));
3162 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3163 PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3164 PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3165 PetscCall(PetscMalloc1(n, &nnz));
3166 for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3167 for (i = 0; i < pcbddc->benign_n; i++) {
3168 const PetscInt *idxs;
3169 PetscInt nzs, j;
3170
3171 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3172 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3173 for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3174 nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */
3175 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3176 }
3177 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3178 PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3179 PetscCall(PetscFree(nnz));
3180 /* set identity by default */
3181 for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3182 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3183 PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3184 /* set change on pressures */
3185 for (s = 0; s < pcbddc->benign_n; s++) {
3186 PetscScalar *array;
3187 const PetscInt *idxs;
3188 PetscInt nzs;
3189
3190 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3191 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3192 for (i = 0; i < nzs - 1; i++) {
3193 PetscScalar vals[2];
3194 PetscInt cols[2];
3195
3196 cols[0] = idxs[i];
3197 cols[1] = idxs[nzs - 1];
3198 vals[0] = 1.;
3199 vals[1] = 1.;
3200 PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3201 }
3202 PetscCall(PetscMalloc1(nzs, &array));
3203 for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3204 array[nzs - 1] = 1.;
3205 PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3206 /* store local idxs for p0 */
3207 pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3208 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3209 PetscCall(PetscFree(array));
3210 }
3211 PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3212 PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3213
3214 /* project if needed */
3215 if (pcbddc->benign_change_explicit) {
3216 Mat M;
3217
3218 PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3219 PetscCall(MatDestroy(&pcbddc->local_mat));
3220 PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3221 PetscCall(MatDestroy(&M));
3222 }
3223 /* store global idxs for p0 */
3224 PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3225 }
3226 *zerodiaglocal = zerodiag;
3227 PetscFunctionReturn(PETSC_SUCCESS);
3228 }
3229
PCBDDCBenignGetOrSetP0(PC pc,Vec v,PetscBool get)3230 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3231 {
3232 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3233 PetscScalar *array;
3234
3235 PetscFunctionBegin;
3236 if (!pcbddc->benign_sf) {
3237 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3238 PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3239 }
3240 if (get) {
3241 PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3242 PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3243 PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3244 PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3245 } else {
3246 PetscCall(VecGetArray(v, &array));
3247 PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3248 PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3249 PetscCall(VecRestoreArray(v, &array));
3250 }
3251 PetscFunctionReturn(PETSC_SUCCESS);
3252 }
3253
PCBDDCBenignPopOrPushB0(PC pc,PetscBool pop)3254 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3255 {
3256 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3257
3258 PetscFunctionBegin;
3259 /* TODO: add error checking
3260 - avoid nested pop (or push) calls.
3261 - cannot push before pop.
3262 - cannot call this if pcbddc->local_mat is NULL
3263 */
3264 if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3265 if (pop) {
3266 if (pcbddc->benign_change_explicit) {
3267 IS is_p0;
3268 MatReuse reuse;
3269
3270 /* extract B_0 */
3271 reuse = MAT_INITIAL_MATRIX;
3272 if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3273 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3274 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3275 /* remove rows and cols from local problem */
3276 PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3277 PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3278 PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3279 PetscCall(ISDestroy(&is_p0));
3280 } else {
3281 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
3282 PetscScalar *vals;
3283 PetscInt i, n, *idxs_ins;
3284
3285 PetscCall(VecGetLocalSize(matis->y, &n));
3286 PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3287 if (!pcbddc->benign_B0) {
3288 PetscInt *nnz;
3289 PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3290 PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3291 PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3292 PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3293 for (i = 0; i < pcbddc->benign_n; i++) {
3294 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3295 nnz[i] = n - nnz[i];
3296 }
3297 PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3298 PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3299 PetscCall(PetscFree(nnz));
3300 }
3301
3302 for (i = 0; i < pcbddc->benign_n; i++) {
3303 PetscScalar *array;
3304 PetscInt *idxs, j, nz, cum;
3305
3306 PetscCall(VecSet(matis->x, 0.));
3307 PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3308 PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3309 for (j = 0; j < nz; j++) vals[j] = 1.;
3310 PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3311 PetscCall(VecAssemblyBegin(matis->x));
3312 PetscCall(VecAssemblyEnd(matis->x));
3313 PetscCall(VecSet(matis->y, 0.));
3314 PetscCall(MatMult(matis->A, matis->x, matis->y));
3315 PetscCall(VecGetArray(matis->y, &array));
3316 cum = 0;
3317 for (j = 0; j < n; j++) {
3318 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3319 vals[cum] = array[j];
3320 idxs_ins[cum] = j;
3321 cum++;
3322 }
3323 }
3324 PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3325 PetscCall(VecRestoreArray(matis->y, &array));
3326 PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3327 }
3328 PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3329 PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3330 PetscCall(PetscFree2(idxs_ins, vals));
3331 }
3332 } else { /* push */
3333
3334 PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3335 for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3336 PetscScalar *B0_vals;
3337 PetscInt *B0_cols, B0_ncol;
3338
3339 PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3340 PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3341 PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3342 PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3343 PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3344 }
3345 PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3346 PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3347 }
3348 PetscFunctionReturn(PETSC_SUCCESS);
3349 }
3350
PCBDDCAdaptiveSelection(PC pc)3351 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3352 {
3353 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3354 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3355 PetscBLASInt B_neigs, B_ierr, B_lwork;
3356 PetscBLASInt *B_iwork, *B_ifail;
3357 PetscScalar *work, lwork;
3358 PetscScalar *St, *S, *eigv;
3359 PetscScalar *Sarray, *Starray;
3360 PetscReal *eigs, thresh, lthresh, uthresh;
3361 PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3362 PetscBool allocated_S_St, upart;
3363 #if defined(PETSC_USE_COMPLEX)
3364 PetscReal *rwork;
3365 #endif
3366
3367 PetscFunctionBegin;
3368 if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3369 PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3370 PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3371 PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3372 sub_schurs->is_posdef);
3373 PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3374
3375 if (pcbddc->dbg_flag) {
3376 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3377 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3378 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3379 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3380 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3381 }
3382
3383 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3384
3385 /* max size of subsets */
3386 mss = 0;
3387 for (i = 0; i < sub_schurs->n_subs; i++) {
3388 PetscInt subset_size;
3389
3390 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3391 mss = PetscMax(mss, subset_size);
3392 }
3393
3394 /* min/max and threshold */
3395 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3396 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3397 nmax = PetscMax(nmin, nmax);
3398 allocated_S_St = PETSC_FALSE;
3399 if (nmin || !sub_schurs->is_posdef) { /* XXX */
3400 allocated_S_St = PETSC_TRUE;
3401 }
3402
3403 /* allocate lapack workspace */
3404 cum = cum2 = 0;
3405 maxneigs = 0;
3406 for (i = 0; i < sub_schurs->n_subs; i++) {
3407 PetscInt n, subset_size;
3408
3409 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3410 n = PetscMin(subset_size, nmax);
3411 cum += subset_size;
3412 cum2 += subset_size * n;
3413 maxneigs = PetscMax(maxneigs, n);
3414 }
3415 lwork = 0;
3416 if (mss) {
3417 PetscScalar sdummy = 0.;
3418 PetscBLASInt B_itype = 1;
3419 PetscBLASInt B_N, idummy = 0;
3420 PetscReal rdummy = 0., zero = 0.0;
3421 PetscReal eps = 0.0; /* dlamch? */
3422
3423 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3424 PetscCall(PetscBLASIntCast(mss, &B_N));
3425 B_lwork = -1;
3426 /* some implementations may complain about NULL pointers, even if we are querying */
3427 S = &sdummy;
3428 St = &sdummy;
3429 eigs = &rdummy;
3430 eigv = &sdummy;
3431 B_iwork = &idummy;
3432 B_ifail = &idummy;
3433 #if defined(PETSC_USE_COMPLEX)
3434 rwork = &rdummy;
3435 #endif
3436 thresh = 1.0;
3437 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3438 #if defined(PETSC_USE_COMPLEX)
3439 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3440 #else
3441 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3442 #endif
3443 PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr);
3444 PetscCall(PetscFPTrapPop());
3445 }
3446
3447 nv = 0;
3448 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3449 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3450 }
3451 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3452 if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3453 PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3454 #if defined(PETSC_USE_COMPLEX)
3455 PetscCall(PetscMalloc1(7 * mss, &rwork));
3456 #endif
3457 PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3458 &pcbddc->adaptive_constraints_data));
3459 PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3460
3461 maxneigs = 0;
3462 cum = cumarray = 0;
3463 pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3464 pcbddc->adaptive_constraints_data_ptr[0] = 0;
3465 if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3466 const PetscInt *idxs;
3467
3468 PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3469 for (cum = 0; cum < nv; cum++) {
3470 pcbddc->adaptive_constraints_n[cum] = 1;
3471 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3472 pcbddc->adaptive_constraints_data[cum] = 1.0;
3473 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3474 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3475 }
3476 PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3477 }
3478
3479 if (mss) { /* multilevel */
3480 if (sub_schurs->gdsw) {
3481 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3482 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3483 } else {
3484 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3485 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3486 }
3487 }
3488
3489 lthresh = pcbddc->adaptive_threshold[0];
3490 uthresh = pcbddc->adaptive_threshold[1];
3491 upart = pcbddc->use_deluxe_scaling;
3492 for (i = 0; i < sub_schurs->n_subs; i++) {
3493 const PetscInt *idxs;
3494 PetscReal upper, lower;
3495 PetscInt j, subset_size, eigs_start = 0;
3496 PetscBLASInt B_N;
3497 PetscBool same_data = PETSC_FALSE;
3498 PetscBool scal = PETSC_FALSE;
3499
3500 if (upart) {
3501 upper = PETSC_MAX_REAL;
3502 lower = uthresh;
3503 } else {
3504 if (sub_schurs->gdsw) {
3505 upper = uthresh;
3506 lower = PETSC_MIN_REAL;
3507 } else {
3508 PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3509 upper = 1. / uthresh;
3510 lower = 0.;
3511 }
3512 }
3513 PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3514 PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3515 PetscCall(PetscBLASIntCast(subset_size, &B_N));
3516 /* this is experimental: we assume the dofs have been properly grouped to have
3517 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3518 if (!sub_schurs->is_posdef) {
3519 Mat T;
3520
3521 for (j = 0; j < subset_size; j++) {
3522 if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3523 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3524 PetscCall(MatScale(T, -1.0));
3525 PetscCall(MatDestroy(&T));
3526 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3527 PetscCall(MatScale(T, -1.0));
3528 PetscCall(MatDestroy(&T));
3529 if (sub_schurs->change_primal_sub) {
3530 PetscInt nz, k;
3531 const PetscInt *idxs;
3532
3533 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3534 PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3535 for (k = 0; k < nz; k++) {
3536 *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3537 *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3538 }
3539 PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3540 }
3541 scal = PETSC_TRUE;
3542 break;
3543 }
3544 }
3545 }
3546
3547 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3548 if (sub_schurs->is_symmetric) {
3549 PetscInt j, k;
3550 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3551 PetscCall(PetscArrayzero(S, subset_size * subset_size));
3552 PetscCall(PetscArrayzero(St, subset_size * subset_size));
3553 }
3554 for (j = 0; j < subset_size; j++) {
3555 for (k = j; k < subset_size; k++) {
3556 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k];
3557 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3558 }
3559 }
3560 } else {
3561 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3562 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3563 }
3564 } else {
3565 S = Sarray + cumarray;
3566 St = Starray + cumarray;
3567 }
3568 /* see if we can save some work */
3569 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3570
3571 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3572 B_neigs = 0;
3573 } else {
3574 PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0;
3575 PetscReal eps = -1.0; /* dlamch? */
3576 PetscInt nmin_s;
3577 PetscBool compute_range;
3578
3579 PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3580 B_neigs = 0;
3581 compute_range = (PetscBool)!same_data;
3582 if (nmin >= subset_size) compute_range = PETSC_FALSE;
3583
3584 if (pcbddc->dbg_flag) {
3585 PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3586
3587 if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3588 PetscCall(
3589 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3590 }
3591
3592 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3593 if (compute_range) {
3594 /* ask for eigenvalues larger than thresh */
3595 if (sub_schurs->is_posdef) {
3596 #if defined(PETSC_USE_COMPLEX)
3597 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3598 #else
3599 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3600 #endif
3601 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3602 } else { /* no theory so far, but it works nicely */
3603 PetscInt recipe = 0, recipe_m = 1;
3604 PetscReal bb[2];
3605
3606 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3607 switch (recipe) {
3608 case 0:
3609 if (scal) {
3610 bb[0] = PETSC_MIN_REAL;
3611 bb[1] = lthresh;
3612 } else {
3613 bb[0] = uthresh;
3614 bb[1] = PETSC_MAX_REAL;
3615 }
3616 #if defined(PETSC_USE_COMPLEX)
3617 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3618 #else
3619 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3620 #endif
3621 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3622 break;
3623 case 1:
3624 bb[0] = PETSC_MIN_REAL;
3625 bb[1] = lthresh * lthresh;
3626 #if defined(PETSC_USE_COMPLEX)
3627 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3628 #else
3629 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3630 #endif
3631 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3632 if (!scal) {
3633 PetscBLASInt B_neigs2 = 0;
3634
3635 bb[0] = PetscMax(lthresh * lthresh, uthresh);
3636 bb[1] = PETSC_MAX_REAL;
3637 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3638 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3639 #if defined(PETSC_USE_COMPLEX)
3640 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3641 #else
3642 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3643 #endif
3644 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3645 B_neigs += B_neigs2;
3646 }
3647 break;
3648 case 2:
3649 if (scal) {
3650 bb[0] = PETSC_MIN_REAL;
3651 bb[1] = 0;
3652 #if defined(PETSC_USE_COMPLEX)
3653 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3654 #else
3655 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3656 #endif
3657 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3658 } else {
3659 PetscBLASInt B_neigs2 = 0;
3660 PetscBool do_copy = PETSC_FALSE;
3661
3662 lthresh = PetscMax(lthresh, 0.0);
3663 if (lthresh > 0.0) {
3664 bb[0] = PETSC_MIN_REAL;
3665 bb[1] = lthresh * lthresh;
3666
3667 do_copy = PETSC_TRUE;
3668 #if defined(PETSC_USE_COMPLEX)
3669 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3670 #else
3671 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3672 #endif
3673 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3674 }
3675 bb[0] = PetscMax(lthresh * lthresh, uthresh);
3676 bb[1] = PETSC_MAX_REAL;
3677 if (do_copy) {
3678 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3679 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3680 }
3681 #if defined(PETSC_USE_COMPLEX)
3682 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3683 #else
3684 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3685 #endif
3686 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3687 B_neigs += B_neigs2;
3688 }
3689 break;
3690 case 3:
3691 if (scal) {
3692 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3693 } else {
3694 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3695 }
3696 if (!scal) {
3697 bb[0] = uthresh;
3698 bb[1] = PETSC_MAX_REAL;
3699 #if defined(PETSC_USE_COMPLEX)
3700 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3701 #else
3702 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3703 #endif
3704 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3705 }
3706 if (recipe_m > 0 && B_N - B_neigs > 0) {
3707 PetscBLASInt B_neigs2 = 0;
3708
3709 PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3710 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3711 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3712 #if defined(PETSC_USE_COMPLEX)
3713 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3714 #else
3715 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3716 #endif
3717 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3718 B_neigs += B_neigs2;
3719 }
3720 break;
3721 case 4:
3722 bb[0] = PETSC_MIN_REAL;
3723 bb[1] = lthresh;
3724 #if defined(PETSC_USE_COMPLEX)
3725 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3726 #else
3727 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3728 #endif
3729 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3730 {
3731 PetscBLASInt B_neigs2 = 0;
3732
3733 bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3734 bb[1] = PETSC_MAX_REAL;
3735 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3736 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3737 #if defined(PETSC_USE_COMPLEX)
3738 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3739 #else
3740 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3741 #endif
3742 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3743 B_neigs += B_neigs2;
3744 }
3745 break;
3746 case 5: /* same as before: first compute all eigenvalues, then filter */
3747 #if defined(PETSC_USE_COMPLEX)
3748 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3749 #else
3750 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3751 #endif
3752 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3753 {
3754 PetscInt e, k, ne;
3755 for (e = 0, ne = 0; e < B_neigs; e++) {
3756 if (eigs[e] < lthresh || eigs[e] > uthresh) {
3757 for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3758 eigs[ne] = eigs[e];
3759 ne++;
3760 }
3761 }
3762 PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3763 PetscCall(PetscBLASIntCast(ne, &B_neigs));
3764 }
3765 break;
3766 default:
3767 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3768 }
3769 }
3770 } else if (!same_data) { /* this is just to see all the eigenvalues */
3771 PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3772 #if defined(PETSC_USE_COMPLEX)
3773 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3774 #else
3775 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3776 #endif
3777 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3778 } else { /* same_data is true, so just get the adaptive functional requested by the user */
3779 PetscInt k;
3780 PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3781 PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3782 PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3783 nmin = nmax;
3784 PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3785 for (k = 0; k < nmax; k++) {
3786 eigs[k] = 1. / PETSC_SMALL;
3787 eigv[k * (subset_size + 1)] = 1.0;
3788 }
3789 }
3790 PetscCall(PetscFPTrapPop());
3791 if (B_ierr) {
3792 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3793 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3794 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3795 }
3796
3797 if (B_neigs > nmax) {
3798 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3799 if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3800 PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3801 }
3802
3803 nmin_s = PetscMin(nmin, B_N);
3804 if (B_neigs < nmin_s) {
3805 PetscBLASInt B_neigs2 = 0;
3806
3807 if (upart) {
3808 if (scal) {
3809 PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3810 B_IL = B_neigs + 1;
3811 } else {
3812 PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3813 B_IU = B_N - B_neigs;
3814 }
3815 } else {
3816 B_IL = B_neigs + 1;
3817 PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3818 }
3819 if (pcbddc->dbg_flag) {
3820 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3821 }
3822 if (sub_schurs->is_symmetric) {
3823 PetscInt j, k;
3824 for (j = 0; j < subset_size; j++) {
3825 for (k = j; k < subset_size; k++) {
3826 S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k];
3827 St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3828 }
3829 }
3830 } else {
3831 PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3832 PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3833 }
3834 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3835 #if defined(PETSC_USE_COMPLEX)
3836 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3837 #else
3838 PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3839 #endif
3840 PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3841 PetscCall(PetscFPTrapPop());
3842 B_neigs += B_neigs2;
3843 }
3844 if (B_ierr) {
3845 PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3846 PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3847 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3848 }
3849 if (pcbddc->dbg_flag) {
3850 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3851 for (j = 0; j < B_neigs; j++) {
3852 if (!sub_schurs->gdsw) {
3853 if (eigs[j] == 0.0) {
3854 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n"));
3855 } else {
3856 if (upart) {
3857 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start]));
3858 } else {
3859 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1 / eigs[j + eigs_start])));
3860 }
3861 }
3862 } else {
3863 double pg = (double)eigs[j + eigs_start];
3864 if (pg < 2 * PETSC_SMALL) pg = 0.0;
3865 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg));
3866 }
3867 }
3868 }
3869 }
3870 /* change the basis back to the original one */
3871 if (sub_schurs->change) {
3872 Mat change, phi, phit;
3873
3874 if (pcbddc->dbg_flag > 2) {
3875 PetscInt ii;
3876 for (ii = 0; ii < B_neigs; ii++) {
3877 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3878 for (j = 0; j < B_N; j++) {
3879 #if defined(PETSC_USE_COMPLEX)
3880 PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3881 PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3882 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c));
3883 #else
3884 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3885 #endif
3886 }
3887 }
3888 }
3889 PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3890 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3891 PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3892 PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3893 PetscCall(MatDestroy(&phit));
3894 PetscCall(MatDestroy(&phi));
3895 }
3896 maxneigs = PetscMax(B_neigs, maxneigs);
3897 pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3898 if (B_neigs) {
3899 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3900
3901 if (pcbddc->dbg_flag > 1) {
3902 PetscInt ii;
3903 for (ii = 0; ii < B_neigs; ii++) {
3904 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3905 for (j = 0; j < B_N; j++) {
3906 #if defined(PETSC_USE_COMPLEX)
3907 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3908 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3909 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c));
3910 #else
3911 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3912 #endif
3913 }
3914 }
3915 }
3916 PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3917 pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3918 pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3919 cum++;
3920 }
3921 PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3922 /* shift for next computation */
3923 cumarray += subset_size * subset_size;
3924 }
3925 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3926
3927 if (mss) {
3928 if (sub_schurs->gdsw) {
3929 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3930 PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3931 } else {
3932 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3933 PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3934 /* destroy matrices (junk) */
3935 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3936 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3937 }
3938 }
3939 if (allocated_S_St) PetscCall(PetscFree2(S, St));
3940 PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3941 #if defined(PETSC_USE_COMPLEX)
3942 PetscCall(PetscFree(rwork));
3943 #endif
3944 if (pcbddc->dbg_flag) {
3945 PetscInt maxneigs_r;
3946 PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3947 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3948 }
3949 PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3950 PetscFunctionReturn(PETSC_SUCCESS);
3951 }
3952
PCBDDCSetUpSolvers(PC pc)3953 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3954 {
3955 Mat coarse_submat;
3956
3957 PetscFunctionBegin;
3958 /* Setup local scatters R_to_B and (optionally) R_to_D */
3959 /* PCBDDCSetUpLocalWorkVectors should be called first! */
3960 PetscCall(PCBDDCSetUpLocalScatters(pc));
3961
3962 /* Setup local neumann solver ksp_R */
3963 /* PCBDDCSetUpLocalScatters should be called first! */
3964 PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3965
3966 /*
3967 Setup local correction and local part of coarse basis.
3968 Gives back the dense local part of the coarse matrix in column major ordering
3969 */
3970 PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3971
3972 /* Compute total number of coarse nodes and setup coarse solver */
3973 PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3974 PetscCall(MatDestroy(&coarse_submat));
3975 PetscFunctionReturn(PETSC_SUCCESS);
3976 }
3977
PCBDDCResetCustomization(PC pc)3978 PetscErrorCode PCBDDCResetCustomization(PC pc)
3979 {
3980 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3981
3982 PetscFunctionBegin;
3983 PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3984 PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3985 PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3986 PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3987 PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3988 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3989 PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3990 PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3991 PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3992 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3993 PetscFunctionReturn(PETSC_SUCCESS);
3994 }
3995
PCBDDCResetTopography(PC pc)3996 PetscErrorCode PCBDDCResetTopography(PC pc)
3997 {
3998 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3999 PetscInt i;
4000
4001 PetscFunctionBegin;
4002 PetscCall(MatDestroy(&pcbddc->nedcG));
4003 PetscCall(ISDestroy(&pcbddc->nedclocal));
4004 PetscCall(MatDestroy(&pcbddc->discretegradient));
4005 PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
4006 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
4007 PetscCall(MatDestroy(&pcbddc->switch_static_change));
4008 PetscCall(VecDestroy(&pcbddc->work_change));
4009 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
4010 PetscCall(MatDestroy(&pcbddc->divudotp));
4011 PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
4012 PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
4013 for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
4014 pcbddc->n_local_subs = 0;
4015 PetscCall(PetscFree(pcbddc->local_subs));
4016 PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
4017 pcbddc->graphanalyzed = PETSC_FALSE;
4018 pcbddc->recompute_topography = PETSC_TRUE;
4019 pcbddc->corner_selected = PETSC_FALSE;
4020 PetscFunctionReturn(PETSC_SUCCESS);
4021 }
4022
PCBDDCResetSolvers(PC pc)4023 PetscErrorCode PCBDDCResetSolvers(PC pc)
4024 {
4025 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4026
4027 PetscFunctionBegin;
4028 PetscCall(VecDestroy(&pcbddc->coarse_vec));
4029 PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4030 PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4031 PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4032 PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4033 PetscCall(VecDestroy(&pcbddc->vec1_P));
4034 PetscCall(VecDestroy(&pcbddc->vec1_C));
4035 PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4036 PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4037 PetscCall(VecDestroy(&pcbddc->vec1_R));
4038 PetscCall(VecDestroy(&pcbddc->vec2_R));
4039 PetscCall(ISDestroy(&pcbddc->is_R_local));
4040 PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
4041 PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
4042 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
4043 PetscCall(KSPReset(pcbddc->ksp_D));
4044 PetscCall(KSPReset(pcbddc->ksp_R));
4045 PetscCall(KSPReset(pcbddc->coarse_ksp));
4046 PetscCall(MatDestroy(&pcbddc->local_mat));
4047 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
4048 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
4049 PetscCall(PetscFree(pcbddc->global_primal_indices));
4050 PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
4051 PetscCall(MatDestroy(&pcbddc->benign_change));
4052 PetscCall(VecDestroy(&pcbddc->benign_vec));
4053 PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
4054 PetscCall(MatDestroy(&pcbddc->benign_B0));
4055 PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
4056 if (pcbddc->benign_zerodiag_subs) {
4057 PetscInt i;
4058 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
4059 PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
4060 }
4061 PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
4062 PetscFunctionReturn(PETSC_SUCCESS);
4063 }
4064
PCBDDCSetUpLocalWorkVectors(PC pc)4065 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
4066 {
4067 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4068 PC_IS *pcis = (PC_IS *)pc->data;
4069 VecType impVecType;
4070 PetscInt n_constraints, n_R, old_size;
4071
4072 PetscFunctionBegin;
4073 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
4074 n_R = pcis->n - pcbddc->n_vertices;
4075 PetscCall(VecGetType(pcis->vec1_N, &impVecType));
4076 /* local work vectors (try to avoid unneeded work)*/
4077 /* R nodes */
4078 old_size = -1;
4079 if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
4080 if (n_R != old_size) {
4081 PetscCall(VecDestroy(&pcbddc->vec1_R));
4082 PetscCall(VecDestroy(&pcbddc->vec2_R));
4083 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
4084 PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
4085 PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
4086 PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
4087 }
4088 /* local primal dofs */
4089 old_size = -1;
4090 if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
4091 if (pcbddc->local_primal_size != old_size) {
4092 PetscCall(VecDestroy(&pcbddc->vec1_P));
4093 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
4094 PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
4095 PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
4096 }
4097 /* local explicit constraints */
4098 old_size = -1;
4099 if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
4100 if (n_constraints && n_constraints != old_size) {
4101 PetscCall(VecDestroy(&pcbddc->vec1_C));
4102 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
4103 PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
4104 PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
4105 }
4106 PetscFunctionReturn(PETSC_SUCCESS);
4107 }
4108
MatSetValuesSubMat(Mat A,Mat S,PetscInt nr,const PetscInt rows[],PetscInt nc,const PetscInt cols[],InsertMode imode)4109 static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
4110 {
4111 PetscBool flg;
4112 const PetscScalar *a;
4113
4114 PetscFunctionBegin;
4115 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
4116 if (flg) {
4117 PetscCall(MatDenseGetArrayRead(S, &a));
4118 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4119 PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4120 PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4121 PetscCall(MatDenseRestoreArrayRead(S, &a));
4122 } else {
4123 const PetscInt *ii, *jj;
4124 PetscInt n;
4125 PetscInt buf[8192], *bufc = NULL;
4126 PetscBool freeb = PETSC_FALSE;
4127 Mat Sm = S;
4128
4129 PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4130 if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4131 else PetscCall(PetscObjectReference((PetscObject)S));
4132 PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4133 PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4134 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4135 if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4136 bufc = buf;
4137 } else {
4138 PetscCall(PetscMalloc1(nc, &bufc));
4139 freeb = PETSC_TRUE;
4140 }
4141
4142 for (PetscInt i = 0; i < n; i++) {
4143 const PetscInt nci = ii[i + 1] - ii[i];
4144
4145 for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4146 PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4147 }
4148 PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4149 PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4150 PetscCall(MatDestroy(&Sm));
4151 if (freeb) PetscCall(PetscFree(bufc));
4152 }
4153 PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4154 PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4155 PetscFunctionReturn(PETSC_SUCCESS);
4156 }
4157
MatCreateSeqAIJFromDenseExpand(Mat D,PetscInt n,const PetscInt j[],Mat * mat)4158 static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4159 {
4160 Mat_SeqAIJ *aij;
4161 PetscInt *ii, *jj;
4162 PetscScalar *aa;
4163 PetscInt nnz = 0, m, nc;
4164 const PetscScalar *a;
4165 const PetscScalar zero = 0.0;
4166
4167 PetscFunctionBegin;
4168 PetscCall(MatGetLocalSize(D, &m, &nc));
4169 PetscCall(MatDenseGetArrayRead(D, &a));
4170 PetscCall(PetscMalloc1(m + 1, &ii));
4171 PetscCall(PetscMalloc1(m * nc, &jj));
4172 PetscCall(PetscMalloc1(m * nc, &aa));
4173 ii[0] = 0;
4174 for (PetscInt k = 0; k < m; k++) {
4175 for (PetscInt s = 0; s < nc; s++) {
4176 const PetscInt c = s + k * nc;
4177 const PetscScalar v = a[k + s * m];
4178
4179 if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4180 jj[nnz] = j[c];
4181 aa[nnz] = a[k + s * m];
4182 nnz++;
4183 }
4184 ii[k + 1] = nnz;
4185 }
4186
4187 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4188 PetscCall(MatDenseRestoreArrayRead(D, &a));
4189
4190 aij = (Mat_SeqAIJ *)(*mat)->data;
4191 aij->free_a = PETSC_TRUE;
4192 aij->free_ij = PETSC_TRUE;
4193 PetscFunctionReturn(PETSC_SUCCESS);
4194 }
4195
4196 /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
MatSeqAIJInvertVariableBlockDiagonalMat(Mat A,PetscInt nblocks,const PetscInt * bsizes,Mat * B)4197 static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4198 {
4199 PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4200 const PetscBool allowzeropivot = PETSC_FALSE;
4201 PetscBool zeropivotdetected = PETSC_FALSE;
4202 const PetscReal shift = 0.0;
4203 PetscInt ipvt[5], *ii, *jj, *indi, *indj;
4204 PetscScalar work[25], *v_work = NULL, *aa, *diag;
4205 PetscLogDouble flops = 0.0;
4206
4207 PetscFunctionBegin;
4208 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4209 for (PetscInt i = 0; i < nblocks; i++) {
4210 ncnt += bsizes[i];
4211 ncnt2 += PetscSqr(bsizes[i]);
4212 }
4213 PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4214 for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4215 if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4216
4217 PetscCall(PetscMalloc1(n + 1, &ii));
4218 PetscCall(PetscMalloc1(ncnt2, &jj));
4219 PetscCall(PetscCalloc1(ncnt2, &aa));
4220
4221 ncnt = 0;
4222 ii[0] = 0;
4223 indi = ii;
4224 indj = jj;
4225 diag = aa;
4226 for (PetscInt i = 0; i < nblocks; i++) {
4227 const PetscInt bs = bsizes[i];
4228
4229 for (PetscInt k = 0; k < bs; k++) {
4230 indi[k + 1] = indi[k] + bs;
4231 for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4232 }
4233 PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4234 switch (bs) {
4235 case 1:
4236 *diag = 1.0 / (*diag);
4237 break;
4238 case 2:
4239 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4240 break;
4241 case 3:
4242 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4243 break;
4244 case 4:
4245 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4246 break;
4247 case 5:
4248 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4249 break;
4250 case 6:
4251 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4252 break;
4253 case 7:
4254 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4255 break;
4256 default:
4257 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4258 }
4259 ncnt += bs;
4260 flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4261 diag += bs * bs;
4262 indj += bs * bs;
4263 indi += bs;
4264 }
4265 PetscCall(PetscLogFlops(flops));
4266 PetscCall(PetscFree2(v_work, v_pivots));
4267 PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4268 {
4269 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4270 aij->free_a = PETSC_TRUE;
4271 aij->free_ij = PETSC_TRUE;
4272 }
4273 PetscFunctionReturn(PETSC_SUCCESS);
4274 }
4275
PCBDDCSetUpCorrection(PC pc,Mat * coarse_submat)4276 PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4277 {
4278 PC_IS *pcis = (PC_IS *)pc->data;
4279 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4280 PCBDDCGraph graph = pcbddc->mat_graph;
4281 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4282 /* submatrices of local problem */
4283 Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4284 /* submatrices of local coarse problem */
4285 Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4286 /* working matrices */
4287 Mat C_CR;
4288
4289 /* additional working stuff */
4290 PC pc_R;
4291 IS is_R, is_V, is_C;
4292 const PetscInt *idx_V, *idx_C;
4293 Mat F, Brhs = NULL;
4294 Vec dummy_vec;
4295 PetscBool isPreonly, isLU, isCHOL, need_benign_correction, sparserhs;
4296 PetscInt *idx_V_B;
4297 PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4298 PetscInt n_eff_vertices, n_eff_constraints;
4299 PetscInt i, n_R, n_D, n_B;
4300 PetscScalar one = 1.0, m_one = -1.0;
4301
4302 /* Multi-element support */
4303 PetscBool multi_element = graph->multi_element;
4304 PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4305 PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4306 IS is_C_perm = NULL;
4307 PetscInt n_C_bss = 0, *C_bss = NULL;
4308 Mat coarse_phi_multi;
4309
4310 PetscFunctionBegin;
4311 PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4312 PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4313
4314 /* Set Non-overlapping dimensions */
4315 n_vertices = pcbddc->n_vertices;
4316 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4317 n_B = pcis->n_B;
4318 n_D = pcis->n - n_B;
4319 n_R = pcis->n - n_vertices;
4320
4321 /* vertices in boundary numbering */
4322 PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4323 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4324 PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4325
4326 /* these two cases still need to be optimized */
4327 if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4328
4329 /* Subdomain contribution (Non-overlapping) to coarse matrix */
4330 if (multi_element) {
4331 PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4332
4333 PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4334 PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4335 PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4336 PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4337 PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4338
4339 /* group vertices and constraints by subdomain id */
4340 const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4341 const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4342 PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4343 PetscInt n_el = PetscMax(graph->n_local_subs, 1);
4344
4345 PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4346 PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4347 PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4348 for (PetscInt i = 0; i < n_vertices; i++) {
4349 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4350
4351 V_to_eff_V[i] = count_eff[s];
4352 count_eff[s] += 1;
4353 }
4354 for (PetscInt i = 0; i < n_constraints; i++) {
4355 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4356
4357 C_to_eff_C[i] = count_eff[s];
4358 count_eff[s] += 1;
4359 }
4360
4361 /* preallocation */
4362 PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4363 for (PetscInt i = 0; i < n_vertices; i++) {
4364 PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4365
4366 nnz[i] = count_eff[s] + count_eff[s + 1];
4367 }
4368 for (PetscInt i = 0; i < n_constraints; i++) {
4369 PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4370
4371 nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4372 }
4373 PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4374 PetscCall(PetscFree(nnz));
4375
4376 n_eff_vertices = 0;
4377 n_eff_constraints = 0;
4378 for (PetscInt i = 0; i < n_el; i++) {
4379 n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]);
4380 n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4381 count_eff[2 * i] = 0;
4382 count_eff[2 * i + 1] = 0;
4383 }
4384
4385 const PetscInt *idx;
4386 PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4387
4388 for (PetscInt i = 0; i < n_vertices; i++) {
4389 const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4390 const PetscInt s = 2 * e;
4391
4392 V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4393 count_eff[s] += 1;
4394 }
4395 for (PetscInt i = 0; i < n_constraints; i++) {
4396 const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4397 const PetscInt s = 2 * e + 1;
4398
4399 C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4400 count_eff[s] += 1;
4401 }
4402
4403 PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4404 PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4405 PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4406 PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4407 for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4408 for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4409 for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4410 for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4411
4412 PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4413 for (PetscInt i = 0; i < n_R; i++) {
4414 const PetscInt e = graph->nodes[idx[i]].local_sub;
4415 const PetscInt s = 2 * e;
4416 PetscInt j;
4417
4418 for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4419 for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4420 }
4421 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4422 PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4423 for (PetscInt i = 0; i < n_B; i++) {
4424 const PetscInt e = graph->nodes[idx[i]].local_sub;
4425 const PetscInt s = 2 * e;
4426 PetscInt j;
4427
4428 for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4429 for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4430 }
4431 PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4432
4433 /* permutation and blocksizes for block invert of S_CC */
4434 PetscInt *idxp;
4435
4436 PetscCall(PetscMalloc1(n_constraints, &idxp));
4437 PetscCall(PetscMalloc1(n_el, &C_bss));
4438 n_C_bss = 0;
4439 for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4440 const PetscInt nc = count_eff[2 * e + 1];
4441
4442 if (nc) C_bss[n_C_bss++] = nc;
4443 for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c];
4444 cnt += nc;
4445 }
4446
4447 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4448
4449 PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4450 PetscCall(PetscFree(count_eff));
4451 } else {
4452 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4453 n_eff_constraints = n_constraints;
4454 n_eff_vertices = n_vertices;
4455 }
4456
4457 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4458 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4459 PetscCall(PCSetUp(pc_R));
4460 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->ksp_R, KSPPREONLY, &isPreonly));
4461 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4462 PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4463 lda_rhs = n_R;
4464 need_benign_correction = PETSC_FALSE;
4465 F = NULL;
4466 if (isPreonly && (isLU || isCHOL)) {
4467 PetscCall(PCFactorGetMatrix(pc_R, &F));
4468 } else if (sub_schurs && sub_schurs->reuse_solver) {
4469 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4470 MatFactorType type;
4471
4472 F = reuse_solver->F;
4473 PetscCall(MatGetFactorType(F, &type));
4474 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4475 if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4476 PetscCall(MatGetSize(F, &lda_rhs, NULL));
4477 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4478 }
4479
4480 /* determine if we can use a sparse right-hand side */
4481 sparserhs = PETSC_FALSE;
4482 if (F && !multi_element) {
4483 MatSolverType solver;
4484
4485 PetscCall(MatFactorGetSolverType(F, &solver));
4486 PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4487 }
4488
4489 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4490 dummy_vec = NULL;
4491 if (need_benign_correction && lda_rhs != n_R && F) {
4492 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4493 PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4494 PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4495 }
4496
4497 PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4498 PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4499
4500 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4501 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4502 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4503 PetscCall(ISGetIndices(is_V, &idx_V));
4504 PetscCall(ISGetIndices(is_C, &idx_C));
4505
4506 /* Precompute stuffs needed for preprocessing and application of BDDC*/
4507 if (n_constraints) {
4508 Mat C_B;
4509
4510 /* Extract constraints on R nodes: C_{CR} */
4511 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4512 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4513
4514 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4515 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4516 if (!sparserhs) {
4517 PetscScalar *marr;
4518
4519 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4520 PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4521 for (i = 0; i < n_constraints; i++) {
4522 const PetscScalar *row_cmat_values;
4523 const PetscInt *row_cmat_indices;
4524 PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4525
4526 PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4527 for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4528 PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4529 }
4530 PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4531 } else {
4532 Mat tC_CR;
4533
4534 PetscCall(MatScale(C_CR, -1.0));
4535 if (lda_rhs != n_R) {
4536 PetscScalar *aa;
4537 PetscInt r, *ii, *jj;
4538 PetscBool done;
4539
4540 PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4541 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4542 PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4543 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4544 PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4545 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4546 } else {
4547 PetscCall(PetscObjectReference((PetscObject)C_CR));
4548 tC_CR = C_CR;
4549 }
4550 PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4551 PetscCall(MatDestroy(&tC_CR));
4552 }
4553 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4554 if (F) {
4555 if (need_benign_correction) {
4556 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4557
4558 /* rhs is already zero on interior dofs, no need to change the rhs */
4559 PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4560 }
4561 PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4562 if (need_benign_correction) {
4563 PetscScalar *marr;
4564 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4565
4566 /* XXX multi_element? */
4567 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4568 if (lda_rhs != n_R) {
4569 for (i = 0; i < n_eff_constraints; i++) {
4570 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4571 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4572 PetscCall(VecResetArray(dummy_vec));
4573 }
4574 } else {
4575 for (i = 0; i < n_eff_constraints; i++) {
4576 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4577 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4578 PetscCall(VecResetArray(pcbddc->vec1_R));
4579 }
4580 }
4581 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4582 }
4583 } else {
4584 const PetscScalar *barr;
4585 PetscScalar *marr;
4586
4587 PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4588 PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4589 for (i = 0; i < n_eff_constraints; i++) {
4590 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4591 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4592 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4593 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4594 PetscCall(VecResetArray(pcbddc->vec1_R));
4595 PetscCall(VecResetArray(pcbddc->vec2_R));
4596 }
4597 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4598 PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4599 }
4600 if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4601 PetscCall(MatDestroy(&Brhs));
4602 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */
4603 if (!pcbddc->switch_static) {
4604 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4605 PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, local_auxmat2_R, pcbddc->local_auxmat2, INSERT_VALUES, SCATTER_FORWARD));
4606 if (multi_element) {
4607 Mat T;
4608
4609 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4610 PetscCall(MatDestroy(&local_auxmat2_R));
4611 local_auxmat2_R = T;
4612 PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4613 PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4614 pcbddc->local_auxmat2 = T;
4615 }
4616 PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4617 } else {
4618 if (multi_element) {
4619 Mat T;
4620
4621 PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4622 PetscCall(MatDestroy(&local_auxmat2_R));
4623 local_auxmat2_R = T;
4624 }
4625 if (lda_rhs != n_R) {
4626 PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4627 } else {
4628 PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4629 pcbddc->local_auxmat2 = local_auxmat2_R;
4630 }
4631 PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4632 }
4633 PetscCall(MatScale(S_CC, m_one));
4634 if (multi_element) {
4635 Mat T, T2;
4636 IS isp, ispi;
4637
4638 isp = is_C_perm;
4639
4640 PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4641 PetscCall(MatPermute(S_CC, isp, isp, &T));
4642 PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4643 PetscCall(MatDestroy(&T));
4644 PetscCall(MatDestroy(&S_CC));
4645 PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4646 PetscCall(MatDestroy(&T2));
4647 PetscCall(ISDestroy(&ispi));
4648 } else {
4649 if (isCHOL) {
4650 PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4651 } else {
4652 PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4653 }
4654 PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4655 }
4656 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4657 PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4658 PetscCall(MatDestroy(&C_B));
4659 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4660 }
4661
4662 /* Get submatrices from subdomain matrix */
4663 if (n_vertices) {
4664 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4665 PetscBool oldpin;
4666 #endif
4667 IS is_aux;
4668
4669 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4670 IS tis;
4671
4672 PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4673 PetscCall(ISSort(tis));
4674 PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4675 PetscCall(ISDestroy(&tis));
4676 } else {
4677 PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4678 }
4679 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4680 oldpin = pcbddc->local_mat->boundtocpu;
4681 #endif
4682 PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4683 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4684 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4685 /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4686 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4687 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4688 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4689 PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4690 #endif
4691 PetscCall(ISDestroy(&is_aux));
4692 }
4693 PetscCall(ISDestroy(&is_C_perm));
4694 PetscCall(PetscFree(C_bss));
4695
4696 p0_lidx_I = NULL;
4697 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4698 const PetscInt *idxs;
4699
4700 PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4701 PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4702 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4703 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4704 }
4705
4706 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4707
4708 /* Matrices of coarse basis functions (local) */
4709 PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4710 PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4711 PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4712 PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4713 if (!multi_element) {
4714 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4715 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4716 coarse_phi_multi = NULL;
4717 } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4718 IS is_rows[2] = {pcbddc->is_R_local, NULL};
4719 IS is_cols[2] = {is_V, is_C};
4720
4721 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4722 PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4723 PetscCall(ISDestroy(&is_rows[1]));
4724 }
4725
4726 /* vertices */
4727 if (n_vertices) {
4728 PetscBool restoreavr = PETSC_FALSE;
4729 Mat A_RRmA_RV = NULL;
4730
4731 PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4732 PetscCall(MatDestroy(&A_VV));
4733
4734 if (n_R) {
4735 Mat A_RV_bcorr = NULL, S_VV;
4736
4737 PetscCall(MatScale(A_RV, m_one));
4738 if (need_benign_correction) {
4739 ISLocalToGlobalMapping RtoN;
4740 IS is_p0;
4741 PetscInt *idxs_p0, n;
4742
4743 PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4744 PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4745 PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4746 PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4747 PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4748 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4749 PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4750 PetscCall(ISDestroy(&is_p0));
4751 }
4752
4753 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4754 if (!sparserhs || need_benign_correction) {
4755 if (lda_rhs == n_R && !multi_element) {
4756 PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4757 } else {
4758 Mat T;
4759 PetscScalar *av, *array;
4760 const PetscInt *xadj, *adjncy;
4761 PetscInt n;
4762 PetscBool flg_row;
4763
4764 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4765 PetscCall(MatDenseGetArrayWrite(T, &array));
4766 PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4767 PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4768 PetscCall(MatSeqAIJGetArray(A_RV, &av));
4769 for (i = 0; i < n; i++) {
4770 PetscInt j;
4771 for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4772 }
4773 PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4774 PetscCall(MatDenseRestoreArrayWrite(T, &array));
4775 PetscCall(MatDestroy(&A_RV));
4776 A_RV = T;
4777 }
4778 if (need_benign_correction) {
4779 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4780 PetscScalar *marr;
4781
4782 /* XXX multi_element */
4783 PetscCall(MatDenseGetArray(A_RV, &marr));
4784 /* need \Phi^T A_RV = (I+L)A_RV, L given by
4785
4786 | 0 0 0 | (V)
4787 L = | 0 0 -1 | (P-p0)
4788 | 0 0 -1 | (p0)
4789
4790 */
4791 for (i = 0; i < reuse_solver->benign_n; i++) {
4792 const PetscScalar *vals;
4793 const PetscInt *idxs, *idxs_zero;
4794 PetscInt n, j, nz;
4795
4796 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4797 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4798 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4799 for (j = 0; j < n; j++) {
4800 PetscScalar val = vals[j];
4801 PetscInt k, col = idxs[j];
4802 for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4803 }
4804 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4805 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4806 }
4807 PetscCall(MatDenseRestoreArray(A_RV, &marr));
4808 }
4809 PetscCall(PetscObjectReference((PetscObject)A_RV));
4810 Brhs = A_RV;
4811 } else {
4812 Mat tA_RVT, A_RVT;
4813
4814 if (!pcbddc->symmetric_primal) {
4815 /* A_RV already scaled by -1 */
4816 PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4817 } else {
4818 restoreavr = PETSC_TRUE;
4819 PetscCall(MatScale(A_VR, -1.0));
4820 PetscCall(PetscObjectReference((PetscObject)A_VR));
4821 A_RVT = A_VR;
4822 }
4823 if (lda_rhs != n_R) {
4824 PetscScalar *aa;
4825 PetscInt r, *ii, *jj;
4826 PetscBool done;
4827
4828 PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4829 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4830 PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4831 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4832 PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4833 PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4834 } else {
4835 PetscCall(PetscObjectReference((PetscObject)A_RVT));
4836 tA_RVT = A_RVT;
4837 }
4838 PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4839 PetscCall(MatDestroy(&tA_RVT));
4840 PetscCall(MatDestroy(&A_RVT));
4841 }
4842 if (F) {
4843 /* need to correct the rhs */
4844 if (need_benign_correction) {
4845 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4846 PetscScalar *marr;
4847
4848 PetscCall(MatDenseGetArray(Brhs, &marr));
4849 if (lda_rhs != n_R) {
4850 for (i = 0; i < n_eff_vertices; i++) {
4851 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4852 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4853 PetscCall(VecResetArray(dummy_vec));
4854 }
4855 } else {
4856 for (i = 0; i < n_eff_vertices; i++) {
4857 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4858 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4859 PetscCall(VecResetArray(pcbddc->vec1_R));
4860 }
4861 }
4862 PetscCall(MatDenseRestoreArray(Brhs, &marr));
4863 }
4864 PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4865 if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4866 /* need to correct the solution */
4867 if (need_benign_correction) {
4868 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4869 PetscScalar *marr;
4870
4871 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4872 if (lda_rhs != n_R) {
4873 for (i = 0; i < n_eff_vertices; i++) {
4874 PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4875 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4876 PetscCall(VecResetArray(dummy_vec));
4877 }
4878 } else {
4879 for (i = 0; i < n_eff_vertices; i++) {
4880 PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4881 PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4882 PetscCall(VecResetArray(pcbddc->vec1_R));
4883 }
4884 }
4885 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4886 }
4887 } else {
4888 const PetscScalar *barr;
4889 PetscScalar *marr;
4890
4891 PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4892 PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4893 for (i = 0; i < n_eff_vertices; i++) {
4894 PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4895 PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4896 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4897 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4898 PetscCall(VecResetArray(pcbddc->vec1_R));
4899 PetscCall(VecResetArray(pcbddc->vec2_R));
4900 }
4901 PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4902 PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4903 }
4904 PetscCall(MatDestroy(&A_RV));
4905 PetscCall(MatDestroy(&Brhs));
4906 /* S_VV and S_CV */
4907 if (n_constraints) {
4908 Mat B;
4909
4910 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4911 PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, A_RRmA_RV, B, INSERT_VALUES, SCATTER_FORWARD));
4912
4913 /* S_CV = pcbddc->local_auxmat1 * B */
4914 if (multi_element) {
4915 Mat T;
4916
4917 PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4918 PetscCall(MatDestroy(&B));
4919 B = T;
4920 }
4921 PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4922 PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4923 PetscCall(MatProductSetFromOptions(S_CV));
4924 PetscCall(MatProductSymbolic(S_CV));
4925 PetscCall(MatProductNumeric(S_CV));
4926 PetscCall(MatProductClear(S_CV));
4927 PetscCall(MatDestroy(&B));
4928
4929 /* B = local_auxmat2_R * S_CV */
4930 PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4931 PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4932 PetscCall(MatProductSetFromOptions(B));
4933 PetscCall(MatProductSymbolic(B));
4934 PetscCall(MatProductNumeric(B));
4935
4936 PetscCall(MatScale(S_CV, m_one));
4937 PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4938
4939 if (multi_element) {
4940 Mat T;
4941
4942 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4943 PetscCall(MatDestroy(&A_RRmA_RV));
4944 A_RRmA_RV = T;
4945 }
4946 PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4947 PetscCall(MatDestroy(&B));
4948 } else if (multi_element) {
4949 Mat T;
4950
4951 PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4952 PetscCall(MatDestroy(&A_RRmA_RV));
4953 A_RRmA_RV = T;
4954 }
4955
4956 if (lda_rhs != n_R) {
4957 Mat T;
4958
4959 PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4960 PetscCall(MatDestroy(&A_RRmA_RV));
4961 A_RRmA_RV = T;
4962 }
4963
4964 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4965 if (need_benign_correction) { /* XXX SPARSE */
4966 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4967 PetscScalar *sums;
4968 const PetscScalar *marr;
4969
4970 PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
4971 PetscCall(PetscMalloc1(n_vertices, &sums));
4972 for (i = 0; i < reuse_solver->benign_n; i++) {
4973 const PetscScalar *vals;
4974 const PetscInt *idxs, *idxs_zero;
4975 PetscInt n, j, nz;
4976
4977 PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4978 PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4979 for (j = 0; j < n_vertices; j++) {
4980 sums[j] = 0.;
4981 for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
4982 }
4983 PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4984 for (j = 0; j < n; j++) {
4985 PetscScalar val = vals[j];
4986 for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
4987 }
4988 PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4989 PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4990 }
4991 PetscCall(PetscFree(sums));
4992 PetscCall(MatDestroy(&A_RV_bcorr));
4993 PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
4994 }
4995
4996 PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
4997 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4998 PetscCall(MatDestroy(&S_VV));
4999 }
5000
5001 /* coarse basis functions */
5002 if (coarse_phi_multi) {
5003 Mat Vid;
5004
5005 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
5006 PetscCall(MatShift_Basic(Vid, 1.0));
5007 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
5008 PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
5009 PetscCall(MatDestroy(&Vid));
5010 } else {
5011 if (A_RRmA_RV) {
5012 Mat B;
5013
5014 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, 0, n_vertices, &B));
5015 PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, A_RRmA_RV, B, INSERT_VALUES, SCATTER_FORWARD));
5016 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B));
5017 if (pcbddc->switch_static || pcbddc->dbg_flag) {
5018 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, 0, n_vertices, &B));
5019 PetscCall(MatDenseScatter_Private(pcbddc->R_to_D, A_RRmA_RV, B, INSERT_VALUES, SCATTER_FORWARD));
5020 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B));
5021 if (pcbddc->benign_n) {
5022 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5023 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
5024 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
5025 }
5026 }
5027 }
5028 for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
5029 PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
5030 PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
5031 }
5032 PetscCall(MatDestroy(&A_RRmA_RV));
5033 }
5034 PetscCall(MatDestroy(&A_RV));
5035 PetscCall(VecDestroy(&dummy_vec));
5036
5037 if (n_constraints) {
5038 Mat B, B2;
5039
5040 PetscCall(MatScale(S_CC, m_one));
5041 PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
5042 PetscCall(MatProductSetType(B, MATPRODUCT_AB));
5043 PetscCall(MatProductSetFromOptions(B));
5044 PetscCall(MatProductSymbolic(B));
5045 PetscCall(MatProductNumeric(B));
5046
5047 if (n_vertices) {
5048 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
5049 PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
5050 } else {
5051 if (lda_rhs != n_R) {
5052 Mat tB;
5053
5054 PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
5055 PetscCall(MatDestroy(&B));
5056 B = tB;
5057 }
5058 PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
5059 }
5060 PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
5061 }
5062
5063 /* coarse basis functions */
5064 if (coarse_phi_multi) {
5065 PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
5066 } else {
5067 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
5068 PetscCall(MatDenseScatter_Private(pcbddc->R_to_B, B, B2, INSERT_VALUES, SCATTER_FORWARD));
5069 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
5070 if (pcbddc->switch_static || pcbddc->dbg_flag) {
5071 PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
5072 PetscCall(MatDenseScatter_Private(pcbddc->R_to_D, B, B2, INSERT_VALUES, SCATTER_FORWARD));
5073 if (pcbddc->benign_n) {
5074 for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5075 }
5076 PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
5077 }
5078 }
5079 PetscCall(MatDestroy(&B));
5080 }
5081
5082 /* assemble sparse coarse basis functions */
5083 if (coarse_phi_multi) {
5084 Mat T;
5085
5086 PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
5087 PetscCall(MatDestroy(&coarse_phi_multi));
5088 PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
5089 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D));
5090 PetscCall(MatDestroy(&T));
5091 }
5092 PetscCall(MatDestroy(&local_auxmat2_R));
5093 PetscCall(PetscFree(p0_lidx_I));
5094
5095 /* coarse matrix entries relative to B_0 */
5096 if (pcbddc->benign_n) {
5097 Mat B0_B, B0_BPHI;
5098 IS is_dummy;
5099 const PetscScalar *data;
5100 PetscInt j;
5101
5102 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5103 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5104 PetscCall(ISDestroy(&is_dummy));
5105 PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5106 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5107 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5108 for (j = 0; j < pcbddc->benign_n; j++) {
5109 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5110 for (i = 0; i < pcbddc->local_primal_size; i++) {
5111 PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5112 PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5113 }
5114 }
5115 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5116 PetscCall(MatDestroy(&B0_B));
5117 PetscCall(MatDestroy(&B0_BPHI));
5118 }
5119
5120 /* compute other basis functions for non-symmetric problems */
5121 if (!pcbddc->symmetric_primal) {
5122 Mat B_V = NULL, B_C = NULL;
5123 PetscScalar *marray, *work;
5124
5125 /* TODO multi_element MatDenseScatter */
5126 if (n_constraints) {
5127 Mat S_CCT, C_CRT;
5128
5129 PetscCall(MatScale(S_CC, m_one));
5130 PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5131 PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5132 PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5133 PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5134 PetscCall(MatDestroy(&S_CCT));
5135 if (n_vertices) {
5136 Mat S_VCT;
5137
5138 PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5139 PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5140 PetscCall(MatDestroy(&S_VCT));
5141 PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5142 }
5143 PetscCall(MatDestroy(&C_CRT));
5144 } else {
5145 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5146 }
5147 if (n_vertices && n_R) {
5148 PetscScalar *av, *marray;
5149 const PetscInt *xadj, *adjncy;
5150 PetscInt n;
5151 PetscBool flg_row;
5152
5153 /* B_V = B_V - A_VR^T */
5154 PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5155 PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5156 PetscCall(MatSeqAIJGetArray(A_VR, &av));
5157 PetscCall(MatDenseGetArray(B_V, &marray));
5158 for (i = 0; i < n; i++) {
5159 PetscInt j;
5160 for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5161 }
5162 PetscCall(MatDenseRestoreArray(B_V, &marray));
5163 PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5164 PetscCall(MatDestroy(&A_VR));
5165 }
5166
5167 /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5168 PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5169 if (n_vertices) {
5170 PetscCall(MatDenseGetArray(B_V, &marray));
5171 for (i = 0; i < n_vertices; i++) {
5172 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5173 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5174 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5175 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5176 PetscCall(VecResetArray(pcbddc->vec1_R));
5177 PetscCall(VecResetArray(pcbddc->vec2_R));
5178 }
5179 PetscCall(MatDenseRestoreArray(B_V, &marray));
5180 }
5181 if (B_C) {
5182 PetscCall(MatDenseGetArray(B_C, &marray));
5183 for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5184 PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5185 PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5186 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5187 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5188 PetscCall(VecResetArray(pcbddc->vec1_R));
5189 PetscCall(VecResetArray(pcbddc->vec2_R));
5190 }
5191 PetscCall(MatDenseRestoreArray(B_C, &marray));
5192 }
5193 /* coarse basis functions */
5194 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5195 if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5196 for (i = 0; i < pcbddc->local_primal_size; i++) {
5197 Vec v;
5198
5199 PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5200 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5201 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5202 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5203 if (i < n_vertices) {
5204 PetscScalar one = 1.0;
5205 PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5206 PetscCall(VecAssemblyBegin(v));
5207 PetscCall(VecAssemblyEnd(v));
5208 }
5209 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5210
5211 if (pcbddc->switch_static || pcbddc->dbg_flag) {
5212 PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5213 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5214 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5215 PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5216 }
5217 PetscCall(VecResetArray(pcbddc->vec1_R));
5218 }
5219 PetscCall(MatDestroy(&B_V));
5220 PetscCall(MatDestroy(&B_C));
5221 PetscCall(PetscFree(work));
5222 } else {
5223 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5224 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5225 PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5226 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5227 }
5228 PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5229 PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5230
5231 /* free memory */
5232 PetscCall(PetscFree(V_to_eff_V));
5233 PetscCall(PetscFree(C_to_eff_C));
5234 PetscCall(PetscFree(R_eff_V_J));
5235 PetscCall(PetscFree(R_eff_C_J));
5236 PetscCall(PetscFree(B_eff_V_J));
5237 PetscCall(PetscFree(B_eff_C_J));
5238 PetscCall(ISDestroy(&is_R));
5239 PetscCall(ISRestoreIndices(is_V, &idx_V));
5240 PetscCall(ISRestoreIndices(is_C, &idx_C));
5241 PetscCall(ISDestroy(&is_V));
5242 PetscCall(ISDestroy(&is_C));
5243 PetscCall(PetscFree(idx_V_B));
5244 PetscCall(MatDestroy(&S_CV));
5245 PetscCall(MatDestroy(&S_VC));
5246 PetscCall(MatDestroy(&S_CC));
5247 if (n_vertices) PetscCall(MatDestroy(&A_VR));
5248 if (n_constraints) PetscCall(MatDestroy(&C_CR));
5249 PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5250
5251 /* Checking coarse_sub_mat and coarse basis functions */
5252 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5253 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5254 if (pcbddc->dbg_flag) {
5255 Mat AUXMAT, TM1, TM2, TM3, TM4;
5256 Mat coarse_phi_D, coarse_phi_B;
5257 Mat coarse_psi_D, coarse_psi_B;
5258 Mat A_II, A_BB, A_IB, A_BI;
5259 Mat C_B, CPHI;
5260 IS is_dummy;
5261 Vec mones;
5262 MatType checkmattype = MATSEQAIJ;
5263 PetscReal real_value;
5264
5265 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5266 Mat A;
5267 PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5268 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5269 PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5270 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5271 PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5272 PetscCall(MatDestroy(&A));
5273 } else {
5274 PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5275 PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5276 PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5277 PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5278 }
5279 PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5280 PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5281 if (!pcbddc->symmetric_primal) {
5282 PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5283 PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5284 }
5285 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5286 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5287 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5288 if (!pcbddc->symmetric_primal) {
5289 PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5290 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5291 PetscCall(MatDestroy(&AUXMAT));
5292 PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5293 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5294 PetscCall(MatDestroy(&AUXMAT));
5295 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5296 PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5297 PetscCall(MatDestroy(&AUXMAT));
5298 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5299 PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5300 PetscCall(MatDestroy(&AUXMAT));
5301 } else {
5302 PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5303 PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5304 PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5305 PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5306 PetscCall(MatDestroy(&AUXMAT));
5307 PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5308 PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5309 PetscCall(MatDestroy(&AUXMAT));
5310 }
5311 PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5312 PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5313 PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5314 PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5315 if (pcbddc->benign_n) {
5316 Mat B0_B, B0_BPHI;
5317 const PetscScalar *data2;
5318 PetscScalar *data;
5319 PetscInt j;
5320
5321 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5322 PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5323 PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5324 PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5325 PetscCall(MatDenseGetArray(TM1, &data));
5326 PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5327 for (j = 0; j < pcbddc->benign_n; j++) {
5328 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5329 for (i = 0; i < pcbddc->local_primal_size; i++) {
5330 data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5331 data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5332 }
5333 }
5334 PetscCall(MatDenseRestoreArray(TM1, &data));
5335 PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5336 PetscCall(MatDestroy(&B0_B));
5337 PetscCall(ISDestroy(&is_dummy));
5338 PetscCall(MatDestroy(&B0_BPHI));
5339 }
5340 PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5341 PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5342 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5343 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5344
5345 /* check constraints */
5346 PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5347 PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5348 if (!pcbddc->benign_n) { /* TODO: add benign case */
5349 PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5350 } else {
5351 PetscScalar *data;
5352 Mat tmat;
5353 PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5354 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5355 PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5356 PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5357 PetscCall(MatDestroy(&tmat));
5358 }
5359 PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5360 PetscCall(VecSet(mones, -1.0));
5361 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5362 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5363 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5364 if (!pcbddc->symmetric_primal) {
5365 PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5366 PetscCall(VecSet(mones, -1.0));
5367 PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5368 PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5369 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5370 }
5371 PetscCall(MatDestroy(&C_B));
5372 PetscCall(MatDestroy(&CPHI));
5373 PetscCall(ISDestroy(&is_dummy));
5374 PetscCall(VecDestroy(&mones));
5375 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5376 PetscCall(MatDestroy(&A_II));
5377 PetscCall(MatDestroy(&A_BB));
5378 PetscCall(MatDestroy(&A_IB));
5379 PetscCall(MatDestroy(&A_BI));
5380 PetscCall(MatDestroy(&TM1));
5381 PetscCall(MatDestroy(&TM2));
5382 PetscCall(MatDestroy(&TM3));
5383 PetscCall(MatDestroy(&TM4));
5384 PetscCall(MatDestroy(&coarse_phi_D));
5385 PetscCall(MatDestroy(&coarse_phi_B));
5386 if (!pcbddc->symmetric_primal) {
5387 PetscCall(MatDestroy(&coarse_psi_D));
5388 PetscCall(MatDestroy(&coarse_psi_B));
5389 }
5390 }
5391
5392 #if 0
5393 {
5394 PetscViewer viewer;
5395 char filename[256];
5396
5397 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5398 PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5399 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5400 PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5401 PetscCall(MatView(*coarse_submat,viewer));
5402 if (pcbddc->coarse_phi_B) {
5403 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5404 PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5405 }
5406 if (pcbddc->coarse_phi_D) {
5407 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5408 PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5409 }
5410 if (pcbddc->coarse_psi_B) {
5411 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5412 PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5413 }
5414 if (pcbddc->coarse_psi_D) {
5415 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5416 PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5417 }
5418 PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5419 PetscCall(MatView(pcbddc->local_mat,viewer));
5420 PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5421 PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5422 PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5423 PetscCall(ISView(pcis->is_I_local,viewer));
5424 PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5425 PetscCall(ISView(pcis->is_B_local,viewer));
5426 PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5427 PetscCall(ISView(pcbddc->is_R_local,viewer));
5428 PetscCall(PetscViewerDestroy(&viewer));
5429 }
5430 #endif
5431
5432 /* device support */
5433 {
5434 PetscBool iscuda, iship, iskokkos;
5435 MatType mtype = NULL;
5436
5437 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5438 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5439 PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5440 if (iskokkos) {
5441 if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5442 else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5443 }
5444 if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5445 else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5446 else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5447 if (mtype) {
5448 if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5449 if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5450 if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5451 if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5452 if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5453 if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5454 }
5455 }
5456 PetscFunctionReturn(PETSC_SUCCESS);
5457 }
5458
MatCreateSubMatrixUnsorted(Mat A,IS isrow,IS iscol,Mat * B)5459 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5460 {
5461 Mat *work_mat;
5462 IS isrow_s, iscol_s;
5463 PetscBool rsorted, csorted;
5464 PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5465
5466 PetscFunctionBegin;
5467 PetscCall(ISSorted(isrow, &rsorted));
5468 PetscCall(ISSorted(iscol, &csorted));
5469 PetscCall(ISGetLocalSize(isrow, &rsize));
5470 PetscCall(ISGetLocalSize(iscol, &csize));
5471
5472 if (!rsorted) {
5473 const PetscInt *idxs;
5474 PetscInt *idxs_sorted, i;
5475
5476 PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5477 PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5478 for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5479 PetscCall(ISGetIndices(isrow, &idxs));
5480 PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5481 for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5482 PetscCall(ISRestoreIndices(isrow, &idxs));
5483 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5484 } else {
5485 PetscCall(PetscObjectReference((PetscObject)isrow));
5486 isrow_s = isrow;
5487 }
5488
5489 if (!csorted) {
5490 if (isrow == iscol) {
5491 PetscCall(PetscObjectReference((PetscObject)isrow_s));
5492 iscol_s = isrow_s;
5493 } else {
5494 const PetscInt *idxs;
5495 PetscInt *idxs_sorted, i;
5496
5497 PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5498 PetscCall(PetscMalloc1(csize, &idxs_sorted));
5499 for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5500 PetscCall(ISGetIndices(iscol, &idxs));
5501 PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5502 for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5503 PetscCall(ISRestoreIndices(iscol, &idxs));
5504 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5505 }
5506 } else {
5507 PetscCall(PetscObjectReference((PetscObject)iscol));
5508 iscol_s = iscol;
5509 }
5510
5511 PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5512
5513 if (!rsorted || !csorted) {
5514 Mat new_mat;
5515 IS is_perm_r, is_perm_c;
5516
5517 if (!rsorted) {
5518 PetscInt *idxs_r, i;
5519 PetscCall(PetscMalloc1(rsize, &idxs_r));
5520 for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5521 PetscCall(PetscFree(idxs_perm_r));
5522 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5523 } else {
5524 PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5525 }
5526 PetscCall(ISSetPermutation(is_perm_r));
5527
5528 if (!csorted) {
5529 if (isrow_s == iscol_s) {
5530 PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5531 is_perm_c = is_perm_r;
5532 } else {
5533 PetscInt *idxs_c, i;
5534 PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5535 PetscCall(PetscMalloc1(csize, &idxs_c));
5536 for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5537 PetscCall(PetscFree(idxs_perm_c));
5538 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5539 }
5540 } else {
5541 PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5542 }
5543 PetscCall(ISSetPermutation(is_perm_c));
5544
5545 PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5546 PetscCall(MatDestroy(&work_mat[0]));
5547 work_mat[0] = new_mat;
5548 PetscCall(ISDestroy(&is_perm_r));
5549 PetscCall(ISDestroy(&is_perm_c));
5550 }
5551
5552 PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5553 *B = work_mat[0];
5554 PetscCall(MatDestroyMatrices(1, &work_mat));
5555 PetscCall(ISDestroy(&isrow_s));
5556 PetscCall(ISDestroy(&iscol_s));
5557 PetscFunctionReturn(PETSC_SUCCESS);
5558 }
5559
MatPtAPWithPrefix_Private(Mat A,Mat P,PetscReal fill,const char * prefix,Mat * C)5560 static PetscErrorCode MatPtAPWithPrefix_Private(Mat A, Mat P, PetscReal fill, const char *prefix, Mat *C)
5561 {
5562 PetscFunctionBegin;
5563 PetscCall(MatProductCreate(A, P, NULL, C));
5564 PetscCall(MatProductSetType(*C, MATPRODUCT_PtAP));
5565 PetscCall(MatProductSetAlgorithm(*C, "default"));
5566 PetscCall(MatProductSetFill(*C, fill));
5567 PetscCall(MatSetOptionsPrefix(*C, prefix));
5568 PetscCall(MatProductSetFromOptions(*C));
5569 PetscCall(MatProductSymbolic(*C));
5570 PetscCall(MatProductNumeric(*C));
5571 (*C)->symmetric = A->symmetric;
5572 (*C)->spd = A->spd;
5573 PetscFunctionReturn(PETSC_SUCCESS);
5574 }
5575
PCBDDCComputeLocalMatrix(PC pc,Mat ChangeOfBasisMatrix)5576 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5577 {
5578 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
5579 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5580 Mat new_mat, lA;
5581 IS is_local, is_global;
5582 PetscInt local_size;
5583 PetscBool isseqaij, issym, isset;
5584 char ptapprefix[256];
5585
5586 PetscFunctionBegin;
5587 PetscCall(MatDestroy(&pcbddc->local_mat));
5588 PetscCall(MatGetSize(matis->A, &local_size, NULL));
5589 if (pcbddc->mat_graph->multi_element) {
5590 Mat *mats, *bdiags;
5591 IS *gsubs;
5592 PetscInt nsubs = pcbddc->n_local_subs;
5593
5594 PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5595 #if 1
5596 PetscCall(PetscMalloc1(nsubs, &gsubs));
5597 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5598 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5599 for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5600 PetscCall(PetscFree(gsubs));
5601 #else /* this does not work since MatCreateSubMatrices does not support repeated indices */
5602 Mat *tmats;
5603 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5604 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5605 PetscCall(ISDestroy(&is_local));
5606 PetscCall(MatSetOption(ChangeOfBasisMatrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
5607 PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, 1, &is_global, &is_global, MAT_INITIAL_MATRIX, &tmats));
5608 PetscCall(ISDestroy(&is_global));
5609 PetscCall(MatCreateSubMatrices(tmats[0], nsubs, pcbddc->local_subs, pcbddc->local_subs, MAT_INITIAL_MATRIX, &bdiags));
5610 PetscCall(MatDestroySubMatrices(1, &tmats));
5611 #endif
5612 for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5613 PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5614 PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5615 PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5616 PetscCall(PetscFree(mats));
5617 } else {
5618 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5619 PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5620 PetscCall(ISDestroy(&is_local));
5621 PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5622 PetscCall(ISDestroy(&is_global));
5623 }
5624 if (pcbddc->dbg_flag) {
5625 Vec x, x_change;
5626 PetscReal error;
5627
5628 PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5629 PetscCall(VecSetRandom(x, NULL));
5630 PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5631 PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5632 PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5633 PetscCall(MatMult(new_mat, matis->x, matis->y));
5634 if (!pcbddc->change_interior) {
5635 const PetscScalar *x, *y, *v;
5636 PetscReal lerror = 0.;
5637 PetscInt i;
5638
5639 PetscCall(VecGetArrayRead(matis->x, &x));
5640 PetscCall(VecGetArrayRead(matis->y, &y));
5641 PetscCall(VecGetArrayRead(matis->counter, &v));
5642 for (i = 0; i < local_size; i++)
5643 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5644 PetscCall(VecRestoreArrayRead(matis->x, &x));
5645 PetscCall(VecRestoreArrayRead(matis->y, &y));
5646 PetscCall(VecRestoreArrayRead(matis->counter, &v));
5647 PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5648 if (error > PETSC_SMALL) {
5649 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5650 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5651 } else {
5652 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5653 }
5654 }
5655 }
5656 PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5657 PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5658 PetscCall(VecAXPY(x, -1.0, x_change));
5659 PetscCall(VecNorm(x, NORM_INFINITY, &error));
5660 if (error > PETSC_SMALL) {
5661 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5662 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5663 } else {
5664 SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5665 }
5666 }
5667 PetscCall(VecDestroy(&x));
5668 PetscCall(VecDestroy(&x_change));
5669 }
5670
5671 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5672 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5673
5674 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5675 if (((PetscObject)pc)->prefix) PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "%spc_bddc_change_", ((PetscObject)pc)->prefix));
5676 else PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "pc_bddc_change_"));
5677 PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5678 if (isseqaij) {
5679 PetscCall(MatDestroy(&pcbddc->local_mat));
5680 PetscCall(MatPtAPWithPrefix_Private(matis->A, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5681 if (lA) {
5682 Mat work;
5683 PetscCall(MatPtAPWithPrefix_Private(lA, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5684 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5685 PetscCall(MatDestroy(&work));
5686 }
5687 } else {
5688 Mat work_mat;
5689
5690 PetscCall(MatDestroy(&pcbddc->local_mat));
5691 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5692 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5693 PetscCall(MatDestroy(&work_mat));
5694 if (lA) {
5695 Mat work;
5696 PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5697 PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5698 PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5699 PetscCall(MatDestroy(&work));
5700 }
5701 }
5702 PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5703 if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5704 PetscCall(MatDestroy(&new_mat));
5705 PetscFunctionReturn(PETSC_SUCCESS);
5706 }
5707
PCBDDCSetUpLocalScatters(PC pc)5708 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5709 {
5710 PC_IS *pcis = (PC_IS *)pc->data;
5711 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5712 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5713 PetscInt *idx_R_local = NULL;
5714 PetscInt n_vertices, i, j, n_R, n_D, n_B;
5715 PetscInt vbs, bs;
5716 PetscBT bitmask = NULL;
5717
5718 PetscFunctionBegin;
5719 /*
5720 No need to setup local scatters if
5721 - primal space is unchanged
5722 AND
5723 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5724 AND
5725 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5726 */
5727 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5728 /* destroy old objects */
5729 PetscCall(ISDestroy(&pcbddc->is_R_local));
5730 PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5731 PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5732 /* Set Non-overlapping dimensions */
5733 n_B = pcis->n_B;
5734 n_D = pcis->n - n_B;
5735 n_vertices = pcbddc->n_vertices;
5736
5737 /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5738
5739 /* create auxiliary bitmask and allocate workspace */
5740 if (!sub_schurs || !sub_schurs->reuse_solver) {
5741 PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5742 PetscCall(PetscBTCreate(pcis->n, &bitmask));
5743 for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5744
5745 for (i = 0, n_R = 0; i < pcis->n; i++) {
5746 if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5747 }
5748 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5749 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5750
5751 PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5752 PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5753 }
5754
5755 /* Block code */
5756 vbs = 1;
5757 PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5758 if (bs > 1 && !(n_vertices % bs)) {
5759 PetscBool is_blocked = PETSC_TRUE;
5760 PetscInt *vary;
5761 if (!sub_schurs || !sub_schurs->reuse_solver) {
5762 PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5763 PetscCall(PetscArrayzero(vary, pcis->n / bs));
5764 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5765 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5766 for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5767 for (i = 0; i < pcis->n / bs; i++) {
5768 if (vary[i] != 0 && vary[i] != bs) {
5769 is_blocked = PETSC_FALSE;
5770 break;
5771 }
5772 }
5773 PetscCall(PetscFree(vary));
5774 } else {
5775 /* Verify directly the R set */
5776 for (i = 0; i < n_R / bs; i++) {
5777 PetscInt j, node = idx_R_local[bs * i];
5778 for (j = 1; j < bs; j++) {
5779 if (node != idx_R_local[bs * i + j] - j) {
5780 is_blocked = PETSC_FALSE;
5781 break;
5782 }
5783 }
5784 }
5785 }
5786 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5787 vbs = bs;
5788 for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5789 }
5790 }
5791 PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5792 if (sub_schurs && sub_schurs->reuse_solver) {
5793 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5794
5795 PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5796 PetscCall(ISDestroy(&reuse_solver->is_R));
5797 PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5798 reuse_solver->is_R = pcbddc->is_R_local;
5799 } else {
5800 PetscCall(PetscFree(idx_R_local));
5801 }
5802
5803 /* print some info if requested */
5804 if (pcbddc->dbg_flag) {
5805 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5806 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5807 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5808 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5809 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5810 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5811 pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5812 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5813 }
5814
5815 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5816 if (!sub_schurs || !sub_schurs->reuse_solver) {
5817 IS is_aux1, is_aux2;
5818 PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5819
5820 PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5821 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5822 PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5823 PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5824 for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5825 PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5826 for (i = 0, j = 0; i < n_R; i++) {
5827 if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5828 }
5829 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5830 PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5831 for (i = 0, j = 0; i < n_B; i++) {
5832 if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5833 }
5834 PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5835 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5836 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5837 PetscCall(ISDestroy(&is_aux1));
5838 PetscCall(ISDestroy(&is_aux2));
5839
5840 if (pcbddc->switch_static || pcbddc->dbg_flag) {
5841 PetscCall(PetscMalloc1(n_D, &aux_array1));
5842 for (i = 0, j = 0; i < n_R; i++) {
5843 if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5844 }
5845 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5846 PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5847 PetscCall(ISDestroy(&is_aux1));
5848 }
5849 PetscCall(PetscBTDestroy(&bitmask));
5850 PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5851 } else {
5852 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5853 IS tis;
5854 PetscInt schur_size;
5855
5856 PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5857 PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5858 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5859 PetscCall(ISDestroy(&tis));
5860 if (pcbddc->switch_static || pcbddc->dbg_flag) {
5861 PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5862 PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5863 PetscCall(ISDestroy(&tis));
5864 }
5865 }
5866 PetscFunctionReturn(PETSC_SUCCESS);
5867 }
5868
MatNullSpacePropagateAny_Private(Mat A,IS is,Mat B)5869 PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5870 {
5871 MatNullSpace NullSpace;
5872 Mat dmat;
5873 const Vec *nullvecs;
5874 Vec v, v2, *nullvecs2;
5875 VecScatter sct = NULL;
5876 PetscScalar *ddata;
5877 PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs;
5878 PetscBool nnsp_has_cnst;
5879
5880 PetscFunctionBegin;
5881 if (!is && !B) { /* MATIS */
5882 Mat_IS *matis = (Mat_IS *)A->data;
5883
5884 if (!B) PetscCall(MatISGetLocalMat(A, &B));
5885 sct = matis->cctx;
5886 PetscCall(PetscObjectReference((PetscObject)sct));
5887 } else {
5888 PetscCall(MatGetNullSpace(B, &NullSpace));
5889 if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5890 if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5891 }
5892 PetscCall(MatGetNullSpace(A, &NullSpace));
5893 if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5894 if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5895
5896 PetscCall(MatCreateVecs(A, &v, NULL));
5897 PetscCall(MatCreateVecs(B, &v2, NULL));
5898 if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5899 PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs));
5900 bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5901 PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5902 PetscCall(VecGetBlockSize(v2, &bs));
5903 PetscCall(VecGetSize(v2, &N));
5904 PetscCall(VecGetLocalSize(v2, &n));
5905 PetscCall(PetscMalloc1(n * bsiz, &ddata));
5906 for (k = 0; k < nnsp_size; k++) {
5907 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5908 PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5909 PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5910 }
5911 if (nnsp_has_cnst) {
5912 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5913 PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5914 }
5915 PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5916 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5917
5918 PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5919 PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault));
5920 PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5921 PetscCall(MatDestroy(&dmat));
5922
5923 for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5924 PetscCall(PetscFree(nullvecs2));
5925 PetscCall(MatSetNearNullSpace(B, NullSpace));
5926 PetscCall(MatNullSpaceDestroy(&NullSpace));
5927 PetscCall(VecDestroy(&v));
5928 PetscCall(VecDestroy(&v2));
5929 PetscCall(VecScatterDestroy(&sct));
5930 PetscFunctionReturn(PETSC_SUCCESS);
5931 }
5932
PCBDDCSetUpLocalSolvers(PC pc,PetscBool dirichlet,PetscBool neumann)5933 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5934 {
5935 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5936 PC_IS *pcis = (PC_IS *)pc->data;
5937 PC pc_temp;
5938 Mat A_RR;
5939 MatNullSpace nnsp;
5940 MatReuse reuse;
5941 PetscScalar m_one = -1.0;
5942 PetscReal value;
5943 PetscInt n_D, n_R;
5944 PetscBool issbaij, opts, isset, issym;
5945 PetscBool f = PETSC_FALSE;
5946 char dir_prefix[256], neu_prefix[256], str_level[16];
5947 size_t len;
5948
5949 PetscFunctionBegin;
5950 PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5951 /* approximate solver, propagate NearNullSpace if needed */
5952 if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5953 MatNullSpace gnnsp1, gnnsp2;
5954 PetscBool lhas, ghas;
5955
5956 PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5957 PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5958 PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5959 lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5960 PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5961 if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5962 }
5963
5964 /* compute prefixes */
5965 PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5966 PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5967 if (!pcbddc->current_level) {
5968 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5969 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5970 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5971 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5972 } else {
5973 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
5974 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
5975 len -= 15; /* remove "pc_bddc_coarse_" */
5976 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */
5977 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
5978 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5979 PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
5980 PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
5981 PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5982 PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5983 PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
5984 PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
5985 }
5986
5987 /* DIRICHLET PROBLEM */
5988 if (dirichlet) {
5989 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5990 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5991 PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
5992 if (pcbddc->dbg_flag) {
5993 Mat A_IIn;
5994
5995 PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
5996 PetscCall(MatDestroy(&pcis->A_II));
5997 pcis->A_II = A_IIn;
5998 }
5999 }
6000 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6001 if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
6002
6003 /* Matrix for Dirichlet problem is pcis->A_II */
6004 n_D = pcis->n - pcis->n_B;
6005 opts = PETSC_FALSE;
6006 if (!pcbddc->ksp_D) { /* create object if not yet build */
6007 opts = PETSC_TRUE;
6008 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
6009 PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
6010 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
6011 /* default */
6012 PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
6013 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
6014 PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
6015 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6016 if (issbaij) {
6017 PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6018 } else {
6019 PetscCall(PCSetType(pc_temp, PCLU));
6020 }
6021 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
6022 }
6023 PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
6024 PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
6025 PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
6026 /* Allow user's customization */
6027 if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
6028 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
6029 if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6030 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
6031 }
6032 PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
6033 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6034 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6035 if (f && pcbddc->mat_graph->cloc && !nnsp) {
6036 PetscReal *coords = pcbddc->mat_graph->coords, *scoords;
6037 const PetscInt *idxs;
6038 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d;
6039
6040 PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
6041 PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
6042 PetscCall(PetscMalloc1(nl * cdim, &scoords));
6043 for (i = 0; i < nl; i++) {
6044 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6045 }
6046 PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
6047 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6048 PetscCall(PetscFree(scoords));
6049 }
6050 if (sub_schurs && sub_schurs->reuse_solver) {
6051 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6052
6053 PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
6054 }
6055
6056 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6057 if (!n_D) {
6058 PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6059 PetscCall(PCSetType(pc_temp, PCNONE));
6060 }
6061 PetscCall(KSPSetUp(pcbddc->ksp_D));
6062 /* set ksp_D into pcis data */
6063 PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
6064 PetscCall(KSPDestroy(&pcis->ksp_D));
6065 pcis->ksp_D = pcbddc->ksp_D;
6066 }
6067
6068 /* NEUMANN PROBLEM */
6069 A_RR = NULL;
6070 if (neumann) {
6071 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6072 PetscInt ibs, mbs;
6073 PetscBool issbaij, reuse_neumann_solver, isset, issym;
6074 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
6075
6076 reuse_neumann_solver = PETSC_FALSE;
6077 if (sub_schurs && sub_schurs->reuse_solver) {
6078 IS iP;
6079
6080 reuse_neumann_solver = PETSC_TRUE;
6081 PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
6082 if (iP) reuse_neumann_solver = PETSC_FALSE;
6083 }
6084 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
6085 PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
6086 if (pcbddc->ksp_R) { /* already created ksp */
6087 PetscInt nn_R;
6088 PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
6089 PetscCall(PetscObjectReference((PetscObject)A_RR));
6090 PetscCall(MatGetSize(A_RR, &nn_R, NULL));
6091 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
6092 PetscCall(KSPReset(pcbddc->ksp_R));
6093 PetscCall(MatDestroy(&A_RR));
6094 reuse = MAT_INITIAL_MATRIX;
6095 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
6096 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
6097 PetscCall(MatDestroy(&A_RR));
6098 reuse = MAT_INITIAL_MATRIX;
6099 } else { /* safe to reuse the matrix */
6100 reuse = MAT_REUSE_MATRIX;
6101 }
6102 }
6103 /* last check */
6104 if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
6105 PetscCall(MatDestroy(&A_RR));
6106 reuse = MAT_INITIAL_MATRIX;
6107 }
6108 } else { /* first time, so we need to create the matrix */
6109 reuse = MAT_INITIAL_MATRIX;
6110 }
6111 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
6112 TODO: Get Rid of these conversions */
6113 PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
6114 PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
6115 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
6116 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
6117 if (matis->A == pcbddc->local_mat) {
6118 PetscCall(MatDestroy(&pcbddc->local_mat));
6119 PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6120 } else {
6121 PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6122 }
6123 } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
6124 if (matis->A == pcbddc->local_mat) {
6125 PetscCall(MatDestroy(&pcbddc->local_mat));
6126 PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6127 } else {
6128 PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6129 }
6130 }
6131 /* extract A_RR */
6132 if (reuse_neumann_solver) {
6133 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6134
6135 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6136 PetscCall(MatDestroy(&A_RR));
6137 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6138 PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6139 } else {
6140 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6141 }
6142 } else {
6143 PetscCall(MatDestroy(&A_RR));
6144 PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6145 PetscCall(PetscObjectReference((PetscObject)A_RR));
6146 }
6147 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6148 PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6149 }
6150 PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6151 if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6152 opts = PETSC_FALSE;
6153 if (!pcbddc->ksp_R) { /* create object if not present */
6154 opts = PETSC_TRUE;
6155 PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6156 PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6157 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6158 /* default */
6159 PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6160 PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6161 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6162 PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6163 if (issbaij) {
6164 PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6165 } else {
6166 PetscCall(PCSetType(pc_temp, PCLU));
6167 }
6168 PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6169 }
6170 PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6171 PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6172 PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6173 if (opts) { /* Allow user's customization once */
6174 PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6175 }
6176 PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6177 if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6178 PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6179 }
6180 PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6181 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6182 PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6183 if (f && pcbddc->mat_graph->cloc && !nnsp) {
6184 PetscReal *coords = pcbddc->mat_graph->coords, *scoords;
6185 const PetscInt *idxs;
6186 PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d;
6187
6188 PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6189 PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6190 PetscCall(PetscMalloc1(nl * cdim, &scoords));
6191 for (i = 0; i < nl; i++) {
6192 for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6193 }
6194 PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6195 PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6196 PetscCall(PetscFree(scoords));
6197 }
6198
6199 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6200 if (!n_R) {
6201 PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6202 PetscCall(PCSetType(pc_temp, PCNONE));
6203 }
6204 /* Reuse solver if it is present */
6205 if (reuse_neumann_solver) {
6206 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6207
6208 PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6209 }
6210 PetscCall(KSPSetUp(pcbddc->ksp_R));
6211 }
6212
6213 if (pcbddc->dbg_flag) {
6214 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6215 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6216 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6217 }
6218 PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6219
6220 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6221 if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6222 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6223 if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6224 /* check Dirichlet and Neumann solvers */
6225 if (pcbddc->dbg_flag) {
6226 if (dirichlet) { /* Dirichlet */
6227 PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6228 PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6229 PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6230 PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6231 PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6232 PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6233 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6234 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6235 }
6236 if (neumann) { /* Neumann */
6237 PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6238 PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6239 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6240 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6241 PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6242 PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6243 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6244 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6245 }
6246 }
6247 /* free Neumann problem's matrix */
6248 PetscCall(MatDestroy(&A_RR));
6249 PetscFunctionReturn(PETSC_SUCCESS);
6250 }
6251
PCBDDCSolveSubstructureCorrection(PC pc,Vec inout_B,Vec inout_D,PetscBool applytranspose)6252 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6253 {
6254 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6255 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6256 PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6257
6258 PetscFunctionBegin;
6259 if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6260 if (!pcbddc->switch_static) {
6261 if (applytranspose && pcbddc->local_auxmat1) {
6262 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6263 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6264 }
6265 if (!reuse_solver) {
6266 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6267 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6268 } else {
6269 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6270
6271 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6272 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6273 }
6274 } else {
6275 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6276 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6277 PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6278 PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6279 if (applytranspose && pcbddc->local_auxmat1) {
6280 PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6281 PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6282 PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6283 PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6284 }
6285 }
6286 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6287 if (!reuse_solver || pcbddc->switch_static) {
6288 if (applytranspose) {
6289 PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6290 } else {
6291 PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6292 }
6293 PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6294 } else {
6295 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6296
6297 if (applytranspose) {
6298 PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6299 } else {
6300 PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6301 }
6302 }
6303 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6304 PetscCall(VecSet(inout_B, 0.));
6305 if (!pcbddc->switch_static) {
6306 if (!reuse_solver) {
6307 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6308 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6309 } else {
6310 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6311
6312 PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6313 PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6314 }
6315 if (!applytranspose && pcbddc->local_auxmat1) {
6316 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6317 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6318 }
6319 } else {
6320 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6321 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6322 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6323 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6324 if (!applytranspose && pcbddc->local_auxmat1) {
6325 PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6326 PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6327 }
6328 PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6329 PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6330 PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6331 PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6332 }
6333 PetscFunctionReturn(PETSC_SUCCESS);
6334 }
6335
6336 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
PCBDDCApplyInterfacePreconditioner(PC pc,PetscBool applytranspose)6337 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6338 {
6339 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6340 PC_IS *pcis = (PC_IS *)pc->data;
6341 const PetscScalar zero = 0.0;
6342
6343 PetscFunctionBegin;
6344 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6345 if (!pcbddc->benign_apply_coarse_only) {
6346 if (applytranspose) {
6347 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6348 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6349 } else {
6350 PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6351 if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6352 }
6353 } else {
6354 PetscCall(VecSet(pcbddc->vec1_P, zero));
6355 }
6356
6357 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6358 if (pcbddc->benign_n) {
6359 PetscScalar *array;
6360 PetscInt j;
6361
6362 PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6363 for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6364 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6365 }
6366
6367 /* start communications from local primal nodes to rhs of coarse solver */
6368 PetscCall(VecSet(pcbddc->coarse_vec, zero));
6369 PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6370 PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6371
6372 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6373 PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6374 if (pcbddc->coarse_ksp) {
6375 Mat coarse_mat;
6376 Vec rhs, sol;
6377 MatNullSpace nullsp;
6378 PetscBool isbddc = PETSC_FALSE;
6379
6380 if (pcbddc->benign_have_null) {
6381 PC coarse_pc;
6382
6383 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6384 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6385 /* we need to propagate to coarser levels the need for a possible benign correction */
6386 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6387 PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data;
6388 coarsepcbddc->benign_skip_correction = PETSC_FALSE;
6389 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6390 }
6391 }
6392 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6393 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6394 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6395 if (applytranspose) {
6396 PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6397 PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6398 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6399 PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6400 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6401 } else {
6402 PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6403 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6404 PC coarse_pc;
6405
6406 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6407 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6408 PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6409 PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6410 PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6411 } else {
6412 PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6413 PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6414 if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6415 }
6416 }
6417 /* we don't need the benign correction at coarser levels anymore */
6418 if (pcbddc->benign_have_null && isbddc) {
6419 PC coarse_pc;
6420 PC_BDDC *coarsepcbddc;
6421
6422 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6423 coarsepcbddc = (PC_BDDC *)coarse_pc->data;
6424 coarsepcbddc->benign_skip_correction = PETSC_TRUE;
6425 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6426 }
6427 }
6428 PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6429
6430 /* Local solution on R nodes */
6431 if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6432 /* communications from coarse sol to local primal nodes */
6433 PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6434 PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6435
6436 /* Sum contributions from the two levels */
6437 if (!pcbddc->benign_apply_coarse_only) {
6438 if (applytranspose) {
6439 PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6440 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6441 } else {
6442 PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6443 if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6444 }
6445 /* store p0 */
6446 if (pcbddc->benign_n) {
6447 PetscScalar *array;
6448 PetscInt j;
6449
6450 PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6451 for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6452 PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6453 }
6454 } else { /* expand the coarse solution */
6455 if (applytranspose) {
6456 PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6457 } else {
6458 PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6459 }
6460 }
6461 PetscFunctionReturn(PETSC_SUCCESS);
6462 }
6463
PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode,ScatterMode smode)6464 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6465 {
6466 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6467 Vec from, to;
6468 const PetscScalar *array;
6469
6470 PetscFunctionBegin;
6471 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6472 from = pcbddc->coarse_vec;
6473 to = pcbddc->vec1_P;
6474 if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6475 Vec tvec;
6476
6477 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6478 PetscCall(VecResetArray(tvec));
6479 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6480 PetscCall(VecGetArrayRead(tvec, &array));
6481 PetscCall(VecPlaceArray(from, array));
6482 PetscCall(VecRestoreArrayRead(tvec, &array));
6483 }
6484 } else { /* from local to global -> put data in coarse right-hand side */
6485 from = pcbddc->vec1_P;
6486 to = pcbddc->coarse_vec;
6487 }
6488 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6489 PetscFunctionReturn(PETSC_SUCCESS);
6490 }
6491
PCBDDCScatterCoarseDataEnd(PC pc,InsertMode imode,ScatterMode smode)6492 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6493 {
6494 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6495 Vec from, to;
6496 const PetscScalar *array;
6497
6498 PetscFunctionBegin;
6499 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6500 from = pcbddc->coarse_vec;
6501 to = pcbddc->vec1_P;
6502 } else { /* from local to global -> put data in coarse right-hand side */
6503 from = pcbddc->vec1_P;
6504 to = pcbddc->coarse_vec;
6505 }
6506 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6507 if (smode == SCATTER_FORWARD) {
6508 if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6509 Vec tvec;
6510
6511 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6512 PetscCall(VecGetArrayRead(to, &array));
6513 PetscCall(VecPlaceArray(tvec, array));
6514 PetscCall(VecRestoreArrayRead(to, &array));
6515 }
6516 } else {
6517 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6518 PetscCall(VecResetArray(from));
6519 }
6520 }
6521 PetscFunctionReturn(PETSC_SUCCESS);
6522 }
6523
PCBDDCConstraintsSetUp(PC pc)6524 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6525 {
6526 PC_IS *pcis = (PC_IS *)pc->data;
6527 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6528 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
6529 /* one and zero */
6530 PetscScalar one = 1.0, zero = 0.0;
6531 /* space to store constraints and their local indices */
6532 PetscScalar *constraints_data;
6533 PetscInt *constraints_idxs, *constraints_idxs_B;
6534 PetscInt *constraints_idxs_ptr, *constraints_data_ptr;
6535 PetscInt *constraints_n;
6536 /* iterators */
6537 PetscInt i, j, k, total_counts, total_counts_cc, cum;
6538 /* BLAS integers */
6539 PetscBLASInt lwork, lierr;
6540 PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6541 PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6542 /* reuse */
6543 PetscInt olocal_primal_size, olocal_primal_size_cc;
6544 PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6545 /* change of basis */
6546 PetscBool qr_needed;
6547 PetscBT change_basis, qr_needed_idx;
6548 /* auxiliary stuff */
6549 PetscInt *nnz, *is_indices;
6550 PetscInt ncc;
6551 /* some quantities */
6552 PetscInt n_vertices, total_primal_vertices, valid_constraints;
6553 PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6554 PetscReal tol; /* tolerance for retaining eigenmodes */
6555
6556 PetscFunctionBegin;
6557 tol = PetscSqrtReal(PETSC_SMALL);
6558 /* Destroy Mat objects computed previously */
6559 PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6560 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6561 PetscCall(MatDestroy(&pcbddc->switch_static_change));
6562 /* save info on constraints from previous setup (if any) */
6563 olocal_primal_size = pcbddc->local_primal_size;
6564 olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6565 PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6566 PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6567 PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6568 PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6569 PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6570
6571 if (!pcbddc->adaptive_selection) {
6572 IS ISForVertices, *ISForFaces, *ISForEdges;
6573 MatNullSpace nearnullsp;
6574 const Vec *nearnullvecs;
6575 Vec *localnearnullsp;
6576 PetscScalar *array;
6577 PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6578 PetscBool nnsp_has_cnst;
6579 /* LAPACK working arrays for SVD or POD */
6580 PetscBool skip_lapack, boolforchange;
6581 PetscScalar *work;
6582 PetscReal *singular_vals;
6583 #if defined(PETSC_USE_COMPLEX)
6584 PetscReal *rwork;
6585 #endif
6586 PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6587 PetscBLASInt dummy_int = 1;
6588 PetscScalar dummy_scalar = 1.;
6589 PetscBool use_pod = PetscDefined(MISSING_LAPACK_GESVD) || PetscDefined(HAVE_MKL_LIBS) ? PETSC_TRUE : PETSC_FALSE; /* MKL SVD with same input gives different results on different processes! */
6590
6591 /* Get index sets for faces, edges and vertices from graph */
6592 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6593 o_nf = n_ISForFaces;
6594 o_ne = n_ISForEdges;
6595 n_vertices = 0;
6596 if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6597 /* print some info */
6598 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6599 if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6600 PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6601 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6602 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6603 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6604 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6605 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6606 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6607 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6608 }
6609
6610 if (!pcbddc->use_vertices) n_vertices = 0;
6611 if (!pcbddc->use_edges) n_ISForEdges = 0;
6612 if (!pcbddc->use_faces) n_ISForFaces = 0;
6613
6614 /* check if near null space is attached to global mat */
6615 if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6616 else nearnullsp = NULL;
6617
6618 if (nearnullsp) {
6619 PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6620 /* remove any stored info */
6621 PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6622 PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6623 /* store information for BDDC solver reuse */
6624 PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6625 pcbddc->onearnullspace = nearnullsp;
6626 PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6627 for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6628 } else { /* if near null space is not provided BDDC uses constants by default */
6629 nnsp_size = 0;
6630 nnsp_has_cnst = PETSC_TRUE;
6631 }
6632 /* get max number of constraints on a single cc */
6633 max_constraints = nnsp_size;
6634 if (nnsp_has_cnst) max_constraints++;
6635
6636 /*
6637 Evaluate maximum storage size needed by the procedure
6638 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6639 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6640 There can be multiple constraints per connected component
6641 */
6642 ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6643 PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6644
6645 total_counts = n_ISForFaces + n_ISForEdges;
6646 total_counts *= max_constraints;
6647 total_counts += n_vertices;
6648 PetscCall(PetscBTCreate(total_counts, &change_basis));
6649
6650 total_counts = 0;
6651 max_size_of_constraint = 0;
6652 for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6653 IS used_is;
6654 if (i < n_ISForEdges) {
6655 used_is = ISForEdges[i];
6656 } else {
6657 used_is = ISForFaces[i - n_ISForEdges];
6658 }
6659 PetscCall(ISGetSize(used_is, &j));
6660 total_counts += j;
6661 max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6662 }
6663 PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6664
6665 /* get local part of global near null space vectors */
6666 PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6667 for (k = 0; k < nnsp_size; k++) {
6668 PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6669 PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6670 PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6671 }
6672
6673 /* whether or not to skip lapack calls */
6674 skip_lapack = PETSC_TRUE;
6675 if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6676
6677 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6678 if (!skip_lapack) {
6679 PetscScalar temp_work;
6680
6681 if (use_pod) {
6682 /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6683 PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6684 PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6685 PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6686 #if defined(PETSC_USE_COMPLEX)
6687 PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6688 #endif
6689 /* now we evaluate the optimal workspace using query with lwork=-1 */
6690 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6691 PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6692 lwork = -1;
6693 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6694 #if !defined(PETSC_USE_COMPLEX)
6695 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6696 #else
6697 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6698 #endif
6699 PetscCall(PetscFPTrapPop());
6700 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6701 } else {
6702 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6703 /* SVD */
6704 PetscInt max_n, min_n;
6705 max_n = max_size_of_constraint;
6706 min_n = max_constraints;
6707 if (max_size_of_constraint < max_constraints) {
6708 min_n = max_size_of_constraint;
6709 max_n = max_constraints;
6710 }
6711 PetscCall(PetscMalloc1(min_n, &singular_vals));
6712 #if defined(PETSC_USE_COMPLEX)
6713 PetscCall(PetscMalloc1(5 * min_n, &rwork));
6714 #endif
6715 /* now we evaluate the optimal workspace using query with lwork=-1 */
6716 lwork = -1;
6717 PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6718 PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6719 PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6720 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6721 #if !defined(PETSC_USE_COMPLEX)
6722 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6723 #else
6724 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6725 #endif
6726 PetscCall(PetscFPTrapPop());
6727 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6728 #else
6729 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6730 #endif /* on missing GESVD */
6731 }
6732 /* Allocate optimal workspace */
6733 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6734 PetscCall(PetscMalloc1(lwork, &work));
6735 }
6736 /* Now we can loop on constraining sets */
6737 total_counts = 0;
6738 constraints_idxs_ptr[0] = 0;
6739 constraints_data_ptr[0] = 0;
6740 /* vertices */
6741 if (n_vertices) {
6742 PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6743 PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6744 for (i = 0; i < n_vertices; i++) {
6745 constraints_n[total_counts] = 1;
6746 constraints_data[total_counts] = 1.0;
6747 constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6748 constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6749 total_counts++;
6750 }
6751 PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6752 }
6753
6754 /* edges and faces */
6755 total_counts_cc = total_counts;
6756 for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6757 IS used_is;
6758 PetscBool idxs_copied = PETSC_FALSE;
6759
6760 if (ncc < n_ISForEdges) {
6761 used_is = ISForEdges[ncc];
6762 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6763 } else {
6764 used_is = ISForFaces[ncc - n_ISForEdges];
6765 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6766 }
6767 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6768
6769 PetscCall(ISGetSize(used_is, &size_of_constraint));
6770 if (!size_of_constraint) continue;
6771 PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6772 if (nnsp_has_cnst) {
6773 PetscScalar quad_value;
6774
6775 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6776 idxs_copied = PETSC_TRUE;
6777
6778 if (!pcbddc->use_nnsp_true) {
6779 quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6780 } else {
6781 quad_value = 1.0;
6782 }
6783 for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6784 temp_constraints++;
6785 total_counts++;
6786 }
6787 for (k = 0; k < nnsp_size; k++) {
6788 PetscReal real_value;
6789 PetscScalar *ptr_to_data;
6790
6791 PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6792 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6793 for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6794 PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6795 /* check if array is null on the connected component */
6796 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6797 PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6798 if (real_value > tol * size_of_constraint) { /* keep indices and values */
6799 temp_constraints++;
6800 total_counts++;
6801 if (!idxs_copied) {
6802 PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6803 idxs_copied = PETSC_TRUE;
6804 }
6805 }
6806 }
6807 PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6808 valid_constraints = temp_constraints;
6809 if (!pcbddc->use_nnsp_true && temp_constraints) {
6810 if (temp_constraints == 1) { /* just normalize the constraint */
6811 PetscScalar norm, *ptr_to_data;
6812
6813 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6814 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6815 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6816 norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6817 PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6818 } else { /* perform SVD */
6819 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6820
6821 if (use_pod) {
6822 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6823 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6824 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6825 the constraints basis will differ (by a complex factor with absolute value equal to 1)
6826 from that computed using LAPACKgesvd
6827 -> This is due to a different computation of eigenvectors in LAPACKheev
6828 -> The quality of the POD-computed basis will be the same */
6829 PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6830 /* Store upper triangular part of correlation matrix */
6831 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6832 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6833 for (j = 0; j < temp_constraints; j++) {
6834 for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6835 }
6836 /* compute eigenvalues and eigenvectors of correlation matrix */
6837 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6838 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6839 #if !defined(PETSC_USE_COMPLEX)
6840 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6841 #else
6842 PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6843 #endif
6844 PetscCall(PetscFPTrapPop());
6845 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6846 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6847 j = 0;
6848 while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6849 total_counts = total_counts - j;
6850 valid_constraints = temp_constraints - j;
6851 /* scale and copy POD basis into used quadrature memory */
6852 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6853 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6854 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6855 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6856 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6857 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6858 if (j < temp_constraints) {
6859 PetscInt ii;
6860 for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6861 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6862 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6863 PetscCall(PetscFPTrapPop());
6864 for (k = 0; k < temp_constraints - j; k++) {
6865 for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6866 }
6867 }
6868 } else {
6869 #if !defined(PETSC_MISSING_LAPACK_GESVD)
6870 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6871 PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6872 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6873 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6874 #if !defined(PETSC_USE_COMPLEX)
6875 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6876 #else
6877 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6878 #endif
6879 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6880 PetscCall(PetscFPTrapPop());
6881 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6882 k = temp_constraints;
6883 if (k > size_of_constraint) k = size_of_constraint;
6884 j = 0;
6885 while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6886 valid_constraints = k - j;
6887 total_counts = total_counts - temp_constraints + valid_constraints;
6888 #else
6889 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6890 #endif /* on missing GESVD */
6891 }
6892 }
6893 }
6894 /* update pointers information */
6895 if (valid_constraints) {
6896 constraints_n[total_counts_cc] = valid_constraints;
6897 constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6898 constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6899 /* set change_of_basis flag */
6900 if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6901 total_counts_cc++;
6902 }
6903 }
6904 /* free workspace */
6905 if (!skip_lapack) {
6906 PetscCall(PetscFree(work));
6907 #if defined(PETSC_USE_COMPLEX)
6908 PetscCall(PetscFree(rwork));
6909 #endif
6910 PetscCall(PetscFree(singular_vals));
6911 PetscCall(PetscFree(correlation_mat));
6912 PetscCall(PetscFree(temp_basis));
6913 }
6914 for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6915 PetscCall(PetscFree(localnearnullsp));
6916 /* free index sets of faces, edges and vertices */
6917 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6918 } else {
6919 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6920
6921 total_counts = 0;
6922 n_vertices = 0;
6923 if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6924 max_constraints = 0;
6925 total_counts_cc = 0;
6926 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6927 total_counts += pcbddc->adaptive_constraints_n[i];
6928 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6929 max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6930 }
6931 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6932 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6933 constraints_idxs = pcbddc->adaptive_constraints_idxs;
6934 constraints_data = pcbddc->adaptive_constraints_data;
6935 /* constraints_n differs from pcbddc->adaptive_constraints_n */
6936 PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6937 total_counts_cc = 0;
6938 for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6939 if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6940 }
6941
6942 max_size_of_constraint = 0;
6943 for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6944 PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6945 /* Change of basis */
6946 PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6947 if (pcbddc->use_change_of_basis) {
6948 for (i = 0; i < sub_schurs->n_subs; i++) {
6949 if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6950 }
6951 }
6952 }
6953 pcbddc->local_primal_size = total_counts;
6954 PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6955
6956 /* map constraints_idxs in boundary numbering */
6957 if (pcbddc->use_change_of_basis) {
6958 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6959 PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6960 }
6961
6962 /* Create constraint matrix */
6963 PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6964 PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6965 PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6966
6967 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6968 /* determine if a QR strategy is needed for change of basis */
6969 qr_needed = pcbddc->use_qr_single;
6970 PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
6971 total_primal_vertices = 0;
6972 pcbddc->local_primal_size_cc = 0;
6973 for (i = 0; i < total_counts_cc; i++) {
6974 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
6975 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6976 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6977 pcbddc->local_primal_size_cc += 1;
6978 } else if (PetscBTLookup(change_basis, i)) {
6979 for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
6980 pcbddc->local_primal_size_cc += constraints_n[i];
6981 if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6982 PetscCall(PetscBTSet(qr_needed_idx, i));
6983 qr_needed = PETSC_TRUE;
6984 }
6985 } else {
6986 pcbddc->local_primal_size_cc += 1;
6987 }
6988 }
6989 /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6990 pcbddc->n_vertices = total_primal_vertices;
6991 /* permute indices in order to have a sorted set of vertices */
6992 PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
6993 PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
6994 PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
6995 for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
6996
6997 /* nonzero structure of constraint matrix */
6998 /* and get reference dof for local constraints */
6999 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
7000 for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
7001
7002 j = total_primal_vertices;
7003 total_counts = total_primal_vertices;
7004 cum = total_primal_vertices;
7005 for (i = n_vertices; i < total_counts_cc; i++) {
7006 if (!PetscBTLookup(change_basis, i)) {
7007 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
7008 pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
7009 cum++;
7010 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7011 for (k = 0; k < constraints_n[i]; k++) {
7012 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
7013 nnz[j + k] = size_of_constraint;
7014 }
7015 j += constraints_n[i];
7016 }
7017 }
7018 PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
7019 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7020 PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
7021 PetscCall(PetscFree(nnz));
7022
7023 /* set values in constraint matrix */
7024 for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
7025 total_counts = total_primal_vertices;
7026 for (i = n_vertices; i < total_counts_cc; i++) {
7027 if (!PetscBTLookup(change_basis, i)) {
7028 PetscInt *cols;
7029
7030 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7031 cols = constraints_idxs + constraints_idxs_ptr[i];
7032 for (k = 0; k < constraints_n[i]; k++) {
7033 PetscInt row = total_counts + k;
7034 PetscScalar *vals;
7035
7036 vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
7037 PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
7038 }
7039 total_counts += constraints_n[i];
7040 }
7041 }
7042 /* assembling */
7043 PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
7044 PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
7045 PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
7046
7047 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
7048 if (pcbddc->use_change_of_basis) {
7049 /* dual and primal dofs on a single cc */
7050 PetscInt dual_dofs, primal_dofs;
7051 /* working stuff for GEQRF */
7052 PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
7053 PetscBLASInt lqr_work;
7054 /* working stuff for UNGQR */
7055 PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
7056 PetscBLASInt lgqr_work;
7057 /* working stuff for TRTRS */
7058 PetscScalar *trs_rhs = NULL;
7059 PetscBLASInt Blas_NRHS;
7060 /* pointers for values insertion into change of basis matrix */
7061 PetscInt *start_rows, *start_cols;
7062 PetscScalar *start_vals;
7063 /* working stuff for values insertion */
7064 PetscBT is_primal;
7065 PetscInt *aux_primal_numbering_B;
7066 /* matrix sizes */
7067 PetscInt global_size, local_size;
7068 /* temporary change of basis */
7069 Mat localChangeOfBasisMatrix;
7070 /* extra space for debugging */
7071 PetscScalar *dbg_work = NULL;
7072
7073 PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
7074 PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
7075 PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
7076 /* nonzeros for local mat */
7077 PetscCall(PetscMalloc1(pcis->n, &nnz));
7078 if (!pcbddc->benign_change || pcbddc->fake_change) {
7079 for (i = 0; i < pcis->n; i++) nnz[i] = 1;
7080 } else {
7081 const PetscInt *ii;
7082 PetscInt n;
7083 PetscBool flg_row;
7084 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7085 for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
7086 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7087 }
7088 for (i = n_vertices; i < total_counts_cc; i++) {
7089 if (PetscBTLookup(change_basis, i)) {
7090 size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7091 if (PetscBTLookup(qr_needed_idx, i)) {
7092 for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
7093 } else {
7094 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
7095 for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
7096 }
7097 }
7098 }
7099 PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
7100 PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7101 PetscCall(PetscFree(nnz));
7102 /* Set interior change in the matrix */
7103 if (!pcbddc->benign_change || pcbddc->fake_change) {
7104 for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
7105 } else {
7106 const PetscInt *ii, *jj;
7107 PetscScalar *aa;
7108 PetscInt n;
7109 PetscBool flg_row;
7110 PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7111 PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
7112 for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
7113 PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
7114 PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7115 }
7116
7117 if (pcbddc->dbg_flag) {
7118 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
7119 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
7120 }
7121
7122 /* Now we loop on the constraints which need a change of basis */
7123 /*
7124 Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7125 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7126
7127 Basic blocks of change of basis matrix T computed:
7128
7129 - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7130
7131 | 1 0 ... 0 s_1/S |
7132 | 0 1 ... 0 s_2/S |
7133 | ... |
7134 | 0 ... 1 s_{n-1}/S |
7135 | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
7136
7137 with S = \sum_{i=1}^n s_i^2
7138 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7139 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7140
7141 - QR decomposition of constraints otherwise
7142 */
7143 if (qr_needed && max_size_of_constraint) {
7144 /* space to store Q */
7145 PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7146 /* array to store scaling factors for reflectors */
7147 PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7148 /* first we issue queries for optimal work */
7149 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7150 PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7151 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7152 lqr_work = -1;
7153 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7154 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7155 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7156 PetscCall(PetscMalloc1(lqr_work, &qr_work));
7157 lgqr_work = -1;
7158 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7159 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7160 PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7161 PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7162 if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7163 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7164 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7165 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7166 PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7167 /* array to store rhs and solution of triangular solver */
7168 PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7169 /* allocating workspace for check */
7170 if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7171 }
7172 /* array to store whether a node is primal or not */
7173 PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7174 PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7175 PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7176 PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7177 for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7178 PetscCall(PetscFree(aux_primal_numbering_B));
7179
7180 /* loop on constraints and see whether or not they need a change of basis and compute it */
7181 for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7182 size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7183 if (PetscBTLookup(change_basis, total_counts)) {
7184 /* get constraint info */
7185 primal_dofs = constraints_n[total_counts];
7186 dual_dofs = size_of_constraint - primal_dofs;
7187
7188 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7189
7190 if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7191
7192 /* copy quadrature constraints for change of basis check */
7193 if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7194 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7195 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7196
7197 /* compute QR decomposition of constraints */
7198 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7199 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7200 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7201 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7202 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7203 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7204 PetscCall(PetscFPTrapPop());
7205
7206 /* explicitly compute R^-T */
7207 PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7208 for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7209 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7210 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7211 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7212 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7213 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7214 PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7215 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr);
7216 PetscCall(PetscFPTrapPop());
7217
7218 /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7219 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7220 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7221 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7222 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7223 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7224 PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7225 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7226 PetscCall(PetscFPTrapPop());
7227
7228 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7229 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7230 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7231 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7232 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7233 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7234 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7235 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7236 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7237 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7238 PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7239 PetscCall(PetscFPTrapPop());
7240 PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7241
7242 /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7243 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7244 /* insert cols for primal dofs */
7245 for (j = 0; j < primal_dofs; j++) {
7246 start_vals = &qr_basis[j * size_of_constraint];
7247 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7248 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7249 }
7250 /* insert cols for dual dofs */
7251 for (j = 0, k = 0; j < dual_dofs; k++) {
7252 if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7253 start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7254 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7255 PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7256 j++;
7257 }
7258 }
7259
7260 /* check change of basis */
7261 if (pcbddc->dbg_flag) {
7262 PetscInt ii, jj;
7263 PetscBool valid_qr = PETSC_TRUE;
7264 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7265 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7266 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7267 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7268 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7269 PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7270 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7271 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7272 PetscCall(PetscFPTrapPop());
7273 for (jj = 0; jj < size_of_constraint; jj++) {
7274 for (ii = 0; ii < primal_dofs; ii++) {
7275 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7276 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7277 }
7278 }
7279 if (!valid_qr) {
7280 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7281 for (jj = 0; jj < size_of_constraint; jj++) {
7282 for (ii = 0; ii < primal_dofs; ii++) {
7283 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7284 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7285 }
7286 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7287 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7288 }
7289 }
7290 }
7291 } else {
7292 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7293 }
7294 }
7295 } else { /* simple transformation block */
7296 PetscInt row, col;
7297 PetscScalar val, norm;
7298
7299 PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7300 PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7301 for (j = 0; j < size_of_constraint; j++) {
7302 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7303 row = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7304 if (!PetscBTLookup(is_primal, row_B)) {
7305 col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7306 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7307 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7308 } else {
7309 for (k = 0; k < size_of_constraint; k++) {
7310 col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7311 if (row != col) {
7312 val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7313 } else {
7314 val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7315 }
7316 PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7317 }
7318 }
7319 }
7320 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7321 }
7322 } else {
7323 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7324 }
7325 }
7326
7327 /* free workspace */
7328 if (qr_needed) {
7329 if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7330 PetscCall(PetscFree(trs_rhs));
7331 PetscCall(PetscFree(qr_tau));
7332 PetscCall(PetscFree(qr_work));
7333 PetscCall(PetscFree(gqr_work));
7334 PetscCall(PetscFree(qr_basis));
7335 }
7336 PetscCall(PetscBTDestroy(&is_primal));
7337 PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7338 PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7339
7340 /* assembling of global change of variable */
7341 if (!pcbddc->fake_change) {
7342 Mat tmat;
7343
7344 PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7345 PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7346 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7347 PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7348 PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7349 PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7350 PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7351 PetscCall(MatDestroy(&tmat));
7352 PetscCall(VecSet(pcis->vec1_global, 0.0));
7353 PetscCall(VecSet(pcis->vec1_N, 1.0));
7354 PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7355 PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7356 PetscCall(VecReciprocal(pcis->vec1_global));
7357 PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7358
7359 /* check */
7360 if (pcbddc->dbg_flag) {
7361 PetscReal error;
7362 Vec x, x_change;
7363
7364 PetscCall(VecDuplicate(pcis->vec1_global, &x));
7365 PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7366 PetscCall(VecSetRandom(x, NULL));
7367 PetscCall(VecCopy(x, pcis->vec1_global));
7368 PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7369 PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7370 PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7371 PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7372 PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7373 PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7374 PetscCall(VecAXPY(x, -1.0, x_change));
7375 PetscCall(VecNorm(x, NORM_INFINITY, &error));
7376 PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7377 PetscCall(VecDestroy(&x));
7378 PetscCall(VecDestroy(&x_change));
7379 }
7380 /* adapt sub_schurs computed (if any) */
7381 if (pcbddc->use_deluxe_scaling) {
7382 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7383
7384 PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7385 if (sub_schurs && sub_schurs->S_Ej_all) {
7386 Mat S_new, tmat;
7387 IS is_all_N, is_V_Sall = NULL;
7388
7389 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7390 PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7391 if (pcbddc->deluxe_zerorows) {
7392 ISLocalToGlobalMapping NtoSall;
7393 IS is_V;
7394 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7395 PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7396 PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7397 PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7398 PetscCall(ISDestroy(&is_V));
7399 }
7400 PetscCall(ISDestroy(&is_all_N));
7401 PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7402 PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7403 PetscCall(PetscObjectReference((PetscObject)S_new));
7404 if (pcbddc->deluxe_zerorows) {
7405 const PetscScalar *array;
7406 const PetscInt *idxs_V, *idxs_all;
7407 PetscInt i, n_V;
7408
7409 PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7410 PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7411 PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7412 PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7413 PetscCall(VecGetArrayRead(pcis->D, &array));
7414 for (i = 0; i < n_V; i++) {
7415 PetscScalar val;
7416 PetscInt idx;
7417
7418 idx = idxs_V[i];
7419 val = array[idxs_all[idxs_V[i]]];
7420 PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7421 }
7422 PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7423 PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7424 PetscCall(VecRestoreArrayRead(pcis->D, &array));
7425 PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7426 PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7427 }
7428 sub_schurs->S_Ej_all = S_new;
7429 PetscCall(MatDestroy(&S_new));
7430 if (sub_schurs->sum_S_Ej_all) {
7431 PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7432 PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7433 PetscCall(PetscObjectReference((PetscObject)S_new));
7434 if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7435 sub_schurs->sum_S_Ej_all = S_new;
7436 PetscCall(MatDestroy(&S_new));
7437 }
7438 PetscCall(ISDestroy(&is_V_Sall));
7439 PetscCall(MatDestroy(&tmat));
7440 }
7441 /* destroy any change of basis context in sub_schurs */
7442 if (sub_schurs && sub_schurs->change) {
7443 PetscInt i;
7444
7445 for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7446 PetscCall(PetscFree(sub_schurs->change));
7447 }
7448 }
7449 if (pcbddc->switch_static) { /* need to save the local change */
7450 pcbddc->switch_static_change = localChangeOfBasisMatrix;
7451 } else {
7452 PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7453 }
7454 /* determine if any process has changed the pressures locally */
7455 pcbddc->change_interior = pcbddc->benign_have_null;
7456 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7457 PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7458 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7459 pcbddc->use_qr_single = qr_needed;
7460 }
7461 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7462 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7463 PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7464 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7465 } else {
7466 Mat benign_global = NULL;
7467 if (pcbddc->benign_have_null) {
7468 Mat M;
7469
7470 pcbddc->change_interior = PETSC_TRUE;
7471 PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7472 PetscCall(VecReciprocal(pcis->vec1_N));
7473 PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7474 if (pcbddc->benign_change) {
7475 PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7476 PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7477 } else {
7478 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7479 PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7480 }
7481 PetscCall(MatISSetLocalMat(benign_global, M));
7482 PetscCall(MatDestroy(&M));
7483 PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7484 PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7485 }
7486 if (pcbddc->user_ChangeOfBasisMatrix) {
7487 PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7488 PetscCall(MatDestroy(&benign_global));
7489 } else if (pcbddc->benign_have_null) {
7490 pcbddc->ChangeOfBasisMatrix = benign_global;
7491 }
7492 }
7493 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7494 IS is_global;
7495 const PetscInt *gidxs;
7496
7497 PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7498 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7499 PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7500 PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7501 PetscCall(ISDestroy(&is_global));
7502 }
7503 }
7504 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7505
7506 if (!pcbddc->fake_change) {
7507 /* add pressure dofs to set of primal nodes for numbering purposes */
7508 for (i = 0; i < pcbddc->benign_n; i++) {
7509 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7510 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7511 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7512 pcbddc->local_primal_size_cc++;
7513 pcbddc->local_primal_size++;
7514 }
7515
7516 /* check if a new primal space has been introduced (also take into account benign trick) */
7517 pcbddc->new_primal_space_local = PETSC_TRUE;
7518 if (olocal_primal_size == pcbddc->local_primal_size) {
7519 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7520 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7521 if (!pcbddc->new_primal_space_local) {
7522 PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7523 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7524 }
7525 }
7526 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7527 PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7528 }
7529 PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7530
7531 /* flush dbg viewer */
7532 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7533
7534 /* free workspace */
7535 PetscCall(PetscBTDestroy(&qr_needed_idx));
7536 PetscCall(PetscBTDestroy(&change_basis));
7537 if (!pcbddc->adaptive_selection) {
7538 PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7539 PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7540 } else {
7541 PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7542 PetscCall(PetscFree(constraints_n));
7543 PetscCall(PetscFree(constraints_idxs_B));
7544 }
7545 PetscFunctionReturn(PETSC_SUCCESS);
7546 }
7547
PCBDDCAnalyzeInterface(PC pc)7548 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7549 {
7550 ISLocalToGlobalMapping map;
7551 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
7552 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
7553 PetscInt i, N;
7554 PetscBool rcsr = PETSC_FALSE;
7555
7556 PetscFunctionBegin;
7557 if (pcbddc->recompute_topography) {
7558 pcbddc->graphanalyzed = PETSC_FALSE;
7559 /* Reset previously computed graph */
7560 PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7561 /* Init local Graph struct */
7562 PetscCall(MatGetSize(pc->pmat, &N, NULL));
7563 PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7564 PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7565
7566 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7567 /* Check validity of the csr graph passed in by the user */
7568 PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7569 pcbddc->mat_graph->nvtxs);
7570
7571 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7572 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7573 PetscInt *xadj, *adjncy;
7574 PetscInt nvtxs;
7575 PetscBool flg_row;
7576 Mat A;
7577
7578 PetscCall(PetscObjectReference((PetscObject)matis->A));
7579 A = matis->A;
7580 for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7581 Mat AtA;
7582
7583 PetscCall(MatProductCreate(A, A, NULL, &AtA));
7584 PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7585 PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7586 PetscCall(MatProductSetFromOptions(AtA));
7587 PetscCall(MatProductSymbolic(AtA));
7588 PetscCall(MatProductClear(AtA));
7589 /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7590 AtA->assembled = PETSC_TRUE;
7591 PetscCall(MatDestroy(&A));
7592 A = AtA;
7593 }
7594 PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7595 if (flg_row) {
7596 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7597 pcbddc->computed_rowadj = PETSC_TRUE;
7598 PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7599 rcsr = PETSC_TRUE;
7600 }
7601 PetscCall(MatDestroy(&A));
7602 }
7603 if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7604
7605 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7606 PetscReal *lcoords;
7607 PetscInt n;
7608 MPI_Datatype dimrealtype;
7609 PetscMPIInt cdimi;
7610
7611 /* TODO: support for blocked */
7612 PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7613 PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7614 PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7615 PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi));
7616 PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype));
7617 PetscCallMPI(MPI_Type_commit(&dimrealtype));
7618 PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7619 PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7620 PetscCallMPI(MPI_Type_free(&dimrealtype));
7621 PetscCall(PetscFree(pcbddc->mat_graph->coords));
7622
7623 pcbddc->mat_graph->coords = lcoords;
7624 pcbddc->mat_graph->cloc = PETSC_TRUE;
7625 pcbddc->mat_graph->cnloc = n;
7626 }
7627 PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7628 pcbddc->mat_graph->nvtxs);
7629 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7630
7631 /* attach info on disconnected subdomains if present */
7632 if (pcbddc->n_local_subs) {
7633 PetscInt *local_subs, n, totn;
7634
7635 PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7636 PetscCall(PetscMalloc1(n, &local_subs));
7637 for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7638 for (i = 0; i < pcbddc->n_local_subs; i++) {
7639 const PetscInt *idxs;
7640 PetscInt nl, j;
7641
7642 PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7643 PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7644 for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7645 PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7646 }
7647 for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7648 pcbddc->mat_graph->n_local_subs = totn + 1;
7649 pcbddc->mat_graph->local_subs = local_subs;
7650 }
7651
7652 /* Setup of Graph */
7653 PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7654 }
7655
7656 if (!pcbddc->graphanalyzed) {
7657 /* Graph's connected components analysis */
7658 PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7659 pcbddc->graphanalyzed = PETSC_TRUE;
7660 pcbddc->corner_selected = pcbddc->corner_selection;
7661 }
7662 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7663 PetscFunctionReturn(PETSC_SUCCESS);
7664 }
7665
PCBDDCOrthonormalizeVecs(PetscInt * nio,Vec vecs[])7666 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7667 {
7668 PetscInt i, j, n;
7669 PetscScalar *alphas;
7670 PetscReal norm, *onorms;
7671
7672 PetscFunctionBegin;
7673 n = *nio;
7674 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7675 PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7676 PetscCall(VecNormalize(vecs[0], &norm));
7677 if (norm < PETSC_SMALL) {
7678 onorms[0] = 0.0;
7679 PetscCall(VecSet(vecs[0], 0.0));
7680 } else {
7681 onorms[0] = norm;
7682 }
7683
7684 for (i = 1; i < n; i++) {
7685 PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7686 for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7687 PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7688 PetscCall(VecNormalize(vecs[i], &norm));
7689 if (norm < PETSC_SMALL) {
7690 onorms[i] = 0.0;
7691 PetscCall(VecSet(vecs[i], 0.0));
7692 } else {
7693 onorms[i] = norm;
7694 }
7695 }
7696 /* push nonzero vectors at the beginning */
7697 for (i = 0; i < n; i++) {
7698 if (onorms[i] == 0.0) {
7699 for (j = i + 1; j < n; j++) {
7700 if (onorms[j] != 0.0) {
7701 PetscCall(VecCopy(vecs[j], vecs[i]));
7702 onorms[i] = onorms[j];
7703 onorms[j] = 0.0;
7704 break;
7705 }
7706 }
7707 }
7708 }
7709 for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7710 PetscCall(PetscFree2(alphas, onorms));
7711 PetscFunctionReturn(PETSC_SUCCESS);
7712 }
7713
PCBDDCMatISGetSubassemblingPattern(Mat mat,PetscInt * n_subdomains,PetscInt redprocs,IS * is_sends,PetscBool * have_void)7714 static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7715 {
7716 ISLocalToGlobalMapping mapping;
7717 Mat A;
7718 PetscInt n_neighs, *neighs, *n_shared, **shared;
7719 PetscMPIInt size, rank, color;
7720 PetscInt *xadj, *adjncy;
7721 PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7722 PetscInt im_active, active_procs, N, n, i, j, threshold = 2;
7723 PetscInt void_procs, *procs_candidates = NULL;
7724 PetscInt xadj_count, *count;
7725 PetscBool ismatis, use_vwgt = PETSC_FALSE;
7726 PetscSubcomm psubcomm;
7727 MPI_Comm subcomm;
7728
7729 PetscFunctionBegin;
7730 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
7731 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7732 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7733 PetscValidLogicalCollectiveInt(mat, *n_subdomains, 2);
7734 PetscValidLogicalCollectiveInt(mat, redprocs, 3);
7735 PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7736
7737 if (have_void) *have_void = PETSC_FALSE;
7738 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7739 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7740 PetscCall(MatISGetLocalMat(mat, &A));
7741 PetscCall(MatGetLocalSize(A, &n, NULL));
7742 im_active = !!n;
7743 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7744 void_procs = size - active_procs;
7745 /* get ranks of non-active processes in mat communicator */
7746 if (void_procs) {
7747 PetscInt ncand;
7748
7749 if (have_void) *have_void = PETSC_TRUE;
7750 PetscCall(PetscMalloc1(size, &procs_candidates));
7751 PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7752 for (i = 0, ncand = 0; i < size; i++) {
7753 if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7754 }
7755 /* force n_subdomains to be not greater that the number of non-active processes */
7756 *n_subdomains = PetscMin(void_procs, *n_subdomains);
7757 }
7758
7759 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7760 number of subdomains requested 1 -> send to rank-0 or first candidate in voids */
7761 PetscCall(MatGetSize(mat, &N, NULL));
7762 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7763 PetscInt issize, isidx, dest;
7764 PetscBool default_sub;
7765
7766 if (*n_subdomains == 1) dest = 0;
7767 else dest = rank;
7768 if (im_active) {
7769 issize = 1;
7770 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7771 isidx = procs_candidates[dest];
7772 } else {
7773 isidx = dest;
7774 }
7775 } else {
7776 issize = 0;
7777 isidx = rank;
7778 }
7779 if (*n_subdomains != 1) *n_subdomains = active_procs;
7780 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7781 default_sub = (PetscBool)(isidx == rank);
7782 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
7783 if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling"));
7784 PetscCall(PetscFree(procs_candidates));
7785 PetscFunctionReturn(PETSC_SUCCESS);
7786 }
7787 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7788 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7789 threshold = PetscMax(threshold, 2);
7790
7791 /* Get info on mapping */
7792 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7793 PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7794
7795 /* build local CSR graph of subdomains' connectivity */
7796 PetscCall(PetscMalloc1(2, &xadj));
7797 xadj[0] = 0;
7798 xadj[1] = PetscMax(n_neighs - 1, 0);
7799 PetscCall(PetscMalloc1(xadj[1], &adjncy));
7800 PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7801 PetscCall(PetscCalloc1(n, &count));
7802 for (i = 1; i < n_neighs; i++)
7803 for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7804
7805 xadj_count = 0;
7806 for (i = 1; i < n_neighs; i++) {
7807 for (j = 0; j < n_shared[i]; j++) {
7808 if (count[shared[i][j]] < threshold) {
7809 adjncy[xadj_count] = neighs[i];
7810 adjncy_wgt[xadj_count] = n_shared[i];
7811 xadj_count++;
7812 break;
7813 }
7814 }
7815 }
7816 xadj[1] = xadj_count;
7817 PetscCall(PetscFree(count));
7818 PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7819 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7820
7821 PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7822
7823 /* Restrict work on active processes only */
7824 PetscCall(PetscMPIIntCast(im_active, &color));
7825 if (void_procs) {
7826 PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7827 PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7828 PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7829 subcomm = PetscSubcommChild(psubcomm);
7830 } else {
7831 psubcomm = NULL;
7832 subcomm = PetscObjectComm((PetscObject)mat);
7833 }
7834
7835 v_wgt = NULL;
7836 if (!color) {
7837 PetscCall(PetscFree(xadj));
7838 PetscCall(PetscFree(adjncy));
7839 PetscCall(PetscFree(adjncy_wgt));
7840 } else {
7841 Mat subdomain_adj;
7842 IS new_ranks, new_ranks_contig;
7843 MatPartitioning partitioner;
7844 PetscInt rstart, rend;
7845 PetscMPIInt irstart = 0, irend = 0;
7846 PetscInt *is_indices, *oldranks;
7847 PetscMPIInt size;
7848 PetscBool aggregate;
7849
7850 PetscCallMPI(MPI_Comm_size(subcomm, &size));
7851 if (void_procs) {
7852 PetscInt prank = rank;
7853 PetscCall(PetscMalloc1(size, &oldranks));
7854 PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7855 for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7856 PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7857 } else {
7858 oldranks = NULL;
7859 }
7860 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7861 if (aggregate) { /* TODO: all this part could be made more efficient */
7862 PetscInt lrows, row, ncols, *cols;
7863 PetscMPIInt nrank;
7864 PetscScalar *vals;
7865
7866 PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7867 lrows = 0;
7868 if (nrank < redprocs) {
7869 lrows = size / redprocs;
7870 if (nrank < size % redprocs) lrows++;
7871 }
7872 PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7873 PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7874 PetscCall(PetscMPIIntCast(rstart, &irstart));
7875 PetscCall(PetscMPIIntCast(rend, &irend));
7876 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7877 PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7878 row = nrank;
7879 ncols = xadj[1] - xadj[0];
7880 cols = adjncy;
7881 PetscCall(PetscMalloc1(ncols, &vals));
7882 for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7883 PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7884 PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7885 PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7886 PetscCall(PetscFree(xadj));
7887 PetscCall(PetscFree(adjncy));
7888 PetscCall(PetscFree(adjncy_wgt));
7889 PetscCall(PetscFree(vals));
7890 if (use_vwgt) {
7891 Vec v;
7892 const PetscScalar *array;
7893 PetscInt nl;
7894
7895 PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7896 PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7897 PetscCall(VecAssemblyBegin(v));
7898 PetscCall(VecAssemblyEnd(v));
7899 PetscCall(VecGetLocalSize(v, &nl));
7900 PetscCall(VecGetArrayRead(v, &array));
7901 PetscCall(PetscMalloc1(nl, &v_wgt));
7902 for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7903 PetscCall(VecRestoreArrayRead(v, &array));
7904 PetscCall(VecDestroy(&v));
7905 }
7906 } else {
7907 PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7908 if (use_vwgt) {
7909 PetscCall(PetscMalloc1(1, &v_wgt));
7910 v_wgt[0] = n;
7911 }
7912 }
7913 /* PetscCall(MatView(subdomain_adj,0)); */
7914
7915 /* Partition */
7916 PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7917 #if defined(PETSC_HAVE_PTSCOTCH)
7918 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7919 #elif defined(PETSC_HAVE_PARMETIS)
7920 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7921 #else
7922 PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7923 #endif
7924 PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7925 if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7926 *n_subdomains = PetscMin(size, *n_subdomains);
7927 PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7928 PetscCall(MatPartitioningSetFromOptions(partitioner));
7929 PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7930 /* PetscCall(MatPartitioningView(partitioner,0)); */
7931
7932 /* renumber new_ranks to avoid "holes" in new set of processors */
7933 PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7934 PetscCall(ISDestroy(&new_ranks));
7935 PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7936 if (!aggregate) {
7937 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7938 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7939 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7940 } else if (oldranks) {
7941 ranks_send_to_idx[0] = oldranks[is_indices[0]];
7942 } else {
7943 ranks_send_to_idx[0] = is_indices[0];
7944 }
7945 } else {
7946 PetscInt idx = 0;
7947 PetscMPIInt tag;
7948 MPI_Request *reqs;
7949
7950 PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7951 PetscCall(PetscMalloc1(rend - rstart, &reqs));
7952 for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7953 PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7954 PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7955 PetscCall(PetscFree(reqs));
7956 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7957 PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7958 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7959 } else if (oldranks) {
7960 ranks_send_to_idx[0] = oldranks[idx];
7961 } else {
7962 ranks_send_to_idx[0] = idx;
7963 }
7964 }
7965 PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7966 /* clean up */
7967 PetscCall(PetscFree(oldranks));
7968 PetscCall(ISDestroy(&new_ranks_contig));
7969 PetscCall(MatDestroy(&subdomain_adj));
7970 PetscCall(MatPartitioningDestroy(&partitioner));
7971 }
7972 PetscCall(PetscSubcommDestroy(&psubcomm));
7973 PetscCall(PetscFree(procs_candidates));
7974
7975 /* assemble parallel IS for sends */
7976 i = 1;
7977 if (!color) i = 0;
7978 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
7979 PetscFunctionReturn(PETSC_SUCCESS);
7980 }
7981
7982 typedef enum {
7983 MATDENSE_PRIVATE = 0,
7984 MATAIJ_PRIVATE,
7985 MATBAIJ_PRIVATE,
7986 MATSBAIJ_PRIVATE
7987 } MatTypePrivate;
7988
PCBDDCMatISSubassemble(Mat mat,IS is_sends,PetscInt n_subdomains,PetscBool restrict_comm,PetscBool restrict_full,PetscBool reuse,Mat * mat_n,PetscInt nis,IS isarray[],PetscInt nvecs,Vec nnsp_vec[])7989 static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7990 {
7991 Mat local_mat;
7992 IS is_sends_internal;
7993 PetscInt rows, cols, new_local_rows;
7994 PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
7995 PetscBool ismatis, isdense, newisdense, destroy_mat;
7996 ISLocalToGlobalMapping l2gmap;
7997 PetscInt *l2gmap_indices;
7998 const PetscInt *is_indices;
7999 MatType new_local_type;
8000 /* buffers */
8001 PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
8002 PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
8003 PetscInt *recv_buffer_idxs_local;
8004 PetscScalar *ptr_vals, *recv_buffer_vals;
8005 const PetscScalar *send_buffer_vals;
8006 PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
8007 /* MPI */
8008 MPI_Comm comm, comm_n;
8009 PetscSubcomm subcomm;
8010 PetscMPIInt n_sends, n_recvs, size;
8011 PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
8012 PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
8013 PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
8014 MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
8015 MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
8016
8017 PetscFunctionBegin;
8018 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
8019 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
8020 PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
8021 PetscValidLogicalCollectiveInt(mat, n_subdomains, 3);
8022 PetscValidLogicalCollectiveBool(mat, restrict_comm, 4);
8023 PetscValidLogicalCollectiveBool(mat, restrict_full, 5);
8024 PetscValidLogicalCollectiveBool(mat, reuse, 6);
8025 PetscValidLogicalCollectiveInt(mat, nis, 8);
8026 PetscValidLogicalCollectiveInt(mat, nvecs, 10);
8027 if (nvecs) {
8028 PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
8029 PetscValidHeaderSpecific(nnsp_vec[0], VEC_CLASSID, 11);
8030 }
8031 /* further checks */
8032 PetscCall(MatISGetLocalMat(mat, &local_mat));
8033 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
8034 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
8035
8036 PetscCall(MatGetSize(local_mat, &rows, &cols));
8037 PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
8038 if (reuse && *mat_n) {
8039 PetscInt mrows, mcols, mnrows, mncols;
8040 PetscValidHeaderSpecific(*mat_n, MAT_CLASSID, 7);
8041 PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
8042 PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
8043 PetscCall(MatGetSize(mat, &mrows, &mcols));
8044 PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
8045 PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
8046 PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
8047 }
8048 PetscCall(MatGetBlockSize(local_mat, &bs));
8049 PetscValidLogicalCollectiveInt(mat, bs, 1);
8050
8051 /* prepare IS for sending if not provided */
8052 if (!is_sends) {
8053 PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
8054 PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
8055 } else {
8056 PetscCall(PetscObjectReference((PetscObject)is_sends));
8057 is_sends_internal = is_sends;
8058 }
8059
8060 /* get comm */
8061 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
8062
8063 /* compute number of sends */
8064 PetscCall(ISGetLocalSize(is_sends_internal, &i));
8065 PetscCall(PetscMPIIntCast(i, &n_sends));
8066
8067 /* compute number of receives */
8068 PetscCallMPI(MPI_Comm_size(comm, &size));
8069 PetscCall(PetscMalloc1(size, &iflags));
8070 PetscCall(PetscArrayzero(iflags, size));
8071 PetscCall(ISGetIndices(is_sends_internal, &is_indices));
8072 for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
8073 PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
8074 PetscCall(PetscFree(iflags));
8075
8076 /* restrict comm if requested */
8077 subcomm = NULL;
8078 destroy_mat = PETSC_FALSE;
8079 if (restrict_comm) {
8080 PetscMPIInt color, subcommsize;
8081
8082 color = 0;
8083 if (restrict_full) {
8084 if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
8085 } else {
8086 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
8087 }
8088 PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
8089 subcommsize = size - subcommsize;
8090 /* check if reuse has been requested */
8091 if (reuse) {
8092 if (*mat_n) {
8093 PetscMPIInt subcommsize2;
8094 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
8095 PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
8096 comm_n = PetscObjectComm((PetscObject)*mat_n);
8097 } else {
8098 comm_n = PETSC_COMM_SELF;
8099 }
8100 } else { /* MAT_INITIAL_MATRIX */
8101 PetscMPIInt rank;
8102
8103 PetscCallMPI(MPI_Comm_rank(comm, &rank));
8104 PetscCall(PetscSubcommCreate(comm, &subcomm));
8105 PetscCall(PetscSubcommSetNumber(subcomm, 2));
8106 PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
8107 comm_n = PetscSubcommChild(subcomm);
8108 }
8109 /* flag to destroy *mat_n if not significative */
8110 if (color) destroy_mat = PETSC_TRUE;
8111 } else {
8112 comm_n = comm;
8113 }
8114
8115 /* prepare send/receive buffers */
8116 PetscCall(PetscMalloc1(size, &ilengths_idxs));
8117 PetscCall(PetscArrayzero(ilengths_idxs, size));
8118 PetscCall(PetscMalloc1(size, &ilengths_vals));
8119 PetscCall(PetscArrayzero(ilengths_vals, size));
8120 if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8121
8122 /* Get data from local matrices */
8123 PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8124 /* TODO: See below some guidelines on how to prepare the local buffers */
8125 /*
8126 send_buffer_vals should contain the raw values of the local matrix
8127 send_buffer_idxs should contain:
8128 - MatType_PRIVATE type
8129 - PetscInt size_of_l2gmap
8130 - PetscInt global_row_indices[size_of_l2gmap]
8131 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8132 */
8133 {
8134 ISLocalToGlobalMapping mapping;
8135
8136 PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8137 PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8138 PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8139 PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8140 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8141 send_buffer_idxs[1] = i;
8142 PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8143 PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8144 PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8145 PetscCall(PetscMPIIntCast(i, &len));
8146 for (i = 0; i < n_sends; i++) {
8147 ilengths_vals[is_indices[i]] = len * len;
8148 ilengths_idxs[is_indices[i]] = len + 2;
8149 }
8150 }
8151 PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8152 /* additional is (if any) */
8153 if (nis) {
8154 PetscMPIInt psum;
8155 PetscInt j;
8156 for (j = 0, psum = 0; j < nis; j++) {
8157 PetscInt plen;
8158 PetscCall(ISGetLocalSize(isarray[j], &plen));
8159 PetscCall(PetscMPIIntCast(plen, &len));
8160 psum += len + 1; /* indices + length */
8161 }
8162 PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8163 for (j = 0, psum = 0; j < nis; j++) {
8164 PetscInt plen;
8165 const PetscInt *is_array_idxs;
8166 PetscCall(ISGetLocalSize(isarray[j], &plen));
8167 send_buffer_idxs_is[psum] = plen;
8168 PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8169 PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8170 PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8171 psum += plen + 1; /* indices + length */
8172 }
8173 for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8174 PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8175 }
8176 PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8177
8178 buf_size_idxs = 0;
8179 buf_size_vals = 0;
8180 buf_size_idxs_is = 0;
8181 buf_size_vecs = 0;
8182 for (i = 0; i < n_recvs; i++) {
8183 buf_size_idxs += olengths_idxs[i];
8184 buf_size_vals += olengths_vals[i];
8185 if (nis) buf_size_idxs_is += olengths_idxs_is[i];
8186 if (nvecs) buf_size_vecs += olengths_idxs[i];
8187 }
8188 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8189 PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8190 PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8191 PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8192
8193 /* get new tags for clean communications */
8194 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8195 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8196 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8197 PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8198
8199 /* allocate for requests */
8200 PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8201 PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8202 PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8203 PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8204 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8205 PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8206 PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8207 PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8208
8209 /* communications */
8210 ptr_idxs = recv_buffer_idxs;
8211 ptr_vals = recv_buffer_vals;
8212 ptr_idxs_is = recv_buffer_idxs_is;
8213 ptr_vecs = recv_buffer_vecs;
8214 for (i = 0; i < n_recvs; i++) {
8215 PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i]));
8216 PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i]));
8217 ptr_idxs += olengths_idxs[i];
8218 ptr_vals += olengths_vals[i];
8219 if (nis) {
8220 PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i]));
8221 ptr_idxs_is += olengths_idxs_is[i];
8222 }
8223 if (nvecs) {
8224 PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i]));
8225 ptr_vecs += olengths_idxs[i] - 2;
8226 }
8227 }
8228 for (i = 0; i < n_sends; i++) {
8229 PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8230 PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8231 PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8232 if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8233 if (nvecs) {
8234 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8235 PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8236 }
8237 }
8238 PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8239 PetscCall(ISDestroy(&is_sends_internal));
8240
8241 /* assemble new l2g map */
8242 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8243 ptr_idxs = recv_buffer_idxs;
8244 new_local_rows = 0;
8245 for (i = 0; i < n_recvs; i++) {
8246 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8247 ptr_idxs += olengths_idxs[i];
8248 }
8249 PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8250 ptr_idxs = recv_buffer_idxs;
8251 new_local_rows = 0;
8252 for (i = 0; i < n_recvs; i++) {
8253 PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8254 new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8255 ptr_idxs += olengths_idxs[i];
8256 }
8257 PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8258 PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8259 PetscCall(PetscFree(l2gmap_indices));
8260
8261 /* infer new local matrix type from received local matrices type */
8262 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8263 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8264 if (n_recvs) {
8265 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8266 ptr_idxs = recv_buffer_idxs;
8267 for (i = 0; i < n_recvs; i++) {
8268 if ((PetscInt)new_local_type_private != *ptr_idxs) {
8269 new_local_type_private = MATAIJ_PRIVATE;
8270 break;
8271 }
8272 ptr_idxs += olengths_idxs[i];
8273 }
8274 switch (new_local_type_private) {
8275 case MATDENSE_PRIVATE:
8276 new_local_type = MATSEQAIJ;
8277 bs = 1;
8278 break;
8279 case MATAIJ_PRIVATE:
8280 new_local_type = MATSEQAIJ;
8281 bs = 1;
8282 break;
8283 case MATBAIJ_PRIVATE:
8284 new_local_type = MATSEQBAIJ;
8285 break;
8286 case MATSBAIJ_PRIVATE:
8287 new_local_type = MATSEQSBAIJ;
8288 break;
8289 default:
8290 SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8291 }
8292 } else { /* by default, new_local_type is seqaij */
8293 new_local_type = MATSEQAIJ;
8294 bs = 1;
8295 }
8296
8297 /* create MATIS object if needed */
8298 if (!reuse) {
8299 PetscCall(MatGetSize(mat, &rows, &cols));
8300 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8301 } else {
8302 /* it also destroys the local matrices */
8303 if (*mat_n) {
8304 PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8305 } else { /* this is a fake object */
8306 PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8307 }
8308 }
8309 PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8310 PetscCall(MatSetType(local_mat, new_local_type));
8311
8312 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8313
8314 /* Global to local map of received indices */
8315 PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8316 PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8317 PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8318
8319 /* restore attributes -> type of incoming data and its size */
8320 buf_size_idxs = 0;
8321 for (i = 0; i < n_recvs; i++) {
8322 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
8323 recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8324 buf_size_idxs += olengths_idxs[i];
8325 }
8326 PetscCall(PetscFree(recv_buffer_idxs));
8327
8328 /* set preallocation */
8329 PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8330 if (!newisdense) {
8331 PetscInt *new_local_nnz = NULL;
8332
8333 ptr_idxs = recv_buffer_idxs_local;
8334 if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8335 for (i = 0; i < n_recvs; i++) {
8336 PetscInt j;
8337 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8338 for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8339 } else {
8340 /* TODO */
8341 }
8342 ptr_idxs += olengths_idxs[i];
8343 }
8344 if (new_local_nnz) {
8345 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8346 PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8347 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8348 PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8349 for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8350 PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8351 } else {
8352 PetscCall(MatSetUp(local_mat));
8353 }
8354 PetscCall(PetscFree(new_local_nnz));
8355 } else {
8356 PetscCall(MatSetUp(local_mat));
8357 }
8358
8359 /* set values */
8360 ptr_vals = recv_buffer_vals;
8361 ptr_idxs = recv_buffer_idxs_local;
8362 for (i = 0; i < n_recvs; i++) {
8363 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8364 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8365 PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8366 PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8367 PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8368 PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8369 } else {
8370 /* TODO */
8371 }
8372 ptr_idxs += olengths_idxs[i];
8373 ptr_vals += olengths_vals[i];
8374 }
8375 PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8376 PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8377 PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8378 PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8379 PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8380 PetscCall(PetscFree(recv_buffer_vals));
8381
8382 #if 0
8383 if (!restrict_comm) { /* check */
8384 Vec lvec,rvec;
8385 PetscReal infty_error;
8386
8387 PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8388 PetscCall(VecSetRandom(rvec,NULL));
8389 PetscCall(MatMult(mat,rvec,lvec));
8390 PetscCall(VecScale(lvec,-1.0));
8391 PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8392 PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8393 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8394 PetscCall(VecDestroy(&rvec));
8395 PetscCall(VecDestroy(&lvec));
8396 }
8397 #endif
8398
8399 /* assemble new additional is (if any) */
8400 if (nis) {
8401 PetscInt **temp_idxs, *count_is, j, psum;
8402
8403 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8404 PetscCall(PetscCalloc1(nis, &count_is));
8405 ptr_idxs = recv_buffer_idxs_is;
8406 psum = 0;
8407 for (i = 0; i < n_recvs; i++) {
8408 for (j = 0; j < nis; j++) {
8409 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8410 count_is[j] += plen; /* increment counting of buffer for j-th IS */
8411 psum += plen;
8412 ptr_idxs += plen + 1; /* shift pointer to received data */
8413 }
8414 }
8415 PetscCall(PetscMalloc1(nis, &temp_idxs));
8416 PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8417 for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8418 PetscCall(PetscArrayzero(count_is, nis));
8419 ptr_idxs = recv_buffer_idxs_is;
8420 for (i = 0; i < n_recvs; i++) {
8421 for (j = 0; j < nis; j++) {
8422 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8423 PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8424 count_is[j] += plen; /* increment starting point of buffer for j-th IS */
8425 ptr_idxs += plen + 1; /* shift pointer to received data */
8426 }
8427 }
8428 for (i = 0; i < nis; i++) {
8429 PetscCall(ISDestroy(&isarray[i]));
8430 PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8431 PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8432 }
8433 PetscCall(PetscFree(count_is));
8434 PetscCall(PetscFree(temp_idxs[0]));
8435 PetscCall(PetscFree(temp_idxs));
8436 }
8437 /* free workspace */
8438 PetscCall(PetscFree(recv_buffer_idxs_is));
8439 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8440 PetscCall(PetscFree(send_buffer_idxs));
8441 PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8442 if (isdense) {
8443 PetscCall(MatISGetLocalMat(mat, &local_mat));
8444 PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8445 PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8446 } else {
8447 /* PetscCall(PetscFree(send_buffer_vals)); */
8448 }
8449 if (nis) {
8450 PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8451 PetscCall(PetscFree(send_buffer_idxs_is));
8452 }
8453
8454 if (nvecs) {
8455 PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8456 PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8457 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8458 PetscCall(VecDestroy(&nnsp_vec[0]));
8459 PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8460 PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8461 PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8462 /* set values */
8463 ptr_vals = recv_buffer_vecs;
8464 ptr_idxs = recv_buffer_idxs_local;
8465 PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8466 for (i = 0; i < n_recvs; i++) {
8467 PetscInt j;
8468 for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8469 ptr_idxs += olengths_idxs[i];
8470 ptr_vals += olengths_idxs[i] - 2;
8471 }
8472 PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8473 PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8474 PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8475 }
8476
8477 PetscCall(PetscFree(recv_buffer_vecs));
8478 PetscCall(PetscFree(recv_buffer_idxs_local));
8479 PetscCall(PetscFree(recv_req_idxs));
8480 PetscCall(PetscFree(recv_req_vals));
8481 PetscCall(PetscFree(recv_req_vecs));
8482 PetscCall(PetscFree(recv_req_idxs_is));
8483 PetscCall(PetscFree(send_req_idxs));
8484 PetscCall(PetscFree(send_req_vals));
8485 PetscCall(PetscFree(send_req_vecs));
8486 PetscCall(PetscFree(send_req_idxs_is));
8487 PetscCall(PetscFree(ilengths_vals));
8488 PetscCall(PetscFree(ilengths_idxs));
8489 PetscCall(PetscFree(olengths_vals));
8490 PetscCall(PetscFree(olengths_idxs));
8491 PetscCall(PetscFree(onodes));
8492 if (nis) {
8493 PetscCall(PetscFree(ilengths_idxs_is));
8494 PetscCall(PetscFree(olengths_idxs_is));
8495 PetscCall(PetscFree(onodes_is));
8496 }
8497 PetscCall(PetscSubcommDestroy(&subcomm));
8498 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8499 PetscCall(MatDestroy(mat_n));
8500 for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8501 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8502 PetscCall(VecDestroy(&nnsp_vec[0]));
8503 }
8504 *mat_n = NULL;
8505 }
8506 PetscFunctionReturn(PETSC_SUCCESS);
8507 }
8508
8509 /* temporary hack into ksp private data structure */
8510 #include <petsc/private/kspimpl.h>
8511
PCBDDCSetUpCoarseSolver(PC pc,Mat coarse_submat)8512 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8513 {
8514 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8515 PC_IS *pcis = (PC_IS *)pc->data;
8516 PCBDDCGraph graph = pcbddc->mat_graph;
8517 Mat coarse_mat, coarse_mat_is;
8518 Mat coarsedivudotp = NULL;
8519 Mat coarseG, t_coarse_mat_is;
8520 MatNullSpace CoarseNullSpace = NULL;
8521 ISLocalToGlobalMapping coarse_islg;
8522 IS coarse_is, *isarray, corners;
8523 PetscInt i, im_active = -1, active_procs = -1;
8524 PetscInt nis, nisdofs, nisneu, nisvert;
8525 PetscInt coarse_eqs_per_proc, coarsening_ratio;
8526 PC pc_temp;
8527 PCType coarse_pc_type;
8528 KSPType coarse_ksp_type;
8529 PetscBool multilevel_requested, multilevel_allowed;
8530 PetscBool coarse_reuse, multi_element = graph->multi_element;
8531 PetscInt ncoarse, nedcfield;
8532 PetscBool compute_vecs = PETSC_FALSE;
8533 PetscScalar *array;
8534 MatReuse coarse_mat_reuse;
8535 PetscBool restr, full_restr, have_void;
8536 PetscMPIInt size;
8537
8538 PetscFunctionBegin;
8539 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8540 /* Assign global numbering to coarse dofs */
8541 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8542 PetscInt ocoarse_size;
8543 compute_vecs = PETSC_TRUE;
8544
8545 pcbddc->new_primal_space = PETSC_TRUE;
8546 ocoarse_size = pcbddc->coarse_size;
8547 PetscCall(PetscFree(pcbddc->global_primal_indices));
8548 PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8549 /* see if we can avoid some work */
8550 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8551 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8552 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8553 PetscCall(KSPReset(pcbddc->coarse_ksp));
8554 coarse_reuse = PETSC_FALSE;
8555 } else { /* we can safely reuse already computed coarse matrix */
8556 coarse_reuse = PETSC_TRUE;
8557 }
8558 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8559 coarse_reuse = PETSC_FALSE;
8560 }
8561 /* reset any subassembling information */
8562 if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8563 } else { /* primal space is unchanged, so we can reuse coarse matrix */
8564 coarse_reuse = PETSC_TRUE;
8565 }
8566 if (coarse_reuse && pcbddc->coarse_ksp) {
8567 PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8568 PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8569 coarse_mat_reuse = MAT_REUSE_MATRIX;
8570 } else {
8571 coarse_mat = NULL;
8572 coarse_mat_reuse = MAT_INITIAL_MATRIX;
8573 }
8574
8575 /* creates temporary l2gmap and IS for coarse indexes */
8576 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8577 PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8578
8579 /* creates temporary MATIS object for coarse matrix */
8580 PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8581 PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8582 PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8583 PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element));
8584 PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8585 PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8586 PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8587 PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8588 PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8589
8590 /* count "active" (i.e. with positive local size) and "void" processes */
8591 im_active = !!pcis->n;
8592 PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8593
8594 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8595 /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8596 /* full_restr : just use the receivers from the subassembling pattern */
8597 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8598 coarse_mat_is = NULL;
8599 multilevel_allowed = PETSC_FALSE;
8600 multilevel_requested = PETSC_FALSE;
8601 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8602 if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8603 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8604 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8605 coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8606 if (multilevel_requested) {
8607 ncoarse = active_procs / coarsening_ratio;
8608 restr = PETSC_FALSE;
8609 full_restr = PETSC_FALSE;
8610 } else {
8611 ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8612 restr = PETSC_TRUE;
8613 full_restr = PETSC_TRUE;
8614 }
8615 if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8616 ncoarse = PetscMax(1, ncoarse);
8617 if (!pcbddc->coarse_subassembling) {
8618 if (coarsening_ratio > 1) {
8619 if (multilevel_requested) {
8620 PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8621 } else {
8622 PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8623 }
8624 } else {
8625 PetscMPIInt rank;
8626
8627 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8628 have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE;
8629 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8630 PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling"));
8631 }
8632 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8633 PetscInt psum;
8634 if (pcbddc->coarse_ksp) psum = 1;
8635 else psum = 0;
8636 PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8637 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8638 }
8639 /* determine if we can go multilevel */
8640 if (multilevel_requested) {
8641 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8642 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8643 }
8644 if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8645
8646 /* dump subassembling pattern */
8647 if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8648 /* compute dofs splitting and neumann boundaries for coarse dofs */
8649 nedcfield = -1;
8650 corners = NULL;
8651 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8652 PetscInt *tidxs, *tidxs2, nout, tsize, i;
8653 const PetscInt *idxs;
8654 ISLocalToGlobalMapping tmap;
8655
8656 /* create map between primal indices (in local representative ordering) and local primal numbering */
8657 PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8658 /* allocate space for temporary storage */
8659 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8660 PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8661 /* allocate for IS array */
8662 nisdofs = pcbddc->n_ISForDofsLocal;
8663 if (pcbddc->nedclocal) {
8664 if (pcbddc->nedfield > -1) {
8665 nedcfield = pcbddc->nedfield;
8666 } else {
8667 nedcfield = 0;
8668 PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8669 nisdofs = 1;
8670 }
8671 }
8672 nisneu = !!pcbddc->NeumannBoundariesLocal;
8673 nisvert = 0; /* nisvert is not used */
8674 nis = nisdofs + nisneu + nisvert;
8675 PetscCall(PetscMalloc1(nis, &isarray));
8676 /* dofs splitting */
8677 for (i = 0; i < nisdofs; i++) {
8678 /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8679 if (nedcfield != i) {
8680 PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8681 PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8682 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8683 PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8684 } else {
8685 PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8686 PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8687 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8688 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8689 PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8690 }
8691 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8692 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8693 /* PetscCall(ISView(isarray[i],0)); */
8694 }
8695 /* neumann boundaries */
8696 if (pcbddc->NeumannBoundariesLocal) {
8697 /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8698 PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8699 PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8700 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8701 PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8702 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8703 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8704 /* PetscCall(ISView(isarray[nisdofs],0)); */
8705 }
8706 /* coordinates */
8707 if (pcbddc->corner_selected) {
8708 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8709 PetscCall(ISGetLocalSize(corners, &tsize));
8710 PetscCall(ISGetIndices(corners, &idxs));
8711 PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8712 PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8713 PetscCall(ISRestoreIndices(corners, &idxs));
8714 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8715 PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8716 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8717 }
8718 PetscCall(PetscFree(tidxs));
8719 PetscCall(PetscFree(tidxs2));
8720 PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8721 } else {
8722 nis = 0;
8723 nisdofs = 0;
8724 nisneu = 0;
8725 nisvert = 0;
8726 isarray = NULL;
8727 }
8728 /* destroy no longer needed map */
8729 PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8730
8731 /* subassemble */
8732 if (multilevel_allowed) {
8733 Vec vp[1];
8734 PetscInt nvecs = 0;
8735 PetscBool reuse;
8736
8737 vp[0] = NULL;
8738 /* XXX HDIV also */
8739 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8740 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8741 PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8742 PetscCall(VecSetType(vp[0], VECSTANDARD));
8743 nvecs = 1;
8744
8745 if (pcbddc->divudotp) {
8746 Mat B, loc_divudotp;
8747 Vec v, p;
8748 IS dummy;
8749 PetscInt np;
8750
8751 PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8752 PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8753 PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8754 PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8755 PetscCall(MatCreateVecs(B, &v, &p));
8756 PetscCall(VecSet(p, 1.));
8757 PetscCall(MatMultTranspose(B, p, v));
8758 PetscCall(VecDestroy(&p));
8759 PetscCall(MatDestroy(&B));
8760 PetscCall(VecGetArray(vp[0], &array));
8761 PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8762 PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8763 PetscCall(VecResetArray(pcbddc->vec1_P));
8764 PetscCall(VecRestoreArray(vp[0], &array));
8765 PetscCall(ISDestroy(&dummy));
8766 PetscCall(VecDestroy(&v));
8767 }
8768 }
8769 if (coarse_mat) reuse = PETSC_TRUE;
8770 else reuse = PETSC_FALSE;
8771 if (multi_element) {
8772 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8773 coarse_mat_is = t_coarse_mat_is;
8774 } else {
8775 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8776 if (reuse) {
8777 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8778 } else {
8779 PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8780 }
8781 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8782 PetscScalar *arraym;
8783 const PetscScalar *arrayv;
8784 PetscInt nl;
8785 PetscCall(VecGetLocalSize(vp[0], &nl));
8786 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8787 PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8788 PetscCall(VecGetArrayRead(vp[0], &arrayv));
8789 PetscCall(PetscArraycpy(arraym, arrayv, nl));
8790 PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8791 PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8792 PetscCall(VecDestroy(&vp[0]));
8793 } else {
8794 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8795 }
8796 }
8797 } else {
8798 PetscBool default_sub;
8799
8800 PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub));
8801 if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8802 else {
8803 PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8804 coarse_mat_is = t_coarse_mat_is;
8805 }
8806 }
8807 if (coarse_mat_is || coarse_mat) {
8808 if (!multilevel_allowed) {
8809 PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8810 } else {
8811 /* if this matrix is present, it means we are not reusing the coarse matrix */
8812 if (coarse_mat_is) {
8813 PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8814 PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8815 coarse_mat = coarse_mat_is;
8816 }
8817 }
8818 }
8819 PetscCall(MatDestroy(&t_coarse_mat_is));
8820 PetscCall(MatDestroy(&coarse_mat_is));
8821
8822 /* create local to global scatters for coarse problem */
8823 if (compute_vecs) {
8824 PetscInt lrows;
8825 PetscCall(VecDestroy(&pcbddc->coarse_vec));
8826 if (coarse_mat) {
8827 PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8828 } else {
8829 lrows = 0;
8830 }
8831 PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8832 PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8833 PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8834 PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8835 PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8836 }
8837 PetscCall(ISDestroy(&coarse_is));
8838
8839 /* set defaults for coarse KSP and PC */
8840 if (multilevel_allowed) {
8841 coarse_ksp_type = KSPRICHARDSON;
8842 coarse_pc_type = PCBDDC;
8843 } else {
8844 coarse_ksp_type = KSPPREONLY;
8845 coarse_pc_type = PCREDUNDANT;
8846 }
8847
8848 /* print some info if requested */
8849 if (pcbddc->dbg_flag) {
8850 if (!multilevel_allowed) {
8851 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8852 if (multilevel_requested) {
8853 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8854 } else if (pcbddc->max_levels) {
8855 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8856 }
8857 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8858 }
8859 }
8860
8861 /* communicate coarse discrete gradient */
8862 coarseG = NULL;
8863 if (pcbddc->nedcG && multilevel_allowed) {
8864 MPI_Comm ccomm;
8865 if (coarse_mat) {
8866 ccomm = PetscObjectComm((PetscObject)coarse_mat);
8867 } else {
8868 ccomm = MPI_COMM_NULL;
8869 }
8870 PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8871 }
8872
8873 /* create the coarse KSP object only once with defaults */
8874 if (coarse_mat) {
8875 PetscBool isredundant, isbddc, force, valid;
8876 PetscViewer dbg_viewer = NULL;
8877 PetscBool isset, issym, isher, isspd;
8878
8879 if (pcbddc->dbg_flag) {
8880 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8881 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8882 }
8883 if (!pcbddc->coarse_ksp) {
8884 char prefix[256], str_level[16];
8885 size_t len;
8886
8887 PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8888 PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8889 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8890 PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8891 PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8892 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8893 PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8894 PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8895 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8896 /* TODO is this logic correct? should check for coarse_mat type */
8897 PetscCall(PCSetType(pc_temp, coarse_pc_type));
8898 /* prefix */
8899 PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8900 PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8901 if (!pcbddc->current_level) {
8902 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8903 PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8904 } else {
8905 PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8906 if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */
8907 if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8908 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8909 PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8910 PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
8911 PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8912 }
8913 PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8914 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8915 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8916 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8917 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8918 /* allow user customization */
8919 PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8920 /* get some info after set from options */
8921 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8922 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8923 force = PETSC_FALSE;
8924 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8925 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8926 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8927 if (multilevel_allowed && !force && !valid) {
8928 isbddc = PETSC_TRUE;
8929 PetscCall(PCSetType(pc_temp, PCBDDC));
8930 PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8931 PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8932 PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8933 if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8934 PetscObjectOptionsBegin((PetscObject)pc_temp);
8935 PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8936 PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8937 PetscOptionsEnd();
8938 pc_temp->setfromoptionscalled++;
8939 }
8940 }
8941 }
8942 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8943 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8944 if (nisdofs) {
8945 PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8946 for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8947 }
8948 if (nisneu) {
8949 PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8950 PetscCall(ISDestroy(&isarray[nisdofs]));
8951 }
8952 if (nisvert) {
8953 PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8954 PetscCall(ISDestroy(&isarray[nis - 1]));
8955 }
8956 if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8957
8958 /* get some info after set from options */
8959 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8960
8961 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8962 if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8963 /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8964 force = PETSC_FALSE;
8965 PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8966 PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8967 if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8968 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8969 if (isredundant) {
8970 KSP inner_ksp;
8971 PC inner_pc;
8972
8973 PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
8974 PetscCall(KSPGetPC(inner_ksp, &inner_pc));
8975 }
8976
8977 /* parameters which miss an API */
8978 PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8979 if (isbddc) {
8980 PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
8981
8982 pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8983 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8984 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8985 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8986 if (pcbddc_coarse->benign_saddle_point) {
8987 Mat coarsedivudotp_is;
8988 ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
8989 IS row, col;
8990 const PetscInt *gidxs;
8991 PetscInt n, st, M, N;
8992
8993 PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
8994 PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
8995 st = st - n;
8996 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
8997 PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
8998 PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
8999 PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
9000 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
9001 PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
9002 PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
9003 PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
9004 PetscCall(ISGetSize(row, &M));
9005 PetscCall(MatGetSize(coarse_mat, &N, NULL));
9006 PetscCall(ISDestroy(&row));
9007 PetscCall(ISDestroy(&col));
9008 PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
9009 PetscCall(MatSetType(coarsedivudotp_is, MATIS));
9010 PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
9011 PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
9012 PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
9013 PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
9014 PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
9015 PetscCall(MatDestroy(&coarsedivudotp));
9016 PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
9017 PetscCall(MatDestroy(&coarsedivudotp_is));
9018 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
9019 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
9020 }
9021 }
9022
9023 /* propagate symmetry info of coarse matrix */
9024 PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
9025 PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
9026 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
9027 PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
9028 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
9029 PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
9030 if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
9031
9032 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
9033 /* set operators */
9034 PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
9035 PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
9036 PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
9037 if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
9038 }
9039 PetscCall(MatDestroy(&coarseG));
9040 PetscCall(PetscFree(isarray));
9041 #if 0
9042 {
9043 PetscViewer viewer;
9044 char filename[256];
9045 PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
9046 PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
9047 PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
9048 PetscCall(MatView(coarse_mat,viewer));
9049 PetscCall(PetscViewerPopFormat(viewer));
9050 PetscCall(PetscViewerDestroy(&viewer));
9051 }
9052 #endif
9053
9054 if (corners) {
9055 Vec gv;
9056 IS is;
9057 const PetscInt *idxs;
9058 PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim;
9059 PetscScalar *coords;
9060
9061 PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
9062 PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
9063 PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
9064 PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
9065 PetscCall(VecSetBlockSize(gv, cdim));
9066 PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
9067 PetscCall(VecSetType(gv, VECSTANDARD));
9068 PetscCall(VecSetFromOptions(gv));
9069 PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
9070
9071 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
9072 PetscCall(ISGetLocalSize(is, &n));
9073 PetscCall(ISGetIndices(is, &idxs));
9074 PetscCall(PetscMalloc1(n * cdim, &coords));
9075 for (i = 0; i < n; i++) {
9076 for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
9077 }
9078 PetscCall(ISRestoreIndices(is, &idxs));
9079 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
9080
9081 PetscCall(ISGetLocalSize(corners, &n));
9082 PetscCall(ISGetIndices(corners, &idxs));
9083 PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
9084 PetscCall(ISRestoreIndices(corners, &idxs));
9085 PetscCall(PetscFree(coords));
9086 PetscCall(VecAssemblyBegin(gv));
9087 PetscCall(VecAssemblyEnd(gv));
9088 PetscCall(VecGetArray(gv, &coords));
9089 if (pcbddc->coarse_ksp) {
9090 PC coarse_pc;
9091 PetscBool isbddc;
9092
9093 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
9094 PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
9095 if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
9096 PetscReal *realcoords;
9097
9098 PetscCall(VecGetLocalSize(gv, &n));
9099 #if defined(PETSC_USE_COMPLEX)
9100 PetscCall(PetscMalloc1(n, &realcoords));
9101 for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
9102 #else
9103 realcoords = coords;
9104 #endif
9105 PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
9106 #if defined(PETSC_USE_COMPLEX)
9107 PetscCall(PetscFree(realcoords));
9108 #endif
9109 }
9110 }
9111 PetscCall(VecRestoreArray(gv, &coords));
9112 PetscCall(VecDestroy(&gv));
9113 }
9114 PetscCall(ISDestroy(&corners));
9115
9116 if (pcbddc->coarse_ksp) {
9117 Vec crhs, csol;
9118
9119 PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9120 PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9121 if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9122 if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9123 }
9124 PetscCall(MatDestroy(&coarsedivudotp));
9125
9126 /* compute null space for coarse solver if the benign trick has been requested */
9127 if (pcbddc->benign_null) {
9128 PetscCall(VecSet(pcbddc->vec1_P, 0.));
9129 for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9130 PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9131 PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9132 PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9133 PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9134 if (coarse_mat) {
9135 Vec nullv;
9136 PetscScalar *array, *array2;
9137 PetscInt nl;
9138
9139 PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9140 PetscCall(VecGetLocalSize(nullv, &nl));
9141 PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9142 PetscCall(VecGetArray(nullv, &array2));
9143 PetscCall(PetscArraycpy(array2, array, nl));
9144 PetscCall(VecRestoreArray(nullv, &array2));
9145 PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9146 PetscCall(VecNormalize(nullv, NULL));
9147 PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9148 PetscCall(VecDestroy(&nullv));
9149 }
9150 }
9151 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9152
9153 PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9154 if (pcbddc->coarse_ksp) {
9155 PetscBool ispreonly;
9156
9157 if (CoarseNullSpace) {
9158 PetscBool isnull;
9159
9160 PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9161 if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9162 /* TODO: add local nullspaces (if any) */
9163 }
9164 /* setup coarse ksp */
9165 PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9166 /* Check coarse problem if in debug mode or if solving with an iterative method */
9167 PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9168 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9169 KSP check_ksp;
9170 KSPType check_ksp_type;
9171 PC check_pc;
9172 Vec check_vec, coarse_vec;
9173 PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9174 PetscInt its;
9175 PetscBool compute_eigs;
9176 PetscReal *eigs_r, *eigs_c;
9177 PetscInt neigs;
9178 const char *prefix;
9179
9180 /* Create ksp object suitable for estimation of extreme eigenvalues */
9181 PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9182 PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9183 PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9184 PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9185 PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9186 PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9187 /* prevent from setup unneeded object */
9188 PetscCall(KSPGetPC(check_ksp, &check_pc));
9189 PetscCall(PCSetType(check_pc, PCNONE));
9190 if (ispreonly) {
9191 check_ksp_type = KSPPREONLY;
9192 compute_eigs = PETSC_FALSE;
9193 } else {
9194 check_ksp_type = KSPGMRES;
9195 compute_eigs = PETSC_TRUE;
9196 }
9197 PetscCall(KSPSetType(check_ksp, check_ksp_type));
9198 PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9199 PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9200 PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9201 PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9202 PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9203 PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9204 PetscCall(KSPSetFromOptions(check_ksp));
9205 PetscCall(KSPSetUp(check_ksp));
9206 PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9207 PetscCall(KSPSetPC(check_ksp, check_pc));
9208 /* create random vec */
9209 PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9210 PetscCall(VecSetRandom(check_vec, NULL));
9211 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9212 /* solve coarse problem */
9213 PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9214 PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9215 /* set eigenvalue estimation if preonly has not been requested */
9216 if (compute_eigs) {
9217 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9218 PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9219 PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9220 if (neigs) {
9221 lambda_max = eigs_r[neigs - 1];
9222 lambda_min = eigs_r[0];
9223 if (pcbddc->use_coarse_estimates) {
9224 if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9225 PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9226 PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9227 }
9228 }
9229 }
9230 }
9231
9232 /* check coarse problem residual error */
9233 if (pcbddc->dbg_flag) {
9234 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9235 PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9236 PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9237 PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9238 PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9239 PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9240 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9241 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9242 PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9243 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error));
9244 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9245 if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9246 if (compute_eigs) {
9247 PetscReal lambda_max_s, lambda_min_s;
9248 KSPConvergedReason reason;
9249 PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9250 PetscCall(KSPGetIterationNumber(check_ksp, &its));
9251 PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9252 PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9253 PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9254 for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9255 }
9256 PetscCall(PetscViewerFlush(dbg_viewer));
9257 PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9258 }
9259 PetscCall(VecDestroy(&check_vec));
9260 PetscCall(VecDestroy(&coarse_vec));
9261 PetscCall(KSPDestroy(&check_ksp));
9262 if (compute_eigs) {
9263 PetscCall(PetscFree(eigs_r));
9264 PetscCall(PetscFree(eigs_c));
9265 }
9266 }
9267 }
9268 PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9269 /* print additional info */
9270 if (pcbddc->dbg_flag) {
9271 /* waits until all processes reaches this point */
9272 PetscCall(PetscBarrier((PetscObject)pc));
9273 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9274 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9275 }
9276
9277 /* free memory */
9278 PetscCall(MatDestroy(&coarse_mat));
9279 PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9280 PetscFunctionReturn(PETSC_SUCCESS);
9281 }
9282
PCBDDCComputePrimalNumbering(PC pc,PetscInt * coarse_size_n,PetscInt ** local_primal_indices_n)9283 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9284 {
9285 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9286 PC_IS *pcis = (PC_IS *)pc->data;
9287 IS subset, subset_mult, subset_n;
9288 PetscInt local_size, coarse_size = 0;
9289 PetscInt *local_primal_indices = NULL;
9290 const PetscInt *t_local_primal_indices;
9291
9292 PetscFunctionBegin;
9293 /* Compute global number of coarse dofs */
9294 PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9295 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9296 PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9297 PetscCall(ISDestroy(&subset_n));
9298 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9299 PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9300 PetscCall(ISDestroy(&subset));
9301 PetscCall(ISDestroy(&subset_mult));
9302 PetscCall(ISGetLocalSize(subset_n, &local_size));
9303 PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9304 PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9305 PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9306 PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9307 PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9308 PetscCall(ISDestroy(&subset_n));
9309
9310 if (pcbddc->dbg_flag) {
9311 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9312 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9313 PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9314 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9315 }
9316
9317 /* get back data */
9318 *coarse_size_n = coarse_size;
9319 *local_primal_indices_n = local_primal_indices;
9320 PetscFunctionReturn(PETSC_SUCCESS);
9321 }
9322
PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork,Vec lwork,IS globalis,IS * localis)9323 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9324 {
9325 IS localis_t;
9326 PetscInt i, lsize, *idxs, n;
9327 PetscScalar *vals;
9328
9329 PetscFunctionBegin;
9330 /* get indices in local ordering exploiting local to global map */
9331 PetscCall(ISGetLocalSize(globalis, &lsize));
9332 PetscCall(PetscMalloc1(lsize, &vals));
9333 for (i = 0; i < lsize; i++) vals[i] = 1.0;
9334 PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9335 PetscCall(VecSet(gwork, 0.0));
9336 PetscCall(VecSet(lwork, 0.0));
9337 if (idxs) { /* multilevel guard */
9338 PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9339 PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9340 }
9341 PetscCall(VecAssemblyBegin(gwork));
9342 PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9343 PetscCall(PetscFree(vals));
9344 PetscCall(VecAssemblyEnd(gwork));
9345 /* now compute set in local ordering */
9346 PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9347 PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9348 PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9349 PetscCall(VecGetSize(lwork, &n));
9350 for (i = 0, lsize = 0; i < n; i++) {
9351 if (PetscRealPart(vals[i]) > 0.5) lsize++;
9352 }
9353 PetscCall(PetscMalloc1(lsize, &idxs));
9354 for (i = 0, lsize = 0; i < n; i++) {
9355 if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9356 }
9357 PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9358 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9359 *localis = localis_t;
9360 PetscFunctionReturn(PETSC_SUCCESS);
9361 }
9362
PCBDDCComputeFakeChange(PC pc,PetscBool constraints,PCBDDCGraph graph,PCBDDCSubSchurs schurs,Mat * change,IS * change_primal,IS * change_primal_mult,PetscBool * change_with_qr)9363 PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9364 {
9365 PC_IS *pcis = (PC_IS *)pc->data;
9366 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9367 PC_IS *pcisf;
9368 PC_BDDC *pcbddcf;
9369 PC pcf;
9370
9371 PetscFunctionBegin;
9372 PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9373 PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9374 PetscCall(PCSetType(pcf, PCBDDC));
9375
9376 pcisf = (PC_IS *)pcf->data;
9377 pcbddcf = (PC_BDDC *)pcf->data;
9378
9379 pcisf->is_B_local = pcis->is_B_local;
9380 pcisf->vec1_N = pcis->vec1_N;
9381 pcisf->BtoNmap = pcis->BtoNmap;
9382 pcisf->n = pcis->n;
9383 pcisf->n_B = pcis->n_B;
9384
9385 PetscCall(PetscFree(pcbddcf->mat_graph));
9386 PetscCall(PetscFree(pcbddcf->sub_schurs));
9387 pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph;
9388 pcbddcf->sub_schurs = schurs;
9389 pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE;
9390 pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9391 pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9392 pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin;
9393 pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax;
9394 pcbddcf->use_faces = PETSC_TRUE;
9395 pcbddcf->use_change_of_basis = (PetscBool)!constraints;
9396 pcbddcf->use_change_on_faces = (PetscBool)!constraints;
9397 pcbddcf->use_qr_single = (PetscBool)!constraints;
9398 pcbddcf->fake_change = PETSC_TRUE;
9399 pcbddcf->dbg_flag = pcbddc->dbg_flag;
9400
9401 PetscCall(PCBDDCAdaptiveSelection(pcf));
9402 PetscCall(PCBDDCConstraintsSetUp(pcf));
9403
9404 *change = pcbddcf->ConstraintMatrix;
9405 if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9406 if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9407 if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9408
9409 if (schurs) pcbddcf->sub_schurs = NULL;
9410 pcbddcf->ConstraintMatrix = NULL;
9411 pcbddcf->mat_graph = NULL;
9412 pcisf->is_B_local = NULL;
9413 pcisf->vec1_N = NULL;
9414 pcisf->BtoNmap = NULL;
9415 PetscCall(PCDestroy(&pcf));
9416 PetscFunctionReturn(PETSC_SUCCESS);
9417 }
9418
PCBDDCSetUpSubSchurs(PC pc)9419 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9420 {
9421 PC_IS *pcis = (PC_IS *)pc->data;
9422 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9423 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9424 Mat S_j;
9425 PetscInt *used_xadj, *used_adjncy;
9426 PetscBool free_used_adj;
9427
9428 PetscFunctionBegin;
9429 PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9430 /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9431 free_used_adj = PETSC_FALSE;
9432 if (pcbddc->sub_schurs_layers == -1) {
9433 used_xadj = NULL;
9434 used_adjncy = NULL;
9435 } else {
9436 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9437 used_xadj = pcbddc->mat_graph->xadj;
9438 used_adjncy = pcbddc->mat_graph->adjncy;
9439 } else if (pcbddc->computed_rowadj) {
9440 used_xadj = pcbddc->mat_graph->xadj;
9441 used_adjncy = pcbddc->mat_graph->adjncy;
9442 } else {
9443 PetscBool flg_row = PETSC_FALSE;
9444 const PetscInt *xadj, *adjncy;
9445 PetscInt nvtxs;
9446
9447 PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9448 if (flg_row) {
9449 PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9450 PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9451 PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9452 free_used_adj = PETSC_TRUE;
9453 } else {
9454 pcbddc->sub_schurs_layers = -1;
9455 used_xadj = NULL;
9456 used_adjncy = NULL;
9457 }
9458 PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9459 }
9460 }
9461
9462 /* setup sub_schurs data */
9463 PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9464 if (!sub_schurs->schur_explicit) {
9465 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9466 PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9467 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9468 } else {
9469 Mat change = NULL;
9470 Vec scaling = NULL;
9471 IS change_primal = NULL, iP;
9472 PetscInt benign_n;
9473 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9474 PetscBool need_change = PETSC_FALSE;
9475 PetscBool discrete_harmonic = PETSC_FALSE;
9476
9477 if (!pcbddc->use_vertices && reuse_solvers) {
9478 PetscInt n_vertices;
9479
9480 PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9481 reuse_solvers = (PetscBool)!n_vertices;
9482 }
9483 if (!pcbddc->benign_change_explicit) {
9484 benign_n = pcbddc->benign_n;
9485 } else {
9486 benign_n = 0;
9487 }
9488 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9489 We need a global reduction to avoid possible deadlocks.
9490 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9491 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9492 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9493 PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9494 need_change = (PetscBool)(!need_change);
9495 }
9496 /* If the user defines additional constraints, we import them here */
9497 if (need_change) {
9498 PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9499 PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9500 }
9501 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9502
9503 PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9504 if (iP) {
9505 PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9506 PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9507 PetscOptionsEnd();
9508 }
9509 if (discrete_harmonic) {
9510 Mat A;
9511 PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9512 PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9513 PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9514 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9515 pcbddc->benign_zerodiag_subs, change, change_primal));
9516 PetscCall(MatDestroy(&A));
9517 } else {
9518 PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9519 pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9520 }
9521 PetscCall(MatDestroy(&change));
9522 PetscCall(ISDestroy(&change_primal));
9523 }
9524 PetscCall(MatDestroy(&S_j));
9525
9526 /* free adjacency */
9527 if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9528 PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9529 PetscFunctionReturn(PETSC_SUCCESS);
9530 }
9531
PCBDDCInitSubSchurs(PC pc)9532 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9533 {
9534 PC_IS *pcis = (PC_IS *)pc->data;
9535 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9536 PCBDDCGraph graph;
9537
9538 PetscFunctionBegin;
9539 /* attach interface graph for determining subsets */
9540 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9541 IS verticesIS, verticescomm;
9542 PetscInt vsize, *idxs;
9543
9544 PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9545 PetscCall(ISGetSize(verticesIS, &vsize));
9546 PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9547 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9548 PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9549 PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9550 PetscCall(PCBDDCGraphCreate(&graph));
9551 PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9552 PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9553 PetscCall(ISDestroy(&verticescomm));
9554 PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9555 } else {
9556 graph = pcbddc->mat_graph;
9557 }
9558 /* print some info */
9559 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9560 IS vertices;
9561 PetscInt nv, nedges, nfaces;
9562 PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9563 PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9564 PetscCall(ISGetSize(vertices, &nv));
9565 PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9566 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9567 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9568 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9569 PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9570 PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9571 PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9572 PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9573 }
9574
9575 /* sub_schurs init */
9576 if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9577 PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9578
9579 /* free graph struct */
9580 if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9581 PetscFunctionReturn(PETSC_SUCCESS);
9582 }
9583
PCBDDCViewGlobalIS(PC pc,IS is,PetscViewer viewer)9584 static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9585 {
9586 Mat_IS *matis = (Mat_IS *)pc->pmat->data;
9587 PetscInt n = pc->pmat->rmap->n, ln, ni, st;
9588 const PetscInt *idxs;
9589 IS gis;
9590
9591 PetscFunctionBegin;
9592 if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9593 PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9594 PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9595 PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9596 PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9597 PetscCall(ISGetLocalSize(is, &ni));
9598 PetscCall(ISGetIndices(is, &idxs));
9599 for (PetscInt i = 0; i < ni; i++) {
9600 if (idxs[i] < 0 || idxs[i] >= ln) continue;
9601 matis->sf_leafdata[idxs[i]] = 1;
9602 }
9603 PetscCall(ISRestoreIndices(is, &idxs));
9604 PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9605 PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9606 ln = 0;
9607 for (PetscInt i = 0; i < n; i++) {
9608 if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9609 }
9610 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9611 PetscCall(ISView(gis, viewer));
9612 PetscCall(ISDestroy(&gis));
9613 PetscFunctionReturn(PETSC_SUCCESS);
9614 }
9615
PCBDDCLoadOrViewCustomization(PC pc,PetscBool load,const char * outfile)9616 PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9617 {
9618 PetscInt header[11];
9619 PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9620 PetscViewer viewer;
9621 MPI_Comm comm = PetscObjectComm((PetscObject)pc);
9622
9623 PetscFunctionBegin;
9624 PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9625 if (load) {
9626 IS is;
9627 Mat A;
9628
9629 PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9630 PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9631 PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9632 PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9633 PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9634 PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9635 PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9636 PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9637 PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9638 PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9639 PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9640 if (header[0]) {
9641 PetscCall(ISCreate(comm, &is));
9642 PetscCall(ISLoad(is, viewer));
9643 PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9644 PetscCall(ISDestroy(&is));
9645 }
9646 if (header[1]) {
9647 PetscCall(ISCreate(comm, &is));
9648 PetscCall(ISLoad(is, viewer));
9649 PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9650 PetscCall(ISDestroy(&is));
9651 }
9652 if (header[2]) {
9653 IS *isarray;
9654
9655 PetscCall(PetscMalloc1(header[2], &isarray));
9656 for (PetscInt i = 0; i < header[2]; i++) {
9657 PetscCall(ISCreate(comm, &isarray[i]));
9658 PetscCall(ISLoad(isarray[i], viewer));
9659 }
9660 PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9661 for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9662 PetscCall(PetscFree(isarray));
9663 }
9664 if (header[3]) {
9665 PetscCall(ISCreate(comm, &is));
9666 PetscCall(ISLoad(is, viewer));
9667 PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9668 PetscCall(ISDestroy(&is));
9669 }
9670 if (header[4]) {
9671 PetscCall(MatCreate(comm, &A));
9672 PetscCall(MatSetType(A, MATAIJ));
9673 PetscCall(MatLoad(A, viewer));
9674 PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9675 PetscCall(MatDestroy(&A));
9676 }
9677 if (header[9]) {
9678 PetscCall(MatCreate(comm, &A));
9679 PetscCall(MatSetType(A, MATIS));
9680 PetscCall(MatLoad(A, viewer));
9681 PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9682 PetscCall(MatDestroy(&A));
9683 }
9684 } else {
9685 header[0] = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9686 header[1] = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9687 header[2] = pcbddc->n_ISForDofsLocal;
9688 header[3] = (PetscInt)!!pcbddc->user_primal_vertices_local;
9689 header[4] = (PetscInt)!!pcbddc->discretegradient;
9690 header[5] = pcbddc->nedorder;
9691 header[6] = pcbddc->nedfield;
9692 header[7] = (PetscInt)pcbddc->nedglobal;
9693 header[8] = (PetscInt)pcbddc->conforming;
9694 header[9] = (PetscInt)!!pcbddc->divudotp;
9695 header[10] = (PetscInt)pcbddc->divudotp_trans;
9696 if (header[4]) header[3] = 0;
9697
9698 PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9699 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9700 PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9701 for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9702 if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9703 if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9704 if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9705 }
9706 PetscCall(PetscViewerDestroy(&viewer));
9707 PetscFunctionReturn(PETSC_SUCCESS);
9708 }
9709
9710 #include <../src/mat/impls/aij/mpi/mpiaij.h>
MatMPIAIJRestrict(Mat A,MPI_Comm ccomm,Mat * B)9711 static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9712 {
9713 Mat At;
9714 IS rows;
9715 PetscInt rst, ren;
9716 PetscLayout rmap;
9717
9718 PetscFunctionBegin;
9719 rst = ren = 0;
9720 if (ccomm != MPI_COMM_NULL) {
9721 PetscCall(PetscLayoutCreate(ccomm, &rmap));
9722 PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9723 PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9724 PetscCall(PetscLayoutSetUp(rmap));
9725 PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9726 }
9727 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9728 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9729 PetscCall(ISDestroy(&rows));
9730
9731 if (ccomm != MPI_COMM_NULL) {
9732 Mat_MPIAIJ *a, *b;
9733 IS from, to;
9734 Vec gvec;
9735 PetscInt lsize;
9736
9737 PetscCall(MatCreate(ccomm, B));
9738 PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9739 PetscCall(MatSetType(*B, MATAIJ));
9740 PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9741 PetscCall(PetscLayoutSetUp((*B)->cmap));
9742 a = (Mat_MPIAIJ *)At->data;
9743 b = (Mat_MPIAIJ *)(*B)->data;
9744 PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9745 PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9746 PetscCall(PetscObjectReference((PetscObject)a->A));
9747 PetscCall(PetscObjectReference((PetscObject)a->B));
9748 b->A = a->A;
9749 b->B = a->B;
9750
9751 b->donotstash = a->donotstash;
9752 b->roworiented = a->roworiented;
9753 b->rowindices = NULL;
9754 b->rowvalues = NULL;
9755 b->getrowactive = PETSC_FALSE;
9756
9757 (*B)->rmap = rmap;
9758 (*B)->factortype = A->factortype;
9759 (*B)->assembled = PETSC_TRUE;
9760 (*B)->insertmode = NOT_SET_VALUES;
9761 (*B)->preallocated = PETSC_TRUE;
9762
9763 if (a->colmap) {
9764 #if defined(PETSC_USE_CTABLE)
9765 PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9766 #else
9767 PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9768 PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9769 #endif
9770 } else b->colmap = NULL;
9771 if (a->garray) {
9772 PetscInt len;
9773 len = a->B->cmap->n;
9774 PetscCall(PetscMalloc1(len + 1, &b->garray));
9775 if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9776 } else b->garray = NULL;
9777
9778 PetscCall(PetscObjectReference((PetscObject)a->lvec));
9779 b->lvec = a->lvec;
9780
9781 /* cannot use VecScatterCopy */
9782 PetscCall(VecGetLocalSize(b->lvec, &lsize));
9783 PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9784 PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9785 PetscCall(MatCreateVecs(*B, &gvec, NULL));
9786 PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9787 PetscCall(ISDestroy(&from));
9788 PetscCall(ISDestroy(&to));
9789 PetscCall(VecDestroy(&gvec));
9790 }
9791 PetscCall(MatDestroy(&At));
9792 PetscFunctionReturn(PETSC_SUCCESS);
9793 }
9794
9795 /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
MatAIJExtractRows(Mat A,IS rows,Mat * sA)9796 static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9797 {
9798 PetscBool isaij;
9799 MPI_Comm comm;
9800
9801 PetscFunctionBegin;
9802 PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9803 PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9804 PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9805 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9806 if (isaij) { /* SeqAIJ supports repeated rows */
9807 PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9808 } else {
9809 Mat A_loc;
9810 Mat_SeqAIJ *da;
9811 PetscSF sf;
9812 PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9813 PetscScalar *daa;
9814 const PetscInt *idxs;
9815 const PetscSFNode *iremotes;
9816 PetscSFNode *remotes;
9817
9818 /* SF for incoming rows */
9819 PetscCall(PetscSFCreate(comm, &sf));
9820 PetscCall(ISGetLocalSize(rows, &ni));
9821 PetscCall(ISGetIndices(rows, &idxs));
9822 PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9823 PetscCall(ISRestoreIndices(rows, &idxs));
9824
9825 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9826 da = (Mat_SeqAIJ *)A_loc->data;
9827 PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9828 for (PetscInt i = 0; i < m; i++) {
9829 rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9830 rdata[2 * i + 1] = da->i[i];
9831 }
9832 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9833 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9834 PetscCall(PetscMalloc1(ni + 1, &di));
9835 di[0] = 0;
9836 for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9837 PetscCall(PetscMalloc1(di[ni], &dj));
9838 PetscCall(PetscMalloc1(di[ni], &daa));
9839 PetscCall(PetscMalloc1(di[ni], &remotes));
9840
9841 PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9842
9843 /* SF graph for nonzeros */
9844 c = 0;
9845 for (PetscInt i = 0; i < ni; i++) {
9846 const PetscInt rank = iremotes[i].rank;
9847 const PetscInt rsize = ldata[2 * i];
9848 for (PetscInt j = 0; j < rsize; j++) {
9849 remotes[c].rank = rank;
9850 remotes[c].index = ldata[2 * i + 1] + j;
9851 c++;
9852 }
9853 }
9854 PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9855 PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9856 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9857 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9858 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9859 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9860
9861 PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9862 PetscCall(MatDestroy(&A_loc));
9863 PetscCall(PetscSFDestroy(&sf));
9864 PetscCall(PetscFree(di));
9865 PetscCall(PetscFree(dj));
9866 PetscCall(PetscFree(daa));
9867 PetscCall(PetscFree(remotes));
9868 PetscCall(PetscFree2(ldata, rdata));
9869 }
9870 PetscFunctionReturn(PETSC_SUCCESS);
9871 }
9872