1 static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
2 Discrete system: 1D, 2D or 3D laplacian, discretized with spectral elements.\n\
3 Spectral degree can be specified by passing values to -p option.\n\
4 Global problem with either Dirichlet boundary conditions on one side or a pure Neumann problem (depending on runtime parameters).\n\
5 Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ number of elements in _ direction.\n\
6 Example usage: \n\
7 1D: mpiexec -n 4 ex59 -nex 7\n\
8 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
9 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
10 Subdomain decomposition can be specified with -np_ parameters.\n\
11 Dirichlet boundaries on one side by default:\n\
12 the solver does not iterate on Dirichlet nodes; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
13 Pure Neumann case can be requested by passing in -pureneumann.\n\
14 In the latter case, in order to avoid runtime errors during factorization, please also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";
15
16 #include <petscksp.h>
17 #include <petscpc.h>
18 #include <petscdm.h>
19 #include <petscdmda.h>
20 #include <petscblaslapack.h>
21 #define DEBUG 0
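/* Overall flow of the example:
   - InitializeDomainData/DomainDecomposition: parse options and split the box of spectral elements among the MPI ranks;
   - ComputeMatrix: build the unassembled global operator (a MATIS) from the local subdomain matrices;
   - ComputeKSPBDDC: set up a conjugate gradient solver preconditioned with PCBDDC;
   - ComputeKSPFETIDP (optional): set up the dual FETI-DP solver reusing the same BDDC data;
   - solve against a manufactured right-hand side and report iteration counts, extreme eigenvalue
     estimates and the error with respect to the exact solution. */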
22
23 /* structure holding domain data */
24 typedef struct {
25 /* communicator */
26 MPI_Comm gcomm;
27 /* space dimension */
28 PetscInt dim;
29 /* spectral degree */
30 PetscInt p;
31 /* subdomains per dimension */
32 PetscInt npx, npy, npz;
33 /* subdomain index in cartesian dimensions */
34 PetscInt ipx, ipy, ipz;
35 /* elements per dimension */
36 PetscInt nex, ney, nez;
37 /* local elements per dimension */
38 PetscInt nex_l, ney_l, nez_l;
39 /* global number of dofs per dimension */
40 PetscInt xm, ym, zm;
41 /* local number of dofs per dimension */
42 PetscInt xm_l, ym_l, zm_l;
43 /* starting global indexes for subdomain in lexicographic ordering */
44 PetscInt startx, starty, startz;
45 /* pure Neumann problem */
46 PetscBool pure_neumann;
47 /* Dirichlet BC implementation */
48 PetscBool DBC_zerorows;
49 /* Scaling factor for subdomain */
50 PetscScalar scalingfactor;
51 PetscBool testkspfetidp;
52 } DomainData;
53
54 /* structure holding GLL data */
55 typedef struct {
56 /* GLL nodes */
57 PetscReal *zGL;
58 /* GLL weights */
59 PetscScalar *rhoGL;
60 /* aux_mat */
61 PetscScalar **A;
62 /* Element matrix */
63 Mat elem_mat;
64 } GLLData;
65
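/* Builds (in two passes: first count, then fill) a CSR adjacency graph of the local dofs:
   each dof is connected to its box-stencil neighbors, except along directions in which it lies
   on the local boundary. PCBDDC uses this graph to analyze the connectivity of the interface,
   see the call to PCBDDCSetLocalAdjacencyGraph() in ComputeKSPBDDC(). */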
66 static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
67 {
68 PetscInt *xadj_temp, *adjncy_temp;
69 PetscInt i, j, k, ii, jj, kk, iindex, count_adj;
70 PetscInt istart_csr, iend_csr, jstart_csr, jend_csr, kstart_csr, kend_csr;
71 PetscBool internal_node;
72
73 /* first count dimension of adjncy */
74 PetscFunctionBeginUser;
75 count_adj = 0;
76 for (k = 0; k < dd.zm_l; k++) {
77 internal_node = PETSC_TRUE;
78 kstart_csr = k > 0 ? k - 1 : k;
79 kend_csr = k < dd.zm_l - 1 ? k + 2 : k + 1;
80 if (k == 0 || k == dd.zm_l - 1) {
81 internal_node = PETSC_FALSE;
82 kstart_csr = k;
83 kend_csr = k + 1;
84 }
85 for (j = 0; j < dd.ym_l; j++) {
86 jstart_csr = j > 0 ? j - 1 : j;
87 jend_csr = j < dd.ym_l - 1 ? j + 2 : j + 1;
88 if (j == 0 || j == dd.ym_l - 1) {
89 internal_node = PETSC_FALSE;
90 jstart_csr = j;
91 jend_csr = j + 1;
92 }
93 for (i = 0; i < dd.xm_l; i++) {
94 istart_csr = i > 0 ? i - 1 : i;
95 iend_csr = i < dd.xm_l - 1 ? i + 2 : i + 1;
96 if (i == 0 || i == dd.xm_l - 1) {
97 internal_node = PETSC_FALSE;
98 istart_csr = i;
99 iend_csr = i + 1;
100 }
101 if (internal_node) {
102 istart_csr = i;
103 iend_csr = i + 1;
104 jstart_csr = j;
105 jend_csr = j + 1;
106 kstart_csr = k;
107 kend_csr = k + 1;
108 }
109 for (kk = kstart_csr; kk < kend_csr; kk++) {
110 for (jj = jstart_csr; jj < jend_csr; jj++) {
111 for (ii = istart_csr; ii < iend_csr; ii++) count_adj = count_adj + 1;
112 }
113 }
114 }
115 }
116 }
117 PetscCall(PetscMalloc1(dd.xm_l * dd.ym_l * dd.zm_l + 1, &xadj_temp));
118 PetscCall(PetscMalloc1(count_adj, &adjncy_temp));
119
120 /* now fill CSR data structure */
121 count_adj = 0;
122 for (k = 0; k < dd.zm_l; k++) {
123 internal_node = PETSC_TRUE;
124 kstart_csr = k > 0 ? k - 1 : k;
125 kend_csr = k < dd.zm_l - 1 ? k + 2 : k + 1;
126 if (k == 0 || k == dd.zm_l - 1) {
127 internal_node = PETSC_FALSE;
128 kstart_csr = k;
129 kend_csr = k + 1;
130 }
131 for (j = 0; j < dd.ym_l; j++) {
132 jstart_csr = j > 0 ? j - 1 : j;
133 jend_csr = j < dd.ym_l - 1 ? j + 2 : j + 1;
134 if (j == 0 || j == dd.ym_l - 1) {
135 internal_node = PETSC_FALSE;
136 jstart_csr = j;
137 jend_csr = j + 1;
138 }
139 for (i = 0; i < dd.xm_l; i++) {
140 istart_csr = i > 0 ? i - 1 : i;
141 iend_csr = i < dd.xm_l - 1 ? i + 2 : i + 1;
142 if (i == 0 || i == dd.xm_l - 1) {
143 internal_node = PETSC_FALSE;
144 istart_csr = i;
145 iend_csr = i + 1;
146 }
147 iindex = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
148 xadj_temp[iindex] = count_adj;
149 if (internal_node) {
150 istart_csr = i;
151 iend_csr = i + 1;
152 jstart_csr = j;
153 jend_csr = j + 1;
154 kstart_csr = k;
155 kend_csr = k + 1;
156 }
157 for (kk = kstart_csr; kk < kend_csr; kk++) {
158 for (jj = jstart_csr; jj < jend_csr; jj++) {
159 for (ii = istart_csr; ii < iend_csr; ii++) {
160 iindex = kk * dd.xm_l * dd.ym_l + jj * dd.xm_l + ii;
161
162 adjncy_temp[count_adj] = iindex;
163 count_adj = count_adj + 1;
164 }
165 }
166 }
167 }
168 }
169 }
170 xadj_temp[dd.xm_l * dd.ym_l * dd.zm_l] = count_adj;
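/* A small worked example (1D subdomain with xm_l = 3, so ym_l = zm_l = 1):
   xadj = {0, 1, 4, 5} and adjncy = {0, 0, 1, 2, 2}, i.e. the two endpoint dofs list only
   themselves and the middle dof lists its full 3-point stencil. */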
171
172 *xadj = xadj_temp;
173 *adjncy = adjncy_temp;
174 PetscFunctionReturn(PETSC_SUCCESS);
175 }
176
177 static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd, IS *dirichlet, IS *neumann)
178 {
179 IS temp_dirichlet = 0, temp_neumann = 0;
180 PetscInt localsize, i, j, k, *indices;
181 PetscBool *touched;
182
183 PetscFunctionBeginUser;
184 localsize = dd.xm_l * dd.ym_l * dd.zm_l;
185
186 PetscCall(PetscMalloc1(localsize, &indices));
187 PetscCall(PetscMalloc1(localsize, &touched));
188 for (i = 0; i < localsize; i++) touched[i] = PETSC_FALSE;
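/* 'touched' guards against listing the same dof twice when it lies on more than one
   face of the subdomain (edges and corners of the box). */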
189
190 if (dirichlet) {
191 i = 0;
192 /* west boundary */
193 if (dd.ipx == 0) {
194 for (k = 0; k < dd.zm_l; k++) {
195 for (j = 0; j < dd.ym_l; j++) {
196 indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
197 touched[indices[i]] = PETSC_TRUE;
198 i++;
199 }
200 }
201 }
202 PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_dirichlet));
203 }
204 if (neumann) {
205 i = 0;
206 /* west boundary */
207 if (dd.ipx == 0) {
208 for (k = 0; k < dd.zm_l; k++) {
209 for (j = 0; j < dd.ym_l; j++) {
210 indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
211 if (!touched[indices[i]]) {
212 touched[indices[i]] = PETSC_TRUE;
213 i++;
214 }
215 }
216 }
217 }
218 /* east boundary */
219 if (dd.ipx == dd.npx - 1) {
220 for (k = 0; k < dd.zm_l; k++) {
221 for (j = 0; j < dd.ym_l; j++) {
222 indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l + dd.xm_l - 1;
223 if (!touched[indices[i]]) {
224 touched[indices[i]] = PETSC_TRUE;
225 i++;
226 }
227 }
228 }
229 }
230 /* south boundary */
231 if (dd.ipy == 0 && dd.dim > 1) {
232 for (k = 0; k < dd.zm_l; k++) {
233 for (j = 0; j < dd.xm_l; j++) {
234 indices[i] = k * dd.ym_l * dd.xm_l + j;
235 if (!touched[indices[i]]) {
236 touched[indices[i]] = PETSC_TRUE;
237 i++;
238 }
239 }
240 }
241 }
242 /* north boundary */
243 if (dd.ipy == dd.npy - 1 && dd.dim > 1) {
244 for (k = 0; k < dd.zm_l; k++) {
245 for (j = 0; j < dd.xm_l; j++) {
246 indices[i] = k * dd.ym_l * dd.xm_l + (dd.ym_l - 1) * dd.xm_l + j;
247 if (!touched[indices[i]]) {
248 touched[indices[i]] = PETSC_TRUE;
249 i++;
250 }
251 }
252 }
253 }
254 /* bottom boundary */
255 if (dd.ipz == 0 && dd.dim > 2) {
256 for (k = 0; k < dd.ym_l; k++) {
257 for (j = 0; j < dd.xm_l; j++) {
258 indices[i] = k * dd.xm_l + j;
259 if (!touched[indices[i]]) {
260 touched[indices[i]] = PETSC_TRUE;
261 i++;
262 }
263 }
264 }
265 }
266 /* top boundary */
267 if (dd.ipz == dd.npz - 1 && dd.dim > 2) {
268 for (k = 0; k < dd.ym_l; k++) {
269 for (j = 0; j < dd.xm_l; j++) {
270 indices[i] = (dd.zm_l - 1) * dd.ym_l * dd.xm_l + k * dd.xm_l + j;
271 if (!touched[indices[i]]) {
272 touched[indices[i]] = PETSC_TRUE;
273 i++;
274 }
275 }
276 }
277 }
278 PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_neumann));
279 }
280 if (dirichlet) *dirichlet = temp_dirichlet;
281 if (neumann) *neumann = temp_neumann;
282 PetscCall(PetscFree(indices));
283 PetscCall(PetscFree(touched));
284 PetscFunctionReturn(PETSC_SUCCESS);
285 }
286
287 static PetscErrorCode ComputeMapping(DomainData dd, ISLocalToGlobalMapping *isg2lmap)
288 {
289 DM da;
290 AO ao;
291 DMBoundaryType bx = DM_BOUNDARY_NONE, by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
292 DMDAStencilType stype = DMDA_STENCIL_BOX;
293 ISLocalToGlobalMapping temp_isg2lmap;
294 PetscInt i, j, k, ig, jg, kg, lindex, gindex, localsize;
295 PetscInt *global_indices;
296
297 PetscFunctionBeginUser;
298 /* Not an efficient mapping: this function computes a very simple lexicographic mapping
299 just to illustrate the creation of a MATIS object */
300 localsize = dd.xm_l * dd.ym_l * dd.zm_l;
301 PetscCall(PetscMalloc1(localsize, &global_indices));
302 for (k = 0; k < dd.zm_l; k++) {
303 kg = dd.startz + k;
304 for (j = 0; j < dd.ym_l; j++) {
305 jg = dd.starty + j;
306 for (i = 0; i < dd.xm_l; i++) {
307 ig = dd.startx + i;
308 lindex = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
309 gindex = kg * dd.xm * dd.ym + jg * dd.xm + ig;
310 global_indices[lindex] = gindex;
311 }
312 }
313 }
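/* The throwaway DMDA created below is used only to get its AO (application ordering):
   the lexicographic global indices computed above are converted to the PETSc ordering of
   the DMDA, and the converted indices define the local-to-global map of the MATIS. */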
314 if (dd.dim == 3) {
315 PetscCall(DMDACreate3d(dd.gcomm, bx, by, bz, stype, dd.xm, dd.ym, dd.zm, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, NULL, &da));
316 } else if (dd.dim == 2) {
317 PetscCall(DMDACreate2d(dd.gcomm, bx, by, stype, dd.xm, dd.ym, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
318 } else {
319 PetscCall(DMDACreate1d(dd.gcomm, bx, dd.xm, 1, 1, NULL, &da));
320 }
321 PetscCall(DMSetFromOptions(da));
322 PetscCall(DMSetUp(da));
323 PetscCall(DMDASetAOType(da, AOMEMORYSCALABLE));
324 PetscCall(DMDAGetAO(da, &ao));
325 PetscCall(AOApplicationToPetsc(ao, dd.xm_l * dd.ym_l * dd.zm_l, global_indices));
326 PetscCall(ISLocalToGlobalMappingCreate(dd.gcomm, 1, localsize, global_indices, PETSC_OWN_POINTER, &temp_isg2lmap));
327 PetscCall(DMDestroy(&da));
328 *isg2lmap = temp_isg2lmap;
329 PetscFunctionReturn(PETSC_SUCCESS);
330 }
331 static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
332 {
333 PetscInt localsize, zloc, yloc, xloc, auxnex, auxney, auxnez;
334 PetscInt ie, je, ke, i, j, k, ig, jg, kg, ii, ming;
335 PetscInt *indexg, *cols, *colsg;
336 PetscScalar *vals;
337 Mat temp_local_mat, elem_mat_DBC = 0, *usedmat;
338 IS submatIS;
339
340 PetscFunctionBeginUser;
341 PetscCall(MatGetSize(glldata.elem_mat, &i, &j));
342 PetscCall(PetscMalloc1(i, &indexg));
343 PetscCall(PetscMalloc1(i, &colsg));
344 /* get submatrix of elem_mat without dirichlet nodes */
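/* When Dirichlet dofs are eliminated from the system (neither -pureneumann nor -usezerorows),
   the leftmost subdomains drop the first GLL node in x from their first element (using the
   reduced matrix elem_mat_DBC) and shift the remaining elements by one dof (ming = -1 in the
   assembly loop below). */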
345 if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
346 xloc = dd.p + 1;
347 yloc = 1;
348 zloc = 1;
349 if (dd.dim > 1) yloc = dd.p + 1;
350 if (dd.dim > 2) zloc = dd.p + 1;
351 ii = 0;
352 for (k = 0; k < zloc; k++) {
353 for (j = 0; j < yloc; j++) {
354 for (i = 1; i < xloc; i++) {
355 indexg[ii] = k * xloc * yloc + j * xloc + i;
356 ii++;
357 }
358 }
359 }
360 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ii, indexg, PETSC_COPY_VALUES, &submatIS));
361 PetscCall(MatCreateSubMatrix(glldata.elem_mat, submatIS, submatIS, MAT_INITIAL_MATRIX, &elem_mat_DBC));
362 PetscCall(ISDestroy(&submatIS));
363 }
364
365 /* Assemble subdomain matrix */
366 localsize = dd.xm_l * dd.ym_l * dd.zm_l;
367 PetscCall(MatCreate(PETSC_COMM_SELF, &temp_local_mat));
368 PetscCall(MatSetSizes(temp_local_mat, localsize, localsize, localsize, localsize));
369 PetscCall(MatSetOptionsPrefix(temp_local_mat, "subdomain_"));
370 /* set local matrices type: here we use SEQSBAIJ primarily for testing purposes */
371 /* in order to avoid conversions inside the BDDC code, use SeqAIJ if possible */
372 if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
373 PetscCall(MatSetType(temp_local_mat, MATSEQAIJ));
374 } else {
375 PetscCall(MatSetType(temp_local_mat, MATSEQSBAIJ));
376 }
377 PetscCall(MatSetFromOptions(temp_local_mat));
378
379 i = PetscPowRealInt(3.0 * (dd.p + 1.0), dd.dim);
380
381 PetscCall(MatSeqAIJSetPreallocation(temp_local_mat, i, NULL)); /* very overestimated */
382 PetscCall(MatSeqSBAIJSetPreallocation(temp_local_mat, 1, i, NULL)); /* very overestimated */
383 PetscCall(MatSeqBAIJSetPreallocation(temp_local_mat, 1, i, NULL)); /* very overestimated */
384 PetscCall(MatSetOption(temp_local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
385
386 yloc = dd.p + 1;
387 zloc = dd.p + 1;
388 if (dd.dim < 3) zloc = 1;
389 if (dd.dim < 2) yloc = 1;
390
391 auxnez = dd.nez_l;
392 auxney = dd.ney_l;
393 auxnex = dd.nex_l;
394 if (dd.dim < 3) auxnez = 1;
395 if (dd.dim < 2) auxney = 1;
396
397 for (ke = 0; ke < auxnez; ke++) {
398 for (je = 0; je < auxney; je++) {
399 for (ie = 0; ie < auxnex; ie++) {
400 /* customize element accounting for BC */
401 xloc = dd.p + 1;
402 ming = 0;
403 usedmat = &glldata.elem_mat;
404 if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
405 if (ie == 0) {
406 xloc = dd.p;
407 usedmat = &elem_mat_DBC;
408 } else {
409 ming = -1;
410 usedmat = &glldata.elem_mat;
411 }
412 }
413 /* local to the element/global to the subdomain indexing */
414 for (k = 0; k < zloc; k++) {
415 kg = ke * dd.p + k;
416 for (j = 0; j < yloc; j++) {
417 jg = je * dd.p + j;
418 for (i = 0; i < xloc; i++) {
419 ig = ie * dd.p + i + ming;
420 ii = k * xloc * yloc + j * xloc + i;
421 indexg[ii] = kg * dd.xm_l * dd.ym_l + jg * dd.xm_l + ig;
422 }
423 }
424 }
425 /* Set values */
426 for (i = 0; i < xloc * yloc * zloc; i++) {
427 PetscCall(MatGetRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
428 for (k = 0; k < j; k++) colsg[k] = indexg[cols[k]];
429 PetscCall(MatSetValues(temp_local_mat, 1, &indexg[i], j, colsg, vals, ADD_VALUES));
430 PetscCall(MatRestoreRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
431 }
432 }
433 }
434 }
435 PetscCall(PetscFree(indexg));
436 PetscCall(PetscFree(colsg));
437 PetscCall(MatAssemblyBegin(temp_local_mat, MAT_FINAL_ASSEMBLY));
438 PetscCall(MatAssemblyEnd(temp_local_mat, MAT_FINAL_ASSEMBLY));
439 #if DEBUG
440 {
441 Vec lvec, rvec;
442 PetscReal norm;
443 PetscCall(MatCreateVecs(temp_local_mat, &lvec, &rvec));
444 PetscCall(VecSet(lvec, 1.0));
445 PetscCall(MatMult(temp_local_mat, lvec, rvec));
446 PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
447 PetscCall(VecDestroy(&lvec));
448 PetscCall(VecDestroy(&rvec));
449 }
450 #endif
451 *local_mat = temp_local_mat;
452 PetscCall(MatDestroy(&elem_mat_DBC));
453 PetscFunctionReturn(PETSC_SUCCESS);
454 }
455
456 static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
457 {
458 PetscReal *M, si;
459 PetscScalar x, z0, z1, z2, Lpj, Lpr, rhoGLj, rhoGLk;
460 PetscBLASInt pm1, lierr;
461 PetscInt i, j, n, k, s, r, q, ii, jj, p = dd.p;
462 PetscInt xloc, yloc, zloc, xyloc, xyzloc;
463
464 PetscFunctionBeginUser;
465 /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
466 PetscCall(PetscCalloc1(p + 1, &glldata->zGL));
467
468 glldata->zGL[0] = -1.0;
469 glldata->zGL[p] = 1.0;
470 if (p > 1) {
471 if (p == 2) glldata->zGL[1] = 0.0;
472 else {
473 PetscCall(PetscMalloc1(p - 1, &M));
474 for (i = 0; i < p - 1; i++) {
475 si = (PetscReal)(i + 1.0);
476 M[i] = 0.5 * PetscSqrtReal(si * (si + 2.0) / ((si + 0.5) * (si + 1.5)));
477 }
478 pm1 = (PetscBLASInt)(p - 1);
479 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
480 PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("N", &pm1, &glldata->zGL[1], M, &x, &pm1, M, &lierr));
481 PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in STEQR Lapack routine %d", (int)lierr);
482 PetscCall(PetscFPTrapPop());
483 PetscCall(PetscFree(M));
484 }
485 }
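/* The p-1 interior GLL nodes (the roots of the derivative of the Legendre polynomial L_p)
   are computed above as the eigenvalues of a symmetric tridiagonal Jacobi matrix with zero
   diagonal and off-diagonal entries M[], using LAPACK STEQR (a Golub-Welsch-type approach). */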
486
487 /* Weights for 1D quadrature */
488 PetscCall(PetscMalloc1(p + 1, &glldata->rhoGL));
489
490 glldata->rhoGL[0] = 2.0 / (PetscScalar)(p * (p + 1.0));
491 glldata->rhoGL[p] = glldata->rhoGL[0];
492 z2 = -1; /* Dummy value to avoid -Wmaybe-uninitialized */
493 for (i = 1; i < p; i++) {
494 x = glldata->zGL[i];
495 z0 = 1.0;
496 z1 = x;
497 for (n = 1; n < p; n++) {
498 z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
499 z0 = z1;
500 z1 = z2;
501 }
502 glldata->rhoGL[i] = 2.0 / (p * (p + 1.0) * z2 * z2);
503 }
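/* The recurrence used above (and in the loops below) is the standard three-term Legendre
   recurrence (n+1) L_{n+1}(x) = (2n+1) x L_n(x) - n L_{n-1}(x), so that z2 = L_p(x) on exit;
   the GLL weights are rho_i = 2 / (p (p+1) L_p(z_i)^2). */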
504
505 /* Auxiliary mat for laplacian */
506 PetscCall(PetscMalloc1(p + 1, &glldata->A));
507 PetscCall(PetscMalloc1((p + 1) * (p + 1), &glldata->A[0]));
508 for (i = 1; i < p + 1; i++) glldata->A[i] = glldata->A[i - 1] + p + 1;
509
510 for (j = 1; j < p; j++) {
511 x = glldata->zGL[j];
512 z0 = 1.0;
513 z1 = x;
514 for (n = 1; n < p; n++) {
515 z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
516 z0 = z1;
517 z1 = z2;
518 }
519 Lpj = z2;
520 for (r = 1; r < p; r++) {
521 if (r == j) {
522 glldata->A[j][j] = 2.0 / (3.0 * (1.0 - glldata->zGL[j] * glldata->zGL[j]) * Lpj * Lpj);
523 } else {
524 x = glldata->zGL[r];
525 z0 = 1.0;
526 z1 = x;
527 for (n = 1; n < p; n++) {
528 z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
529 z0 = z1;
530 z1 = z2;
531 }
532 Lpr = z2;
533 glldata->A[r][j] = 4.0 / (p * (p + 1.0) * Lpj * Lpr * (glldata->zGL[j] - glldata->zGL[r]) * (glldata->zGL[j] - glldata->zGL[r]));
534 }
535 }
536 }
537 for (j = 1; j < p + 1; j++) {
538 x = glldata->zGL[j];
539 z0 = 1.0;
540 z1 = x;
541 for (n = 1; n < p; n++) {
542 z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
543 z0 = z1;
544 z1 = z2;
545 }
546 Lpj = z2;
547 glldata->A[j][0] = 4.0 * PetscPowRealInt(-1.0, p) / (p * (p + 1.0) * Lpj * (1.0 + glldata->zGL[j]) * (1.0 + glldata->zGL[j]));
548 glldata->A[0][j] = glldata->A[j][0];
549 }
550 for (j = 0; j < p; j++) {
551 x = glldata->zGL[j];
552 z0 = 1.0;
553 z1 = x;
554 for (n = 1; n < p; n++) {
555 z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
556 z0 = z1;
557 z1 = z2;
558 }
559 Lpj = z2;
560
561 glldata->A[p][j] = 4.0 / (p * (p + 1.0) * Lpj * (1.0 - glldata->zGL[j]) * (1.0 - glldata->zGL[j]));
562 glldata->A[j][p] = glldata->A[p][j];
563 }
564 glldata->A[0][0] = 0.5 + (p * (p + 1.0) - 2.0) / 6.0;
565 glldata->A[p][p] = glldata->A[0][0];
566
567 /* compute element matrix */
568 xloc = p + 1;
569 yloc = p + 1;
570 zloc = p + 1;
571 if (dd.dim < 2) yloc = 1;
572 if (dd.dim < 3) zloc = 1;
573 xyloc = xloc * yloc;
574 xyzloc = xloc * yloc * zloc;
575
576 PetscCall(MatCreate(PETSC_COMM_SELF, &glldata->elem_mat));
577 PetscCall(MatSetSizes(glldata->elem_mat, xyzloc, xyzloc, xyzloc, xyzloc));
578 PetscCall(MatSetType(glldata->elem_mat, MATSEQAIJ));
579 PetscCall(MatSeqAIJSetPreallocation(glldata->elem_mat, xyzloc, NULL)); /* overestimated */
580 PetscCall(MatZeroEntries(glldata->elem_mat));
581 PetscCall(MatSetOption(glldata->elem_mat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
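/* The element matrix assembled below has the usual tensor-product structure of a
   spectral-element Laplacian: for every collocation point, contributions of the form
   A[i][q]*rho_j*rho_k are added along x, plus the analogous terms along y and z when dd.dim > 1. */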
582
583 for (k = 0; k < zloc; k++) {
584 if (dd.dim > 2) rhoGLk = glldata->rhoGL[k];
585 else rhoGLk = 1.0;
586
587 for (j = 0; j < yloc; j++) {
588 if (dd.dim > 1) rhoGLj = glldata->rhoGL[j];
589 else rhoGLj = 1.0;
590
591 for (i = 0; i < xloc; i++) {
592 ii = k * xyloc + j * xloc + i;
593 s = k;
594 r = j;
595 for (q = 0; q < xloc; q++) {
596 jj = s * xyloc + r * xloc + q;
597 PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[i][q] * rhoGLj * rhoGLk, ADD_VALUES));
598 }
599 if (dd.dim > 1) {
600 s = k;
601 q = i;
602 for (r = 0; r < yloc; r++) {
603 jj = s * xyloc + r * xloc + q;
604 PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[j][r] * glldata->rhoGL[i] * rhoGLk, ADD_VALUES));
605 }
606 }
607 if (dd.dim > 2) {
608 r = j;
609 q = i;
610 for (s = 0; s < zloc; s++) {
611 jj = s * xyloc + r * xloc + q;
612 PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[k][s] * rhoGLj * glldata->rhoGL[i], ADD_VALUES));
613 }
614 }
615 }
616 }
617 }
618 PetscCall(MatAssemblyBegin(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
619 PetscCall(MatAssemblyEnd(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
620 #if DEBUG
621 {
622 Vec lvec, rvec;
623 PetscReal norm;
624 PetscCall(MatCreateVecs(glldata->elem_mat, &lvec, &rvec));
625 PetscCall(VecSet(lvec, 1.0));
626 PetscCall(MatMult(glldata->elem_mat, lvec, rvec));
627 PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
628 PetscCall(VecDestroy(&lvec));
629 PetscCall(VecDestroy(&rvec));
630 }
631 #endif
632 PetscFunctionReturn(PETSC_SUCCESS);
633 }
634
635 static PetscErrorCode DomainDecomposition(DomainData *dd)
636 {
637 PetscMPIInt rank;
638 PetscInt i, j, k;
639
640 PetscFunctionBeginUser;
641 /* Subdomain index in cartesian coordinates */
642 PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
643 dd->ipx = rank % dd->npx;
644 if (dd->dim > 1) dd->ipz = rank / (dd->npx * dd->npy);
645 else dd->ipz = 0;
646
647 dd->ipy = rank / dd->npx - dd->ipz * dd->npy;
648 /* number of local elements */
649 dd->nex_l = dd->nex / dd->npx;
650 if (dd->ipx < dd->nex % dd->npx) dd->nex_l++;
651 if (dd->dim > 1) {
652 dd->ney_l = dd->ney / dd->npy;
653 if (dd->ipy < dd->ney % dd->npy) dd->ney_l++;
654 } else {
655 dd->ney_l = 0;
656 }
657 if (dd->dim > 2) {
658 dd->nez_l = dd->nez / dd->npz;
659 if (dd->ipz < dd->nez % dd->npz) dd->nez_l++;
660 } else {
661 dd->nez_l = 0;
662 }
663 /* local and global number of dofs */
664 dd->xm_l = dd->nex_l * dd->p + 1;
665 dd->xm = dd->nex * dd->p + 1;
666 dd->ym_l = dd->ney_l * dd->p + 1;
667 dd->ym = dd->ney * dd->p + 1;
668 dd->zm_l = dd->nez_l * dd->p + 1;
669 dd->zm = dd->nez * dd->p + 1;
670 if (!dd->pure_neumann) {
671 if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
672 if (!dd->DBC_zerorows) dd->xm--;
673 }
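/* When Dirichlet dofs are eliminated (no -usezerorows and not pure Neumann), the global dof
   count in the x direction is reduced by one and the subdomains with ipx == 0 lose their first
   local dof in x; the starting offsets computed below are shifted accordingly (startx is
   decremented for ipx > 0). */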
674
675 /* starting global index for local dofs (simple lexicographic order) */
676 dd->startx = 0;
677 j = dd->nex / dd->npx;
678 for (i = 0; i < dd->ipx; i++) {
679 k = j;
680 if (i < dd->nex % dd->npx) k++;
681 dd->startx = dd->startx + k * dd->p;
682 }
683 if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;
684
685 dd->starty = 0;
686 if (dd->dim > 1) {
687 j = dd->ney / dd->npy;
688 for (i = 0; i < dd->ipy; i++) {
689 k = j;
690 if (i < dd->ney % dd->npy) k++;
691 dd->starty = dd->starty + k * dd->p;
692 }
693 }
694 dd->startz = 0;
695 if (dd->dim > 2) {
696 j = dd->nez / dd->npz;
697 for (i = 0; i < dd->ipz; i++) {
698 k = j;
699 if (i < dd->nez % dd->npz) k++;
700 dd->startz = dd->startz + k * dd->p;
701 }
702 }
703 PetscFunctionReturn(PETSC_SUCCESS);
704 }
705
706 static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
707 {
708 GLLData gll;
709 Mat local_mat = 0, temp_A = 0;
710 ISLocalToGlobalMapping matis_map = 0;
711 IS dirichletIS = 0;
712
713 PetscFunctionBeginUser;
714 /* Compute data for the Gauss-Lobatto-Legendre quadrature rule */
715 PetscCall(GLLStuffs(dd, &gll));
716 /* Compute matrix of subdomain Neumann problem */
717 PetscCall(ComputeSubdomainMatrix(dd, gll, &local_mat));
718 /* Compute global mapping of local dofs */
719 PetscCall(ComputeMapping(dd, &matis_map));
720 /* Create MATIS object needed by BDDC */
721 PetscCall(MatCreateIS(dd.gcomm, 1, PETSC_DECIDE, PETSC_DECIDE, dd.xm * dd.ym * dd.zm, dd.xm * dd.ym * dd.zm, matis_map, matis_map, &temp_A));
722 /* Set local subdomain matrices into MATIS object */
723 PetscCall(MatScale(local_mat, dd.scalingfactor));
724 PetscCall(MatISSetLocalMat(temp_A, local_mat));
725 /* Call assembly functions */
726 PetscCall(MatAssemblyBegin(temp_A, MAT_FINAL_ASSEMBLY));
727 PetscCall(MatAssemblyEnd(temp_A, MAT_FINAL_ASSEMBLY));
728
729 if (dd.DBC_zerorows) {
730 PetscInt dirsize;
731
732 PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, NULL));
733 PetscCall(MatSetOption(local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
734 PetscCall(MatZeroRowsColumnsLocalIS(temp_A, dirichletIS, 1.0, NULL, NULL));
735 PetscCall(ISGetLocalSize(dirichletIS, &dirsize));
736 PetscCall(ISDestroy(&dirichletIS));
737 }
738
739 /* giving hints to local and global matrices could be useful for the BDDC */
740 PetscCall(MatSetOption(local_mat, MAT_SPD, PETSC_TRUE));
741 PetscCall(MatSetOption(local_mat, MAT_SPD_ETERNAL, PETSC_TRUE));
742 #if DEBUG
743 {
744 Vec lvec, rvec;
745 PetscReal norm;
746 PetscCall(MatCreateVecs(temp_A, &lvec, &rvec));
747 PetscCall(VecSet(lvec, 1.0));
748 PetscCall(MatMult(temp_A, lvec, rvec));
749 PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
750 PetscCall(VecDestroy(&lvec));
751 PetscCall(VecDestroy(&rvec));
752 }
753 #endif
754 /* free allocated workspace */
755 PetscCall(PetscFree(gll.zGL));
756 PetscCall(PetscFree(gll.rhoGL));
757 PetscCall(PetscFree(gll.A[0]));
758 PetscCall(PetscFree(gll.A));
759 PetscCall(MatDestroy(&gll.elem_mat));
760 PetscCall(MatDestroy(&local_mat));
761 PetscCall(ISLocalToGlobalMappingDestroy(&matis_map));
762 /* give back the pointer to the MATIS object */
763 *A = temp_A;
764 PetscFunctionReturn(PETSC_SUCCESS);
765 }
766
767 static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
768 {
769 KSP temp_ksp;
770 PC pc;
771
772 PetscFunctionBeginUser;
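/* Two ways of setting up a FETI-DP solver are shown: either create the FETI-DP operator and its
   preconditioner explicitly with PCBDDCCreateFETIDPOperators() and solve in the space of Lagrange
   multipliers (the "fluxes"), or use the KSPFETIDP solver type, which wraps the same machinery
   around the inner BDDC preconditioner. */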
773 PetscCall(KSPGetPC(ksp_bddc, &pc));
774 if (!dd.testkspfetidp) {
775 PC D;
776 Mat F;
777
778 PetscCall(PCBDDCCreateFETIDPOperators(pc, PETSC_TRUE, NULL, &F, &D));
779 PetscCall(KSPCreate(PetscObjectComm((PetscObject)F), &temp_ksp));
780 PetscCall(KSPSetOperators(temp_ksp, F, F));
781 PetscCall(KSPSetType(temp_ksp, KSPCG));
782 PetscCall(KSPSetPC(temp_ksp, D));
783 PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
784 PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
785 PetscCall(KSPSetFromOptions(temp_ksp));
786 PetscCall(KSPSetUp(temp_ksp));
787 PetscCall(MatDestroy(&F));
788 PetscCall(PCDestroy(&D));
789 } else {
790 Mat A, Ap;
791
792 PetscCall(KSPCreate(PetscObjectComm((PetscObject)ksp_bddc), &temp_ksp));
793 PetscCall(KSPGetOperators(ksp_bddc, &A, &Ap));
794 PetscCall(KSPSetOperators(temp_ksp, A, Ap));
795 PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
796 PetscCall(KSPSetType(temp_ksp, KSPFETIDP));
797 PetscCall(KSPFETIDPSetInnerBDDC(temp_ksp, pc));
798 PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
799 PetscCall(KSPSetFromOptions(temp_ksp));
800 PetscCall(KSPSetUp(temp_ksp));
801 }
802 *ksp_fetidp = temp_ksp;
803 PetscFunctionReturn(PETSC_SUCCESS);
804 }
805
806 static PetscErrorCode ComputeKSPBDDC(DomainData dd, Mat A, KSP *ksp)
807 {
808 KSP temp_ksp;
809 PC pc;
810 IS primals, dirichletIS = 0, neumannIS = 0, *bddc_dofs_splitting;
811 PetscInt vidx[8], localsize, *xadj = NULL, *adjncy = NULL;
812 MatNullSpace near_null_space;
813
814 PetscFunctionBeginUser;
815 PetscCall(KSPCreate(dd.gcomm, &temp_ksp));
816 PetscCall(KSPSetOperators(temp_ksp, A, A));
817 PetscCall(KSPSetType(temp_ksp, KSPCG));
818 PetscCall(KSPGetPC(temp_ksp, &pc));
819 PetscCall(PCSetType(pc, PCBDDC));
820
821 localsize = dd.xm_l * dd.ym_l * dd.zm_l;
822
823 /* BDDC customization */
824
825 /* jumping coefficients case */
826 PetscCall(PCISSetSubdomainScalingFactor(pc, dd.scalingfactor));
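/* dd.scalingfactor is 10^(+jump) or 10^(-jump) depending on the parity of the rank (see
   InitializeDomainData), so a nonzero -jump exercises BDDC scaling with coefficient jumps
   across subdomains. */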
827
828 /* Dofs splitting
829 Simple stride-1 IS
830 It is not needed since, by default, PCBDDC assumes a stride-1 split */
831 PetscCall(PetscMalloc1(1, &bddc_dofs_splitting));
832 #if 1
833 PetscCall(ISCreateStride(PETSC_COMM_WORLD, localsize, 0, 1, &bddc_dofs_splitting[0]));
834 PetscCall(PCBDDCSetDofsSplittingLocal(pc, 1, bddc_dofs_splitting));
835 #else
836 /* examples for global ordering */
837
838 /* each process lists the nodes it owns */
839 PetscInt sr, er;
840 PetscCall(MatGetOwnershipRange(A, &sr, &er));
841 PetscCall(ISCreateStride(PETSC_COMM_WORLD, er - sr, sr, 1, &bddc_dofs_splitting[0]));
842 PetscCall(PCBDDCSetDofsSplitting(pc, 1, bddc_dofs_splitting));
843 /* Split can be passed in a more general way since any process can list any node */
844 #endif
845 PetscCall(ISDestroy(&bddc_dofs_splitting[0]));
846 PetscCall(PetscFree(bddc_dofs_splitting));
847
848 /* Primal constraints implemented by using a near null space attached to A -> here we pass in only the constants
849 (which in practice is not needed, since by default PCBDDC builds the primal space using constants for quadrature formulas) */
850 #if 0
851 Vec vecs[2];
852 PetscRandom rctx;
853 PetscCall(MatCreateVecs(A,&vecs[0],&vecs[1]));
854 PetscCall(PetscRandomCreate(dd.gcomm,&rctx));
855 PetscCall(VecSetRandom(vecs[0],rctx));
856 PetscCall(VecSetRandom(vecs[1],rctx));
857 PetscCall(MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space));
858 PetscCall(VecDestroy(&vecs[0]));
859 PetscCall(VecDestroy(&vecs[1]));
860 PetscCall(PetscRandomDestroy(&rctx));
861 #else
862 PetscCall(MatNullSpaceCreate(dd.gcomm, PETSC_TRUE, 0, NULL, &near_null_space));
863 #endif
864 PetscCall(MatSetNearNullSpace(A, near_null_space));
865 PetscCall(MatNullSpaceDestroy(&near_null_space));
866
867 /* CSR graph of subdomain dofs */
868 PetscCall(BuildCSRGraph(dd, &xadj, &adjncy));
869 PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, localsize, xadj, adjncy, PETSC_OWN_POINTER));
870
871 /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
872 vidx[0] = 0 * dd.xm_l + 0;
873 vidx[1] = 0 * dd.xm_l + dd.xm_l - 1;
874 vidx[2] = (dd.ym_l - 1) * dd.xm_l + 0;
875 vidx[3] = (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
876 vidx[4] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + 0;
877 vidx[5] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + dd.xm_l - 1;
878 vidx[6] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + 0;
879 vidx[7] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
880 PetscCall(ISCreateGeneral(dd.gcomm, 8, vidx, PETSC_COPY_VALUES, &primals));
881 PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
882 PetscCall(ISDestroy(&primals));
883
884 /* Neumann/Dirichlet indices on the global boundary */
885 if (dd.DBC_zerorows) {
886 /* Only when some rows of the matrix have been eliminated with the zerorows function do you need to set Dirichlet indices into the PCBDDC data */
887 PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
888 PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
889 PetscCall(PCBDDCSetDirichletBoundariesLocal(pc, dirichletIS));
890 } else {
891 if (dd.pure_neumann) {
892 /* In such a case, all interface nodes lying on the global boundary are neumann nodes */
893 PetscCall(ComputeSpecialBoundaryIndices(dd, NULL, &neumannIS));
894 PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
895 } else {
896 /* It is wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
897 /* But we can still compute them, since nodes near the Dirichlet boundaries do not need to be defined as Neumann nodes */
898 PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
899 PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
900 }
901 }
902
903 /* Pass local null space information to local matrices (needed when using approximate local solvers) */
904 if (dd.ipx || dd.pure_neumann) {
905 MatNullSpace nsp;
906 Mat local_mat;
907
908 PetscCall(MatISGetLocalMat(A, &local_mat));
909 PetscCall(MatNullSpaceCreate(PETSC_COMM_SELF, PETSC_TRUE, 0, NULL, &nsp));
910 PetscCall(MatSetNullSpace(local_mat, nsp));
911 PetscCall(MatNullSpaceDestroy(&nsp));
912 }
913 PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
914 PetscCall(KSPSetOptionsPrefix(temp_ksp, "physical_"));
915 PetscCall(KSPSetFromOptions(temp_ksp));
916 PetscCall(KSPSetUp(temp_ksp));
917 *ksp = temp_ksp;
918 PetscCall(ISDestroy(&dirichletIS));
919 PetscCall(ISDestroy(&neumannIS));
920 PetscFunctionReturn(PETSC_SUCCESS);
921 }
922
923 static PetscErrorCode InitializeDomainData(DomainData *dd)
924 {
925 PetscMPIInt sizes, rank;
926 PetscInt factor;
927
928 PetscFunctionBeginUser;
929 dd->gcomm = PETSC_COMM_WORLD;
930 PetscCallMPI(MPI_Comm_size(dd->gcomm, &sizes));
931 PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
932 /* Get information from command line */
933 /* Processors/subdomains per dimension */
934 /* Default is 1d problem */
935 dd->npx = sizes;
936 dd->npy = 0;
937 dd->npz = 0;
938 dd->dim = 1;
939 PetscCall(PetscOptionsGetInt(NULL, NULL, "-npx", &dd->npx, NULL));
940 PetscCall(PetscOptionsGetInt(NULL, NULL, "-npy", &dd->npy, NULL));
941 PetscCall(PetscOptionsGetInt(NULL, NULL, "-npz", &dd->npz, NULL));
942 if (dd->npy) dd->dim++;
943 if (dd->npz) dd->dim++;
944 /* Number of elements per dimension */
945 /* Default is one element per subdomain */
946 dd->nex = dd->npx;
947 dd->ney = dd->npy;
948 dd->nez = dd->npz;
949 PetscCall(PetscOptionsGetInt(NULL, NULL, "-nex", &dd->nex, NULL));
950 PetscCall(PetscOptionsGetInt(NULL, NULL, "-ney", &dd->ney, NULL));
951 PetscCall(PetscOptionsGetInt(NULL, NULL, "-nez", &dd->nez, NULL));
952 if (!dd->npy) {
953 dd->ney = 0;
954 dd->nez = 0;
955 }
956 if (!dd->npz) dd->nez = 0;
957 /* Spectral degree */
958 dd->p = 3;
959 PetscCall(PetscOptionsGetInt(NULL, NULL, "-p", &dd->p, NULL));
960 /* pure neumann problem? */
961 dd->pure_neumann = PETSC_FALSE;
962 PetscCall(PetscOptionsGetBool(NULL, NULL, "-pureneumann", &dd->pure_neumann, NULL));
963
964 /* How to enforce dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */
965 dd->DBC_zerorows = PETSC_FALSE;
966
967 PetscCall(PetscOptionsGetBool(NULL, NULL, "-usezerorows", &dd->DBC_zerorows, NULL));
968 if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
969 dd->scalingfactor = 1.0;
970
971 factor = 0;
972 PetscCall(PetscOptionsGetInt(NULL, NULL, "-jump", &factor, NULL));
973 /* checkerboard pattern */
974 dd->scalingfactor = PetscPowScalar(10.0, factor * PetscPowInt(-1, rank));
975 /* test data passed in */
976 if (dd->dim == 1) {
977 PetscCheck(sizes == dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 1D must be equal to npx");
978 PetscCheck(dd->nex >= dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
979 } else if (dd->dim == 2) {
980 PetscCheck(sizes == dd->npx * dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 2D must be equal to npx*npy");
981 PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
982 } else {
983 PetscCheck(sizes == dd->npx * dd->npy * dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 3D must be equal to npx*npy*npz");
984 PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy && dd->nez >= dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of ranks per dim");
985 }
986 PetscFunctionReturn(PETSC_SUCCESS);
987 }
988
989 int main(int argc, char **args)
990 {
991 DomainData dd;
992 PetscReal norm, maxeig, mineig;
993 PetscScalar scalar_value;
994 PetscInt ndofs, its;
995 Mat A = NULL, F = NULL;
996 KSP KSPwithBDDC = NULL, KSPwithFETIDP = NULL;
997 KSPConvergedReason reason;
998 Vec exact_solution = NULL, bddc_solution = NULL, bddc_rhs = NULL;
999 PetscBool testfetidp = PETSC_TRUE;
1000
1001 /* Init PETSc */
1002 PetscFunctionBeginUser;
1003 PetscCall(PetscInitialize(&argc, &args, NULL, help));
1004 /* Initialize DomainData */
1005 PetscCall(InitializeDomainData(&dd));
1006 /* Decompose domain */
1007 PetscCall(DomainDecomposition(&dd));
1008 #if DEBUG
1009 printf("Subdomain data\n");
1010 printf("IPS : %d %d %d\n", dd.ipx, dd.ipy, dd.ipz);
1011 printf("NEG : %d %d %d\n", dd.nex, dd.ney, dd.nez);
1012 printf("NEL : %d %d %d\n", dd.nex_l, dd.ney_l, dd.nez_l);
1013 printf("LDO : %d %d %d\n", dd.xm_l, dd.ym_l, dd.zm_l);
1014 printf("SIZES : %d %d %d\n", dd.xm, dd.ym, dd.zm);
1015 printf("STARTS: %d %d %d\n", dd.startx, dd.starty, dd.startz);
1016 #endif
1017 dd.testkspfetidp = PETSC_TRUE;
1018 PetscCall(PetscOptionsGetBool(NULL, NULL, "-testfetidp", &testfetidp, NULL));
1019 PetscCall(PetscOptionsGetBool(NULL, NULL, "-testkspfetidp", &dd.testkspfetidp, NULL));
1020 /* assemble global matrix */
1021 PetscCall(ComputeMatrix(dd, &A));
1022 /* get work vectors */
1023 PetscCall(MatCreateVecs(A, &bddc_solution, NULL));
1024 PetscCall(VecDuplicate(bddc_solution, &bddc_rhs));
1025 PetscCall(VecDuplicate(bddc_solution, &exact_solution));
1026 /* create and customize KSP/PC for BDDC */
1027 PetscCall(ComputeKSPBDDC(dd, A, &KSPwithBDDC));
1028 /* create KSP/PC for FETIDP */
1029 if (testfetidp) PetscCall(ComputeKSPFETIDP(dd, KSPwithBDDC, &KSPwithFETIDP));
1030 /* create exact solution (random in the real case, constant in the complex case) */
1031 #if defined(PETSC_USE_COMPLEX)
1032 PetscCall(VecSet(exact_solution, 1.0 + PETSC_i));
1033 #else
1034 PetscCall(VecSetRandom(exact_solution, NULL));
1035 #endif
1036 PetscCall(VecShift(exact_solution, -0.5));
1037 PetscCall(VecScale(exact_solution, 100.0));
1038 PetscCall(VecGetSize(exact_solution, &ndofs));
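/* For the pure Neumann problem the operator has the constant vector in its null space, so the
   exact solution (and, later, the computed solutions) is shifted to have zero mean before comparing. */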
1039 if (dd.pure_neumann) {
1040 PetscCall(VecSum(exact_solution, &scalar_value));
1041 scalar_value = -scalar_value / (PetscScalar)ndofs;
1042 PetscCall(VecShift(exact_solution, scalar_value));
1043 }
1044 /* assemble BDDC rhs */
1045 PetscCall(MatMult(A, exact_solution, bddc_rhs));
1046 /* test ksp with BDDC */
1047 PetscCall(KSPSolve(KSPwithBDDC, bddc_rhs, bddc_solution));
1048 PetscCall(KSPGetIterationNumber(KSPwithBDDC, &its));
1049 PetscCall(KSPGetConvergedReason(KSPwithBDDC, &reason));
1050 PetscCall(KSPComputeExtremeSingularValues(KSPwithBDDC, &maxeig, &mineig));
1051 if (dd.pure_neumann) {
1052 PetscCall(VecSum(bddc_solution, &scalar_value));
1053 scalar_value = -scalar_value / (PetscScalar)ndofs;
1054 PetscCall(VecShift(bddc_solution, scalar_value));
1055 }
1056 /* check exact_solution and BDDC solution */
1057 PetscCall(VecAXPY(bddc_solution, -1.0, exact_solution));
1058 PetscCall(VecNorm(bddc_solution, NORM_INFINITY, &norm));
1059 PetscCall(PetscPrintf(dd.gcomm, "---------------------BDDC stats-------------------------------\n"));
1060 PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom : %8" PetscInt_FMT "\n", ndofs));
1061 if (reason < 0) {
1062 PetscCall(PetscPrintf(dd.gcomm, "Number of iterations : %8" PetscInt_FMT "\n", its));
1063 PetscCall(PetscPrintf(dd.gcomm, "Converged reason : %s\n", KSPConvergedReasons[reason]));
1064 }
1065 if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1066 PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1067 if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1068 PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1069 if (testfetidp) {
1070 Vec fetidp_solution_all = NULL, fetidp_solution = NULL, fetidp_rhs = NULL;
1071
1072 PetscCall(VecDuplicate(bddc_solution, &fetidp_solution_all));
1073 if (!dd.testkspfetidp) {
1074 /* assemble fetidp rhs on the space of Lagrange multipliers */
1075 PetscCall(KSPGetOperators(KSPwithFETIDP, &F, NULL));
1076 PetscCall(MatCreateVecs(F, &fetidp_solution, &fetidp_rhs));
1077 PetscCall(PCBDDCMatFETIDPGetRHS(F, bddc_rhs, fetidp_rhs));
1078 PetscCall(VecSet(fetidp_solution, 0.0));
1079 /* test ksp with FETIDP */
1080 PetscCall(KSPSolve(KSPwithFETIDP, fetidp_rhs, fetidp_solution));
1081 /* assemble fetidp solution on physical domain */
1082 PetscCall(PCBDDCMatFETIDPGetSolution(F, fetidp_solution, fetidp_solution_all));
1083 } else {
1084 KSP kspF;
1085 PetscCall(KSPSolve(KSPwithFETIDP, bddc_rhs, fetidp_solution_all));
1086 PetscCall(KSPFETIDPGetInnerKSP(KSPwithFETIDP, &kspF));
1087 PetscCall(KSPGetOperators(kspF, &F, NULL));
1088 }
1089 PetscCall(MatGetSize(F, &ndofs, NULL));
1090 PetscCall(KSPGetIterationNumber(KSPwithFETIDP, &its));
1091 PetscCall(KSPGetConvergedReason(KSPwithFETIDP, &reason));
1092 PetscCall(KSPComputeExtremeSingularValues(KSPwithFETIDP, &maxeig, &mineig));
1093 /* check FETIDP sol */
1094 if (dd.pure_neumann) {
1095 PetscCall(VecSum(fetidp_solution_all, &scalar_value));
1096 scalar_value = -scalar_value / (PetscScalar)ndofs;
1097 PetscCall(VecShift(fetidp_solution_all, scalar_value));
1098 }
1099 PetscCall(VecAXPY(fetidp_solution_all, -1.0, exact_solution));
1100 PetscCall(VecNorm(fetidp_solution_all, NORM_INFINITY, &norm));
1101 PetscCall(PetscPrintf(dd.gcomm, "------------------FETI-DP stats-------------------------------\n"));
1102 PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom : %8" PetscInt_FMT "\n", ndofs));
1103 if (reason < 0) {
1104 PetscCall(PetscPrintf(dd.gcomm, "Number of iterations : %8" PetscInt_FMT "\n", its));
1105 PetscCall(PetscPrintf(dd.gcomm, "Converged reason : %s\n", KSPConvergedReasons[reason]));
1106 }
1107 if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1108 PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1109 if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1110 PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1111 PetscCall(VecDestroy(&fetidp_solution));
1112 PetscCall(VecDestroy(&fetidp_solution_all));
1113 PetscCall(VecDestroy(&fetidp_rhs));
1114 }
1115 PetscCall(KSPDestroy(&KSPwithFETIDP));
1116 PetscCall(VecDestroy(&exact_solution));
1117 PetscCall(VecDestroy(&bddc_solution));
1118 PetscCall(VecDestroy(&bddc_rhs));
1119 PetscCall(MatDestroy(&A));
1120 PetscCall(KSPDestroy(&KSPwithBDDC));
1121 /* Quit PETSc */
1122 PetscCall(PetscFinalize());
1123 return 0;
1124 }
1125
1126 /*TEST
1127
1128 testset:
1129 nsize: 4
1130 args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1131 output_file: output/ex59_bddc_fetidp_1.out
1132 test:
1133 suffix: bddc_fetidp_1
1134 test:
1135 requires: viennacl
1136 suffix: bddc_fetidp_1_viennacl
1137 args: -subdomain_mat_type aijviennacl
1138 test:
1139 requires: cuda
1140 suffix: bddc_fetidp_1_cuda
1141 args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse
1142
1143 testset:
1144 nsize: 4
1145 args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1146 requires: !single
1147 test:
1148 suffix: bddc_fetidp_2
1149 test:
1150 suffix: bddc_fetidp_3
1151 args: -npz 1 -nez 1
1152 test:
1153 suffix: bddc_fetidp_4
1154 args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg
1155
1156 testset:
1157 nsize: 8
1158 suffix: bddc_fetidp_approximate
1159 args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_type cg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_max_it 10 -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0
1160
1161 testset:
1162 nsize: 4
1163 args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1164 filter: grep -v "variant HERMITIAN"
1165 requires: !single
1166 test:
1167 suffix: bddc_fetidp_ml_1
1168 args: -physical_pc_bddc_coarsening_ratio 1
1169 test:
1170 suffix: bddc_fetidp_ml_2
1171 args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1172 test:
1173 suffix: bddc_fetidp_ml_3
1174 args: -physical_pc_bddc_coarsening_ratio 4
1175
1176 testset:
1177 nsize: 9
1178 args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1179 output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1180 test:
1181 suffix: bddc_fetidp_ml_eqlimit_1
1182 args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1183 test:
1184 suffix: bddc_fetidp_ml_eqlimit_2
1185 args: -physical_pc_bddc_coarse_eqs_limit 46
1186
1187 TEST*/
1188