1 static char help[] = "This example illustrates the use of PCBDDC/FETI-DP and its customization.\n\n\
2 Discrete system: 1D, 2D or 3D laplacian, discretized with spectral elements.\n\
3 Spectral degree can be specified by passing values to -p option.\n\
4 Global problem with either Dirichlet boundary conditions on one side or a pure Neumann problem (depending on runtime parameters).\n\
5 Domain is [-nex,nex]x[-ney,ney]x[-nez,nez]: ne_ is the number of elements in the _ direction.\n\
6 Example usage: \n\
7 1D: mpiexec -n 4 ex59 -nex 7\n\
8 2D: mpiexec -n 4 ex59 -npx 2 -npy 2 -nex 2 -ney 2\n\
9 3D: mpiexec -n 4 ex59 -npx 2 -npy 2 -npz 1 -nex 2 -ney 2 -nez 1\n\
10 Subdomain decomposition can be specified with -np_ parameters.\n\
11 Dirichlet boundary conditions are imposed on one side by default:\n\
12 the solver does not iterate on Dirichlet nodes by default; if -usezerorows is passed in, it also iterates on Dirichlet nodes.\n\
13 A pure Neumann problem can be requested by passing in -pureneumann.\n\
14 In the latter case, in order to avoid runtime errors during factorization, please also specify -coarse_redundant_pc_factor_zeropivot 0\n\n";
15 
16 #include <petscksp.h>
17 #include <petscpc.h>
18 #include <petscdm.h>
19 #include <petscdmda.h>
20 #include <petscblaslapack.h>
21 #define DEBUG 0
22 
23 /* structure holding domain data */
24 typedef struct {
25   /* communicator */
26   MPI_Comm gcomm;
27   /* space dimension */
28   PetscInt dim;
29   /* spectral degree */
30   PetscInt p;
31   /* subdomains per dimension */
32   PetscInt npx, npy, npz;
33   /* subdomain index in cartesian dimensions */
34   PetscInt ipx, ipy, ipz;
35   /* elements per dimension */
36   PetscInt nex, ney, nez;
37   /* local elements per dimension */
38   PetscInt nex_l, ney_l, nez_l;
39   /* global number of dofs per dimension */
40   PetscInt xm, ym, zm;
41   /* local number of dofs per dimension */
42   PetscInt xm_l, ym_l, zm_l;
43   /* starting global indexes for subdomain in lexicographic ordering */
44   PetscInt startx, starty, startz;
45   /* pure Neumann problem */
46   PetscBool pure_neumann;
47   /* Dirichlet BC implementation */
48   PetscBool DBC_zerorows;
49   /* Scaling factor for subdomain */
50   PetscScalar scalingfactor;
51   PetscBool   testkspfetidp;
52 } DomainData;
53 
54 /* structure holding GLL data */
55 typedef struct {
56   /* GLL nodes */
57   PetscReal *zGL;
58   /* GLL weights */
59   PetscScalar *rhoGL;
60   /* aux_mat */
61   PetscScalar **A;
62   /* Element matrix */
63   Mat elem_mat;
64 } GLLData;
65 
66 static PetscErrorCode BuildCSRGraph(DomainData dd, PetscInt **xadj, PetscInt **adjncy)
67 {
68   PetscInt *xadj_temp, *adjncy_temp;
69   PetscInt  i, j, k, ii, jj, kk, iindex, count_adj;
70   PetscInt  istart_csr, iend_csr, jstart_csr, jend_csr, kstart_csr, kend_csr;
71   PetscBool internal_node;
72 
73   /* first count dimension of adjncy */
74   PetscFunctionBeginUser;
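  /* Adjacency is built only for dofs on the boundary of the local subdomain box: a boundary dof is
     connected to its neighbouring dofs along the directions in which it is not extremal (so the graph
     couples dofs within faces and edges), while purely interior dofs are connected only to themselves.
     PCBDDC uses this local graph in its analysis of the interface connectivity. */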
75   count_adj = 0;
76   for (k = 0; k < dd.zm_l; k++) {
77     internal_node = PETSC_TRUE;
78     kstart_csr    = k > 0 ? k - 1 : k;
79     kend_csr      = k < dd.zm_l - 1 ? k + 2 : k + 1;
80     if (k == 0 || k == dd.zm_l - 1) {
81       internal_node = PETSC_FALSE;
82       kstart_csr    = k;
83       kend_csr      = k + 1;
84     }
85     for (j = 0; j < dd.ym_l; j++) {
86       jstart_csr = j > 0 ? j - 1 : j;
87       jend_csr   = j < dd.ym_l - 1 ? j + 2 : j + 1;
88       if (j == 0 || j == dd.ym_l - 1) {
89         internal_node = PETSC_FALSE;
90         jstart_csr    = j;
91         jend_csr      = j + 1;
92       }
93       for (i = 0; i < dd.xm_l; i++) {
94         istart_csr = i > 0 ? i - 1 : i;
95         iend_csr   = i < dd.xm_l - 1 ? i + 2 : i + 1;
96         if (i == 0 || i == dd.xm_l - 1) {
97           internal_node = PETSC_FALSE;
98           istart_csr    = i;
99           iend_csr      = i + 1;
100         }
101         if (internal_node) {
102           istart_csr = i;
103           iend_csr   = i + 1;
104           jstart_csr = j;
105           jend_csr   = j + 1;
106           kstart_csr = k;
107           kend_csr   = k + 1;
108         }
109         for (kk = kstart_csr; kk < kend_csr; kk++) {
110           for (jj = jstart_csr; jj < jend_csr; jj++) {
111             for (ii = istart_csr; ii < iend_csr; ii++) count_adj = count_adj + 1;
112           }
113         }
114       }
115     }
116   }
117   PetscCall(PetscMalloc1(dd.xm_l * dd.ym_l * dd.zm_l + 1, &xadj_temp));
118   PetscCall(PetscMalloc1(count_adj, &adjncy_temp));
119 
120   /* now fill CSR data structure */
121   count_adj = 0;
122   for (k = 0; k < dd.zm_l; k++) {
123     internal_node = PETSC_TRUE;
124     kstart_csr    = k > 0 ? k - 1 : k;
125     kend_csr      = k < dd.zm_l - 1 ? k + 2 : k + 1;
126     if (k == 0 || k == dd.zm_l - 1) {
127       internal_node = PETSC_FALSE;
128       kstart_csr    = k;
129       kend_csr      = k + 1;
130     }
131     for (j = 0; j < dd.ym_l; j++) {
132       jstart_csr = j > 0 ? j - 1 : j;
133       jend_csr   = j < dd.ym_l - 1 ? j + 2 : j + 1;
134       if (j == 0 || j == dd.ym_l - 1) {
135         internal_node = PETSC_FALSE;
136         jstart_csr    = j;
137         jend_csr      = j + 1;
138       }
139       for (i = 0; i < dd.xm_l; i++) {
140         istart_csr = i > 0 ? i - 1 : i;
141         iend_csr   = i < dd.xm_l - 1 ? i + 2 : i + 1;
142         if (i == 0 || i == dd.xm_l - 1) {
143           internal_node = PETSC_FALSE;
144           istart_csr    = i;
145           iend_csr      = i + 1;
146         }
147         iindex            = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
148         xadj_temp[iindex] = count_adj;
149         if (internal_node) {
150           istart_csr = i;
151           iend_csr   = i + 1;
152           jstart_csr = j;
153           jend_csr   = j + 1;
154           kstart_csr = k;
155           kend_csr   = k + 1;
156         }
157         for (kk = kstart_csr; kk < kend_csr; kk++) {
158           for (jj = jstart_csr; jj < jend_csr; jj++) {
159             for (ii = istart_csr; ii < iend_csr; ii++) {
160               iindex = kk * dd.xm_l * dd.ym_l + jj * dd.xm_l + ii;
161 
162               adjncy_temp[count_adj] = iindex;
163               count_adj              = count_adj + 1;
164             }
165           }
166         }
167       }
168     }
169   }
170   xadj_temp[dd.xm_l * dd.ym_l * dd.zm_l] = count_adj;
171 
172   *xadj   = xadj_temp;
173   *adjncy = adjncy_temp;
174   PetscFunctionReturn(PETSC_SUCCESS);
175 }
176 
177 static PetscErrorCode ComputeSpecialBoundaryIndices(DomainData dd, IS *dirichlet, IS *neumann)
178 {
179   IS         temp_dirichlet = 0, temp_neumann = 0;
180   PetscInt   localsize, i, j, k, *indices;
181   PetscBool *touched;
182 
183   PetscFunctionBeginUser;
184   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
185 
186   PetscCall(PetscMalloc1(localsize, &indices));
187   PetscCall(PetscMalloc1(localsize, &touched));
188   for (i = 0; i < localsize; i++) touched[i] = PETSC_FALSE;
189 
190   if (dirichlet) {
191     i = 0;
192     /* west boundary */
193     if (dd.ipx == 0) {
194       for (k = 0; k < dd.zm_l; k++) {
195         for (j = 0; j < dd.ym_l; j++) {
196           indices[i]          = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
197           touched[indices[i]] = PETSC_TRUE;
198           i++;
199         }
200       }
201     }
202     PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_dirichlet));
203   }
204   if (neumann) {
205     i = 0;
206     /* west boundary */
207     if (dd.ipx == 0) {
208       for (k = 0; k < dd.zm_l; k++) {
209         for (j = 0; j < dd.ym_l; j++) {
210           indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l;
211           if (!touched[indices[i]]) {
212             touched[indices[i]] = PETSC_TRUE;
213             i++;
214           }
215         }
216       }
217     }
218     /* east boundary */
219     if (dd.ipx == dd.npx - 1) {
220       for (k = 0; k < dd.zm_l; k++) {
221         for (j = 0; j < dd.ym_l; j++) {
222           indices[i] = k * dd.ym_l * dd.xm_l + j * dd.xm_l + dd.xm_l - 1;
223           if (!touched[indices[i]]) {
224             touched[indices[i]] = PETSC_TRUE;
225             i++;
226           }
227         }
228       }
229     }
230     /* south boundary */
231     if (dd.ipy == 0 && dd.dim > 1) {
232       for (k = 0; k < dd.zm_l; k++) {
233         for (j = 0; j < dd.xm_l; j++) {
234           indices[i] = k * dd.ym_l * dd.xm_l + j;
235           if (!touched[indices[i]]) {
236             touched[indices[i]] = PETSC_TRUE;
237             i++;
238           }
239         }
240       }
241     }
242     /* north boundary */
243     if (dd.ipy == dd.npy - 1 && dd.dim > 1) {
244       for (k = 0; k < dd.zm_l; k++) {
245         for (j = 0; j < dd.xm_l; j++) {
246           indices[i] = k * dd.ym_l * dd.xm_l + (dd.ym_l - 1) * dd.xm_l + j;
247           if (!touched[indices[i]]) {
248             touched[indices[i]] = PETSC_TRUE;
249             i++;
250           }
251         }
252       }
253     }
254     /* bottom boundary */
255     if (dd.ipz == 0 && dd.dim > 2) {
256       for (k = 0; k < dd.ym_l; k++) {
257         for (j = 0; j < dd.xm_l; j++) {
258           indices[i] = k * dd.xm_l + j;
259           if (!touched[indices[i]]) {
260             touched[indices[i]] = PETSC_TRUE;
261             i++;
262           }
263         }
264       }
265     }
266     /* top boundary */
267     if (dd.ipz == dd.npz - 1 && dd.dim > 2) {
268       for (k = 0; k < dd.ym_l; k++) {
269         for (j = 0; j < dd.xm_l; j++) {
270           indices[i] = (dd.zm_l - 1) * dd.ym_l * dd.xm_l + k * dd.xm_l + j;
271           if (!touched[indices[i]]) {
272             touched[indices[i]] = PETSC_TRUE;
273             i++;
274           }
275         }
276       }
277     }
278     PetscCall(ISCreateGeneral(dd.gcomm, i, indices, PETSC_COPY_VALUES, &temp_neumann));
279   }
280   if (dirichlet) *dirichlet = temp_dirichlet;
281   if (neumann) *neumann = temp_neumann;
282   PetscCall(PetscFree(indices));
283   PetscCall(PetscFree(touched));
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode ComputeMapping(DomainData dd, ISLocalToGlobalMapping *isg2lmap)
288 {
289   DM                     da;
290   AO                     ao;
291   DMBoundaryType         bx = DM_BOUNDARY_NONE, by = DM_BOUNDARY_NONE, bz = DM_BOUNDARY_NONE;
292   DMDAStencilType        stype = DMDA_STENCIL_BOX;
293   ISLocalToGlobalMapping temp_isg2lmap;
294   PetscInt               i, j, k, ig, jg, kg, lindex, gindex, localsize;
295   PetscInt              *global_indices;
296 
297   PetscFunctionBeginUser;
298   /* Not an efficient mapping: this function computes a very simple lexicographic mapping
299      just to illustrate the creation of a MATIS object */
300   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
301   PetscCall(PetscMalloc1(localsize, &global_indices));
302   for (k = 0; k < dd.zm_l; k++) {
303     kg = dd.startz + k;
304     for (j = 0; j < dd.ym_l; j++) {
305       jg = dd.starty + j;
306       for (i = 0; i < dd.xm_l; i++) {
307         ig                     = dd.startx + i;
308         lindex                 = k * dd.xm_l * dd.ym_l + j * dd.xm_l + i;
309         gindex                 = kg * dd.xm * dd.ym + jg * dd.xm + ig;
310         global_indices[lindex] = gindex;
311       }
312     }
313   }
314   if (dd.dim == 3) {
315     PetscCall(DMDACreate3d(dd.gcomm, bx, by, bz, stype, dd.xm, dd.ym, dd.zm, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, NULL, &da));
316   } else if (dd.dim == 2) {
317     PetscCall(DMDACreate2d(dd.gcomm, bx, by, stype, dd.xm, dd.ym, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
318   } else {
319     PetscCall(DMDACreate1d(dd.gcomm, bx, dd.xm, 1, 1, NULL, &da));
320   }
321   PetscCall(DMSetFromOptions(da));
322   PetscCall(DMSetUp(da));
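  /* The DMDA is used only to obtain an application ordering (AO): AOApplicationToPetsc converts the
     natural lexicographic global numbering computed above into the parallel PETSc ordering of the DMDA,
     which is then stored in the local-to-global mapping used by the MATIS matrix. */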
323   PetscCall(DMDASetAOType(da, AOMEMORYSCALABLE));
324   PetscCall(DMDAGetAO(da, &ao));
325   PetscCall(AOApplicationToPetsc(ao, dd.xm_l * dd.ym_l * dd.zm_l, global_indices));
326   PetscCall(ISLocalToGlobalMappingCreate(dd.gcomm, 1, localsize, global_indices, PETSC_OWN_POINTER, &temp_isg2lmap));
327   PetscCall(DMDestroy(&da));
328   *isg2lmap = temp_isg2lmap;
329   PetscFunctionReturn(PETSC_SUCCESS);
330 }
331 static PetscErrorCode ComputeSubdomainMatrix(DomainData dd, GLLData glldata, Mat *local_mat)
332 {
333   PetscInt     localsize, zloc, yloc, xloc, auxnex, auxney, auxnez;
334   PetscInt     ie, je, ke, i, j, k, ig, jg, kg, ii, ming;
335   PetscInt    *indexg, *cols, *colsg;
336   PetscScalar *vals;
337   Mat          temp_local_mat, elem_mat_DBC = 0, *usedmat;
338   IS           submatIS;
339 
340   PetscFunctionBeginUser;
341   PetscCall(MatGetSize(glldata.elem_mat, &i, &j));
342   PetscCall(PetscMalloc1(i, &indexg));
343   PetscCall(PetscMalloc1(i, &colsg));
344   /* get submatrix of elem_mat without Dirichlet nodes */
345   if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
346     xloc = dd.p + 1;
347     yloc = 1;
348     zloc = 1;
349     if (dd.dim > 1) yloc = dd.p + 1;
350     if (dd.dim > 2) zloc = dd.p + 1;
351     ii = 0;
352     for (k = 0; k < zloc; k++) {
353       for (j = 0; j < yloc; j++) {
354         for (i = 1; i < xloc; i++) {
355           indexg[ii] = k * xloc * yloc + j * xloc + i;
356           ii++;
357         }
358       }
359     }
360     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ii, indexg, PETSC_COPY_VALUES, &submatIS));
361     PetscCall(MatCreateSubMatrix(glldata.elem_mat, submatIS, submatIS, MAT_INITIAL_MATRIX, &elem_mat_DBC));
362     PetscCall(ISDestroy(&submatIS));
363   }
364 
365   /* Assemble subdomain matrix */
366   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
367   PetscCall(MatCreate(PETSC_COMM_SELF, &temp_local_mat));
368   PetscCall(MatSetSizes(temp_local_mat, localsize, localsize, localsize, localsize));
369   PetscCall(MatSetOptionsPrefix(temp_local_mat, "subdomain_"));
370   /* set the local matrix type: here we use SEQSBAIJ primarily for testing purposes */
371   /* to avoid conversions inside the BDDC code, use SEQAIJ if possible */
372   if (dd.DBC_zerorows && !dd.ipx) { /* in this case, we need to zero out some of the rows, so use seqaij */
373     PetscCall(MatSetType(temp_local_mat, MATSEQAIJ));
374   } else {
375     PetscCall(MatSetType(temp_local_mat, MATSEQSBAIJ));
376   }
377   PetscCall(MatSetFromOptions(temp_local_mat));
378 
379   i = PetscPowRealInt(3.0 * (dd.p + 1.0), dd.dim);
380 
381   PetscCall(MatSeqAIJSetPreallocation(temp_local_mat, i, NULL));      /* very overestimated */
382   PetscCall(MatSeqSBAIJSetPreallocation(temp_local_mat, 1, i, NULL)); /* very overestimated */
383   PetscCall(MatSeqBAIJSetPreallocation(temp_local_mat, 1, i, NULL));  /* very overestimated */
384   PetscCall(MatSetOption(temp_local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
385 
386   yloc = dd.p + 1;
387   zloc = dd.p + 1;
388   if (dd.dim < 3) zloc = 1;
389   if (dd.dim < 2) yloc = 1;
390 
391   auxnez = dd.nez_l;
392   auxney = dd.ney_l;
393   auxnex = dd.nex_l;
394   if (dd.dim < 3) auxnez = 1;
395   if (dd.dim < 2) auxney = 1;
396 
397   for (ke = 0; ke < auxnez; ke++) {
398     for (je = 0; je < auxney; je++) {
399       for (ie = 0; ie < auxnex; ie++) {
400         /* customize element accounting for BC */
401         xloc    = dd.p + 1;
402         ming    = 0;
403         usedmat = &glldata.elem_mat;
404         if (!dd.pure_neumann && !dd.DBC_zerorows && !dd.ipx) {
405           if (ie == 0) {
406             xloc    = dd.p;
407             usedmat = &elem_mat_DBC;
408           } else {
409             ming    = -1;
410             usedmat = &glldata.elem_mat;
411           }
412         }
413         /* local to the element/global to the subdomain indexing */
414         for (k = 0; k < zloc; k++) {
415           kg = ke * dd.p + k;
416           for (j = 0; j < yloc; j++) {
417             jg = je * dd.p + j;
418             for (i = 0; i < xloc; i++) {
419               ig         = ie * dd.p + i + ming;
420               ii         = k * xloc * yloc + j * xloc + i;
421               indexg[ii] = kg * dd.xm_l * dd.ym_l + jg * dd.xm_l + ig;
422             }
423           }
424         }
425         /* Set values */
426         for (i = 0; i < xloc * yloc * zloc; i++) {
427           PetscCall(MatGetRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
428           for (k = 0; k < j; k++) colsg[k] = indexg[cols[k]];
429           PetscCall(MatSetValues(temp_local_mat, 1, &indexg[i], j, colsg, vals, ADD_VALUES));
430           PetscCall(MatRestoreRow(*usedmat, i, &j, (const PetscInt **)&cols, (const PetscScalar **)&vals));
431         }
432       }
433     }
434   }
435   PetscCall(PetscFree(indexg));
436   PetscCall(PetscFree(colsg));
437   PetscCall(MatAssemblyBegin(temp_local_mat, MAT_FINAL_ASSEMBLY));
438   PetscCall(MatAssemblyEnd(temp_local_mat, MAT_FINAL_ASSEMBLY));
439 #if DEBUG
440   {
441     Vec       lvec, rvec;
442     PetscReal norm;
443     PetscCall(MatCreateVecs(temp_local_mat, &lvec, &rvec));
444     PetscCall(VecSet(lvec, 1.0));
445     PetscCall(MatMult(temp_local_mat, lvec, rvec));
446     PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
447     PetscCall(VecDestroy(&lvec));
448     PetscCall(VecDestroy(&rvec));
449   }
450 #endif
451   *local_mat = temp_local_mat;
452   PetscCall(MatDestroy(&elem_mat_DBC));
453   PetscFunctionReturn(PETSC_SUCCESS);
454 }
455 
456 static PetscErrorCode GLLStuffs(DomainData dd, GLLData *glldata)
457 {
458   PetscReal   *M, si;
459   PetscScalar  x, z0, z1, z2, Lpj, Lpr, rhoGLj, rhoGLk;
460   PetscBLASInt pm1, lierr;
461   PetscInt     i, j, n, k, s, r, q, ii, jj, p = dd.p;
462   PetscInt     xloc, yloc, zloc, xyloc, xyzloc;
463 
464   PetscFunctionBeginUser;
465   /* Gauss-Lobatto-Legendre nodes zGL on [-1,1] */
466   PetscCall(PetscCalloc1(p + 1, &glldata->zGL));
467 
468   glldata->zGL[0] = -1.0;
469   glldata->zGL[p] = 1.0;
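  /* The interior GLL nodes are the roots of P_p' (P_p the Legendre polynomial of degree p). They are
     computed, Golub-Welsch style, as the eigenvalues of the symmetric tridiagonal Jacobi matrix of the
     Jacobi polynomials P_k^(1,1), whose off-diagonal entries are the M[i] below; LAPACK's steqr returns
     them in ascending order in zGL[1],...,zGL[p-1]. */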
470   if (p > 1) {
471     if (p == 2) glldata->zGL[1] = 0.0;
472     else {
473       PetscCall(PetscMalloc1(p - 1, &M));
474       for (i = 0; i < p - 1; i++) {
475         si   = (PetscReal)(i + 1.0);
476         M[i] = 0.5 * PetscSqrtReal(si * (si + 2.0) / ((si + 0.5) * (si + 1.5)));
477       }
478       pm1 = (PetscBLASInt)(p - 1);
479       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
480       PetscCallBLAS("LAPACKsteqr", LAPACKsteqr_("N", &pm1, &glldata->zGL[1], M, &x, &pm1, M, &lierr));
481       PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in STEQR LAPACK routine %d", (int)lierr);
482       PetscCall(PetscFPTrapPop());
483       PetscCall(PetscFree(M));
484     }
485   }
486 
487   /* Weights for 1D quadrature */
488   PetscCall(PetscMalloc1(p + 1, &glldata->rhoGL));
489 
490   glldata->rhoGL[0] = 2.0 / (PetscScalar)(p * (p + 1.0));
491   glldata->rhoGL[p] = glldata->rhoGL[0];
492   z2                = -1; /* Dummy value to avoid -Wmaybe-uninitialized */
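  /* Interior GLL quadrature weights: rhoGL[i] = 2/(p*(p+1)*[P_p(zGL[i])]^2), where P_p(x) is evaluated
     with the three-term (Bonnet) recurrence
       P_{n+1}(x) = ((2n+1)*x*P_n(x) - n*P_{n-1}(x))/(n+1);
     the same recurrence is reused below when assembling the auxiliary matrix A. */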
493   for (i = 1; i < p; i++) {
494     x  = glldata->zGL[i];
495     z0 = 1.0;
496     z1 = x;
497     for (n = 1; n < p; n++) {
498       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
499       z0 = z1;
500       z1 = z2;
501     }
502     glldata->rhoGL[i] = 2.0 / (p * (p + 1.0) * z2 * z2);
503   }
504 
505   /* Auxiliary mat for laplacian */
506   PetscCall(PetscMalloc1(p + 1, &glldata->A));
507   PetscCall(PetscMalloc1((p + 1) * (p + 1), &glldata->A[0]));
508   for (i = 1; i < p + 1; i++) glldata->A[i] = glldata->A[i - 1] + p + 1;
509 
510   for (j = 1; j < p; j++) {
511     x  = glldata->zGL[j];
512     z0 = 1.0;
513     z1 = x;
514     for (n = 1; n < p; n++) {
515       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
516       z0 = z1;
517       z1 = z2;
518     }
519     Lpj = z2;
520     for (r = 1; r < p; r++) {
521       if (r == j) {
522         glldata->A[j][j] = 2.0 / (3.0 * (1.0 - glldata->zGL[j] * glldata->zGL[j]) * Lpj * Lpj);
523       } else {
524         x  = glldata->zGL[r];
525         z0 = 1.0;
526         z1 = x;
527         for (n = 1; n < p; n++) {
528           z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
529           z0 = z1;
530           z1 = z2;
531         }
532         Lpr              = z2;
533         glldata->A[r][j] = 4.0 / (p * (p + 1.0) * Lpj * Lpr * (glldata->zGL[j] - glldata->zGL[r]) * (glldata->zGL[j] - glldata->zGL[r]));
534       }
535     }
536   }
537   for (j = 1; j < p + 1; j++) {
538     x  = glldata->zGL[j];
539     z0 = 1.0;
540     z1 = x;
541     for (n = 1; n < p; n++) {
542       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
543       z0 = z1;
544       z1 = z2;
545     }
546     Lpj              = z2;
547     glldata->A[j][0] = 4.0 * PetscPowRealInt(-1.0, p) / (p * (p + 1.0) * Lpj * (1.0 + glldata->zGL[j]) * (1.0 + glldata->zGL[j]));
548     glldata->A[0][j] = glldata->A[j][0];
549   }
550   for (j = 0; j < p; j++) {
551     x  = glldata->zGL[j];
552     z0 = 1.0;
553     z1 = x;
554     for (n = 1; n < p; n++) {
555       z2 = x * z1 * (2.0 * n + 1.0) / (n + 1.0) - z0 * (PetscScalar)(n / (n + 1.0));
556       z0 = z1;
557       z1 = z2;
558     }
559     Lpj = z2;
560 
561     glldata->A[p][j] = 4.0 / (p * (p + 1.0) * Lpj * (1.0 - glldata->zGL[j]) * (1.0 - glldata->zGL[j]));
562     glldata->A[j][p] = glldata->A[p][j];
563   }
564   glldata->A[0][0] = 0.5 + (p * (p + 1.0) - 2.0) / 6.0;
565   glldata->A[p][p] = glldata->A[0][0];
566 
567   /* compute element matrix */
568   xloc = p + 1;
569   yloc = p + 1;
570   zloc = p + 1;
571   if (dd.dim < 2) yloc = 1;
572   if (dd.dim < 3) zloc = 1;
573   xyloc  = xloc * yloc;
574   xyzloc = xloc * yloc * zloc;
575 
576   PetscCall(MatCreate(PETSC_COMM_SELF, &glldata->elem_mat));
577   PetscCall(MatSetSizes(glldata->elem_mat, xyzloc, xyzloc, xyzloc, xyzloc));
578   PetscCall(MatSetType(glldata->elem_mat, MATSEQAIJ));
579   PetscCall(MatSeqAIJSetPreallocation(glldata->elem_mat, xyzloc, NULL)); /* overestimated */
580   PetscCall(MatZeroEntries(glldata->elem_mat));
581   PetscCall(MatSetOption(glldata->elem_mat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
582 
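  /* The element stiffness matrix has the tensor-product structure typical of spectral elements: it is
     the sum, over the coordinate directions, of Kronecker products of the 1D stiffness A with the
     diagonal 1D GLL mass matrices diag(rhoGL); the loops below assemble it entry by entry, one
     coordinate direction at a time. */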
583   for (k = 0; k < zloc; k++) {
584     if (dd.dim > 2) rhoGLk = glldata->rhoGL[k];
585     else rhoGLk = 1.0;
586 
587     for (j = 0; j < yloc; j++) {
588       if (dd.dim > 1) rhoGLj = glldata->rhoGL[j];
589       else rhoGLj = 1.0;
590 
591       for (i = 0; i < xloc; i++) {
592         ii = k * xyloc + j * xloc + i;
593         s  = k;
594         r  = j;
595         for (q = 0; q < xloc; q++) {
596           jj = s * xyloc + r * xloc + q;
597           PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[i][q] * rhoGLj * rhoGLk, ADD_VALUES));
598         }
599         if (dd.dim > 1) {
600           s = k;
601           q = i;
602           for (r = 0; r < yloc; r++) {
603             jj = s * xyloc + r * xloc + q;
604             PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[j][r] * glldata->rhoGL[i] * rhoGLk, ADD_VALUES));
605           }
606         }
607         if (dd.dim > 2) {
608           r = j;
609           q = i;
610           for (s = 0; s < zloc; s++) {
611             jj = s * xyloc + r * xloc + q;
612             PetscCall(MatSetValue(glldata->elem_mat, jj, ii, glldata->A[k][s] * rhoGLj * glldata->rhoGL[i], ADD_VALUES));
613           }
614         }
615       }
616     }
617   }
618   PetscCall(MatAssemblyBegin(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
619   PetscCall(MatAssemblyEnd(glldata->elem_mat, MAT_FINAL_ASSEMBLY));
620 #if DEBUG
621   {
622     Vec       lvec, rvec;
623     PetscReal norm;
624     PetscCall(MatCreateVecs(glldata->elem_mat, &lvec, &rvec));
625     PetscCall(VecSet(lvec, 1.0));
626     PetscCall(MatMult(glldata->elem_mat, lvec, rvec));
627     PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
628     PetscCall(VecDestroy(&lvec));
629     PetscCall(VecDestroy(&rvec));
630   }
631 #endif
632   PetscFunctionReturn(PETSC_SUCCESS);
633 }
634 
635 static PetscErrorCode DomainDecomposition(DomainData *dd)
636 {
637   PetscMPIInt rank;
638   PetscInt    i, j, k;
639 
640   PetscFunctionBeginUser;
641   /* Subdomain index in cartesian coordinates */
642   PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
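  /* Ranks are assumed to be laid out lexicographically on the process grid,
     i.e. rank = ipx + ipy*npx + ipz*npx*npy, so the cartesian indices are recovered as below */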
643   dd->ipx = rank % dd->npx;
644   if (dd->dim > 1) dd->ipz = rank / (dd->npx * dd->npy);
645   else dd->ipz = 0;
646 
647   dd->ipy = rank / dd->npx - dd->ipz * dd->npy;
648   /* number of local elements */
649   dd->nex_l = dd->nex / dd->npx;
650   if (dd->ipx < dd->nex % dd->npx) dd->nex_l++;
651   if (dd->dim > 1) {
652     dd->ney_l = dd->ney / dd->npy;
653     if (dd->ipy < dd->ney % dd->npy) dd->ney_l++;
654   } else {
655     dd->ney_l = 0;
656   }
657   if (dd->dim > 2) {
658     dd->nez_l = dd->nez / dd->npz;
659     if (dd->ipz < dd->nez % dd->npz) dd->nez_l++;
660   } else {
661     dd->nez_l = 0;
662   }
663   /* local and global number of dofs */
664   dd->xm_l = dd->nex_l * dd->p + 1;
665   dd->xm   = dd->nex * dd->p + 1;
666   dd->ym_l = dd->ney_l * dd->p + 1;
667   dd->ym   = dd->ney * dd->p + 1;
668   dd->zm_l = dd->nez_l * dd->p + 1;
669   dd->zm   = dd->nez * dd->p + 1;
670   if (!dd->pure_neumann) {
671     if (!dd->DBC_zerorows && !dd->ipx) dd->xm_l--;
672     if (!dd->DBC_zerorows) dd->xm--;
673   }
674 
675   /* starting global index for local dofs (simple lexicographic order) */
676   dd->startx = 0;
677   j          = dd->nex / dd->npx;
678   for (i = 0; i < dd->ipx; i++) {
679     k = j;
680     if (i < dd->nex % dd->npx) k++;
681     dd->startx = dd->startx + k * dd->p;
682   }
683   if (!dd->pure_neumann && !dd->DBC_zerorows && dd->ipx) dd->startx--;
684 
685   dd->starty = 0;
686   if (dd->dim > 1) {
687     j = dd->ney / dd->npy;
688     for (i = 0; i < dd->ipy; i++) {
689       k = j;
690       if (i < dd->ney % dd->npy) k++;
691       dd->starty = dd->starty + k * dd->p;
692     }
693   }
694   dd->startz = 0;
695   if (dd->dim > 2) {
696     j = dd->nez / dd->npz;
697     for (i = 0; i < dd->ipz; i++) {
698       k = j;
699       if (i < dd->nez % dd->npz) k++;
700       dd->startz = dd->startz + k * dd->p;
701     }
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
706 static PetscErrorCode ComputeMatrix(DomainData dd, Mat *A)
707 {
708   GLLData                gll;
709   Mat                    local_mat = 0, temp_A = 0;
710   ISLocalToGlobalMapping matis_map   = 0;
711   IS                     dirichletIS = 0;
712 
713   PetscFunctionBeginUser;
714   /* Compute the Gauss-Lobatto-Legendre quadrature data */
715   PetscCall(GLLStuffs(dd, &gll));
716   /* Compute matrix of subdomain Neumann problem */
717   PetscCall(ComputeSubdomainMatrix(dd, gll, &local_mat));
718   /* Compute global mapping of local dofs */
719   PetscCall(ComputeMapping(dd, &matis_map));
720   /* Create MATIS object needed by BDDC */
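  /* A MATIS matrix stores the unassembled subdomain (Neumann) matrices together with the
     local-to-global mapping, which is exactly the information PCBDDC and KSPFETIDP operate on */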
721   PetscCall(MatCreateIS(dd.gcomm, 1, PETSC_DECIDE, PETSC_DECIDE, dd.xm * dd.ym * dd.zm, dd.xm * dd.ym * dd.zm, matis_map, matis_map, &temp_A));
722   /* Set local subdomain matrices into MATIS object */
723   PetscCall(MatScale(local_mat, dd.scalingfactor));
724   PetscCall(MatISSetLocalMat(temp_A, local_mat));
725   /* Call assembly functions */
726   PetscCall(MatAssemblyBegin(temp_A, MAT_FINAL_ASSEMBLY));
727   PetscCall(MatAssemblyEnd(temp_A, MAT_FINAL_ASSEMBLY));
728 
729   if (dd.DBC_zerorows) {
730     PetscInt dirsize;
731 
732     PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, NULL));
733     PetscCall(MatSetOption(local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
734     PetscCall(MatZeroRowsColumnsLocalIS(temp_A, dirichletIS, 1.0, NULL, NULL));
735     PetscCall(ISGetLocalSize(dirichletIS, &dirsize));
736     PetscCall(ISDestroy(&dirichletIS));
737   }
738 
739   /* giving hints to the local and global matrices can be useful for BDDC */
740   PetscCall(MatSetOption(local_mat, MAT_SPD, PETSC_TRUE));
741   PetscCall(MatSetOption(local_mat, MAT_SPD_ETERNAL, PETSC_TRUE));
742 #if DEBUG
743   {
744     Vec       lvec, rvec;
745     PetscReal norm;
746     PetscCall(MatCreateVecs(temp_A, &lvec, &rvec));
747     PetscCall(VecSet(lvec, 1.0));
748     PetscCall(MatMult(temp_A, lvec, rvec));
749     PetscCall(VecNorm(rvec, NORM_INFINITY, &norm));
750     PetscCall(VecDestroy(&lvec));
751     PetscCall(VecDestroy(&rvec));
752   }
753 #endif
754   /* free allocated workspace */
755   PetscCall(PetscFree(gll.zGL));
756   PetscCall(PetscFree(gll.rhoGL));
757   PetscCall(PetscFree(gll.A[0]));
758   PetscCall(PetscFree(gll.A));
759   PetscCall(MatDestroy(&gll.elem_mat));
760   PetscCall(MatDestroy(&local_mat));
761   PetscCall(ISLocalToGlobalMappingDestroy(&matis_map));
762   /* give back the pointer to the MATIS object */
763   *A = temp_A;
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 static PetscErrorCode ComputeKSPFETIDP(DomainData dd, KSP ksp_bddc, KSP *ksp_fetidp)
768 {
769   KSP temp_ksp;
770   PC  pc;
771 
772   PetscFunctionBeginUser;
773   PetscCall(KSPGetPC(ksp_bddc, &pc));
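  /* Two ways of solving the same FETI-DP system:
     - build the FETI-DP operator F and its Dirichlet preconditioner D explicitly from the BDDC
       preconditioner and iterate on the space of Lagrange multipliers (flux unknowns), or
     - let KSPFETIDP manage the reformulation, reusing the BDDC preconditioner as its inner solver. */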
774   if (!dd.testkspfetidp) {
775     PC  D;
776     Mat F;
777 
778     PetscCall(PCBDDCCreateFETIDPOperators(pc, PETSC_TRUE, NULL, &F, &D));
779     PetscCall(KSPCreate(PetscObjectComm((PetscObject)F), &temp_ksp));
780     PetscCall(KSPSetOperators(temp_ksp, F, F));
781     PetscCall(KSPSetType(temp_ksp, KSPCG));
782     PetscCall(KSPSetPC(temp_ksp, D));
783     PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
784     PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
785     PetscCall(KSPSetFromOptions(temp_ksp));
786     PetscCall(KSPSetUp(temp_ksp));
787     PetscCall(MatDestroy(&F));
788     PetscCall(PCDestroy(&D));
789   } else {
790     Mat A, Ap;
791 
792     PetscCall(KSPCreate(PetscObjectComm((PetscObject)ksp_bddc), &temp_ksp));
793     PetscCall(KSPGetOperators(ksp_bddc, &A, &Ap));
794     PetscCall(KSPSetOperators(temp_ksp, A, Ap));
795     PetscCall(KSPSetOptionsPrefix(temp_ksp, "fluxes_"));
796     PetscCall(KSPSetType(temp_ksp, KSPFETIDP));
797     PetscCall(KSPFETIDPSetInnerBDDC(temp_ksp, pc));
798     PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
799     PetscCall(KSPSetFromOptions(temp_ksp));
800     PetscCall(KSPSetUp(temp_ksp));
801   }
802   *ksp_fetidp = temp_ksp;
803   PetscFunctionReturn(PETSC_SUCCESS);
804 }
805 
806 static PetscErrorCode ComputeKSPBDDC(DomainData dd, Mat A, KSP *ksp)
807 {
808   KSP          temp_ksp;
809   PC           pc;
810   IS           primals, dirichletIS = 0, neumannIS = 0, *bddc_dofs_splitting;
811   PetscInt     vidx[8], localsize, *xadj = NULL, *adjncy = NULL;
812   MatNullSpace near_null_space;
813 
814   PetscFunctionBeginUser;
815   PetscCall(KSPCreate(dd.gcomm, &temp_ksp));
816   PetscCall(KSPSetOperators(temp_ksp, A, A));
817   PetscCall(KSPSetType(temp_ksp, KSPCG));
818   PetscCall(KSPGetPC(temp_ksp, &pc));
819   PetscCall(PCSetType(pc, PCBDDC));
820 
821   localsize = dd.xm_l * dd.ym_l * dd.zm_l;
822 
823   /* BDDC customization */
824 
825   /* jumping coefficients case */
826   PetscCall(PCISSetSubdomainScalingFactor(pc, dd.scalingfactor));
827 
828   /* Dofs splitting
829      Simple stride-1 IS
830      It is not needed since, by default, PCBDDC assumes a stride-1 split */
831   PetscCall(PetscMalloc1(1, &bddc_dofs_splitting));
832 #if 1
833   PetscCall(ISCreateStride(PETSC_COMM_WORLD, localsize, 0, 1, &bddc_dofs_splitting[0]));
834   PetscCall(PCBDDCSetDofsSplittingLocal(pc, 1, bddc_dofs_splitting));
835 #else
836   /* examples for global ordering */
837 
838   /* each process lists the nodes it owns */
839   PetscInt sr, er;
840   PetscCall(MatGetOwnershipRange(A, &sr, &er));
841   PetscCall(ISCreateStride(PETSC_COMM_WORLD, er - sr, sr, 1, &bddc_dofs_splitting[0]));
842   PetscCall(PCBDDCSetDofsSplitting(pc, 1, bddc_dofs_splitting));
843   /* Split can be passed in a more general way since any process can list any node */
844 #endif
845   PetscCall(ISDestroy(&bddc_dofs_splitting[0]));
846   PetscCall(PetscFree(bddc_dofs_splitting));
847 
848   /* Primal constraints implemented by attaching a near null space to A -> here only the constants are passed in
849      (which in practice is not needed, since by default PCBDDC builds the primal space using constants for quadrature formulas) */
850 #if 0
851   Vec vecs[2];
852   PetscRandom rctx;
853   PetscCall(MatCreateVecs(A,&vecs[0],&vecs[1]));
854   PetscCall(PetscRandomCreate(dd.gcomm,&rctx));
855   PetscCall(VecSetRandom(vecs[0],rctx));
856   PetscCall(VecSetRandom(vecs[1],rctx));
857   PetscCall(MatNullSpaceCreate(dd.gcomm,PETSC_TRUE,2,vecs,&near_null_space));
858   PetscCall(VecDestroy(&vecs[0]));
859   PetscCall(VecDestroy(&vecs[1]));
860   PetscCall(PetscRandomDestroy(&rctx));
861 #else
862   PetscCall(MatNullSpaceCreate(dd.gcomm, PETSC_TRUE, 0, NULL, &near_null_space));
863 #endif
864   PetscCall(MatSetNearNullSpace(A, near_null_space));
865   PetscCall(MatNullSpaceDestroy(&near_null_space));
866 
867   /* CSR graph of subdomain dofs */
868   PetscCall(BuildCSRGraph(dd, &xadj, &adjncy));
869   PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, localsize, xadj, adjncy, PETSC_OWN_POINTER));
870 
871   /* Prescribe user-defined primal vertices: in this case we use the 8 corners in 3D (for 2D and 1D, the indices are duplicated) */
872   vidx[0] = 0 * dd.xm_l + 0;
873   vidx[1] = 0 * dd.xm_l + dd.xm_l - 1;
874   vidx[2] = (dd.ym_l - 1) * dd.xm_l + 0;
875   vidx[3] = (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
876   vidx[4] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + 0;
877   vidx[5] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + 0 * dd.xm_l + dd.xm_l - 1;
878   vidx[6] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + 0;
879   vidx[7] = (dd.zm_l - 1) * dd.xm_l * dd.ym_l + (dd.ym_l - 1) * dd.xm_l + dd.xm_l - 1;
880   PetscCall(ISCreateGeneral(dd.gcomm, 8, vidx, PETSC_COPY_VALUES, &primals));
881   PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
882   PetscCall(ISDestroy(&primals));
883 
884   /* Neumann/Dirichlet indices on the global boundary */
885   if (dd.DBC_zerorows) {
886     /* Only when rows of the matrix have been eliminated with the zero-rows function do you need to pass the Dirichlet indices to PCBDDC */
887     PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
888     PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
889     PetscCall(PCBDDCSetDirichletBoundariesLocal(pc, dirichletIS));
890   } else {
891     if (dd.pure_neumann) {
892       /* In such a case, all interface nodes lying on the global boundary are Neumann nodes */
893       PetscCall(ComputeSpecialBoundaryIndices(dd, NULL, &neumannIS));
894       PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
895     } else {
896       /* It is wrong to set Dirichlet indices without having zeroed the corresponding rows in the global matrix */
897       /* But we can still compute them, since nodes near the Dirichlet boundaries do not need to be defined as Neumann nodes */
898       PetscCall(ComputeSpecialBoundaryIndices(dd, &dirichletIS, &neumannIS));
899       PetscCall(PCBDDCSetNeumannBoundariesLocal(pc, neumannIS));
900     }
901   }
902 
903   /* Pass local null space information to local matrices (needed when using approximate local solvers) */
904   if (dd.ipx || dd.pure_neumann) {
905     MatNullSpace nsp;
906     Mat          local_mat;
907 
908     PetscCall(MatISGetLocalMat(A, &local_mat));
909     PetscCall(MatNullSpaceCreate(PETSC_COMM_SELF, PETSC_TRUE, 0, NULL, &nsp));
910     PetscCall(MatSetNullSpace(local_mat, nsp));
911     PetscCall(MatNullSpaceDestroy(&nsp));
912   }
913   PetscCall(KSPSetComputeSingularValues(temp_ksp, PETSC_TRUE));
914   PetscCall(KSPSetOptionsPrefix(temp_ksp, "physical_"));
915   PetscCall(KSPSetFromOptions(temp_ksp));
916   PetscCall(KSPSetUp(temp_ksp));
917   *ksp = temp_ksp;
918   PetscCall(ISDestroy(&dirichletIS));
919   PetscCall(ISDestroy(&neumannIS));
920   PetscFunctionReturn(PETSC_SUCCESS);
921 }
922 
923 static PetscErrorCode InitializeDomainData(DomainData *dd)
924 {
925   PetscMPIInt sizes, rank;
926   PetscInt    factor;
927 
928   PetscFunctionBeginUser;
929   dd->gcomm = PETSC_COMM_WORLD;
930   PetscCallMPI(MPI_Comm_size(dd->gcomm, &sizes));
931   PetscCallMPI(MPI_Comm_rank(dd->gcomm, &rank));
932   /* Get information from command line */
933   /* Processors/subdomains per dimension */
934   /* Default is 1d problem */
935   dd->npx = sizes;
936   dd->npy = 0;
937   dd->npz = 0;
938   dd->dim = 1;
939   PetscCall(PetscOptionsGetInt(NULL, NULL, "-npx", &dd->npx, NULL));
940   PetscCall(PetscOptionsGetInt(NULL, NULL, "-npy", &dd->npy, NULL));
941   PetscCall(PetscOptionsGetInt(NULL, NULL, "-npz", &dd->npz, NULL));
942   if (dd->npy) dd->dim++;
943   if (dd->npz) dd->dim++;
944   /* Number of elements per dimension */
945   /* Default is one element per subdomain */
946   dd->nex = dd->npx;
947   dd->ney = dd->npy;
948   dd->nez = dd->npz;
949   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nex", &dd->nex, NULL));
950   PetscCall(PetscOptionsGetInt(NULL, NULL, "-ney", &dd->ney, NULL));
951   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nez", &dd->nez, NULL));
952   if (!dd->npy) {
953     dd->ney = 0;
954     dd->nez = 0;
955   }
956   if (!dd->npz) dd->nez = 0;
957   /* Spectral degree */
958   dd->p = 3;
959   PetscCall(PetscOptionsGetInt(NULL, NULL, "-p", &dd->p, NULL));
960   /* pure neumann problem? */
961   dd->pure_neumann = PETSC_FALSE;
962   PetscCall(PetscOptionsGetBool(NULL, NULL, "-pureneumann", &dd->pure_neumann, NULL));
963 
964   /* How to enforce Dirichlet boundary conditions (in case pureneumann has not been requested explicitly) */
965   dd->DBC_zerorows = PETSC_FALSE;
966 
967   PetscCall(PetscOptionsGetBool(NULL, NULL, "-usezerorows", &dd->DBC_zerorows, NULL));
968   if (dd->pure_neumann) dd->DBC_zerorows = PETSC_FALSE;
969   dd->scalingfactor = 1.0;
970 
971   factor = 0;
972   PetscCall(PetscOptionsGetInt(NULL, NULL, "-jump", &factor, NULL));
973   /* checkerboard pattern */
974   dd->scalingfactor = PetscPowScalar(10.0, factor * PetscPowInt(-1, rank));
975   /* test data passed in */
976   if (dd->dim == 1) {
977     PetscCheck(sizes == dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 1D must be equal to npx");
978     PetscCheck(dd->nex >= dd->npx, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
979   } else if (dd->dim == 2) {
980     PetscCheck(sizes == dd->npx * dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 2D must be equal to npx*npy");
981     PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of procs per dim");
982   } else {
983     PetscCheck(sizes == dd->npx * dd->npy * dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of mpi procs in 3D must be equal to npx*npy*npz");
984     PetscCheck(dd->nex >= dd->npx && dd->ney >= dd->npy && dd->nez >= dd->npz, dd->gcomm, PETSC_ERR_USER, "Number of elements per dim must be greater/equal than number of ranks per dim");
985   }
986   PetscFunctionReturn(PETSC_SUCCESS);
987 }
988 
989 int main(int argc, char **args)
990 {
991   DomainData         dd;
992   PetscReal          norm, maxeig, mineig;
993   PetscScalar        scalar_value;
994   PetscInt           ndofs, its;
995   Mat                A = NULL, F = NULL;
996   KSP                KSPwithBDDC = NULL, KSPwithFETIDP = NULL;
997   KSPConvergedReason reason;
998   Vec                exact_solution = NULL, bddc_solution = NULL, bddc_rhs = NULL;
999   PetscBool          testfetidp = PETSC_TRUE;
1000 
1001   /* Init PETSc */
1002   PetscFunctionBeginUser;
1003   PetscCall(PetscInitialize(&argc, &args, NULL, help));
1004   /* Initialize DomainData */
1005   PetscCall(InitializeDomainData(&dd));
1006   /* Decompose domain */
1007   PetscCall(DomainDecomposition(&dd));
1008 #if DEBUG
1009   printf("Subdomain data\n");
1010   printf("IPS   : %d %d %d\n", dd.ipx, dd.ipy, dd.ipz);
1011   printf("NEG   : %d %d %d\n", dd.nex, dd.ney, dd.nez);
1012   printf("NEL   : %d %d %d\n", dd.nex_l, dd.ney_l, dd.nez_l);
1013   printf("LDO   : %d %d %d\n", dd.xm_l, dd.ym_l, dd.zm_l);
1014   printf("SIZES : %d %d %d\n", dd.xm, dd.ym, dd.zm);
1015   printf("STARTS: %d %d %d\n", dd.startx, dd.starty, dd.startz);
1016 #endif
1017   dd.testkspfetidp = PETSC_TRUE;
1018   PetscCall(PetscOptionsGetBool(NULL, NULL, "-testfetidp", &testfetidp, NULL));
1019   PetscCall(PetscOptionsGetBool(NULL, NULL, "-testkspfetidp", &dd.testkspfetidp, NULL));
1020   /* assemble global matrix */
1021   PetscCall(ComputeMatrix(dd, &A));
1022   /* get work vectors */
1023   PetscCall(MatCreateVecs(A, &bddc_solution, NULL));
1024   PetscCall(VecDuplicate(bddc_solution, &bddc_rhs));
1025   PetscCall(VecDuplicate(bddc_solution, &exact_solution));
1026   /* create and customize KSP/PC for BDDC */
1027   PetscCall(ComputeKSPBDDC(dd, A, &KSPwithBDDC));
1028   /* create KSP/PC for FETIDP */
1029   if (testfetidp) PetscCall(ComputeKSPFETIDP(dd, KSPwithBDDC, &KSPwithFETIDP));
1030   /* create random exact solution */
1031 #if defined(PETSC_USE_COMPLEX)
1032   PetscCall(VecSet(exact_solution, 1.0 + PETSC_i));
1033 #else
1034   PetscCall(VecSetRandom(exact_solution, NULL));
1035 #endif
1036   PetscCall(VecShift(exact_solution, -0.5));
1037   PetscCall(VecScale(exact_solution, 100.0));
1038   PetscCall(VecGetSize(exact_solution, &ndofs));
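  /* In the pure Neumann case the operator has the constants in its null space, so the exact
     solution is shifted to zero mean to keep it orthogonal to that null space */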
1039   if (dd.pure_neumann) {
1040     PetscCall(VecSum(exact_solution, &scalar_value));
1041     scalar_value = -scalar_value / (PetscScalar)ndofs;
1042     PetscCall(VecShift(exact_solution, scalar_value));
1043   }
1044   /* assemble BDDC rhs */
1045   PetscCall(MatMult(A, exact_solution, bddc_rhs));
1046   /* test ksp with BDDC */
1047   PetscCall(KSPSolve(KSPwithBDDC, bddc_rhs, bddc_solution));
1048   PetscCall(KSPGetIterationNumber(KSPwithBDDC, &its));
1049   PetscCall(KSPGetConvergedReason(KSPwithBDDC, &reason));
1050   PetscCall(KSPComputeExtremeSingularValues(KSPwithBDDC, &maxeig, &mineig));
1051   if (dd.pure_neumann) {
1052     PetscCall(VecSum(bddc_solution, &scalar_value));
1053     scalar_value = -scalar_value / (PetscScalar)ndofs;
1054     PetscCall(VecShift(bddc_solution, scalar_value));
1055   }
1056   /* check exact_solution and BDDC solution */
1057   PetscCall(VecAXPY(bddc_solution, -1.0, exact_solution));
1058   PetscCall(VecNorm(bddc_solution, NORM_INFINITY, &norm));
1059   PetscCall(PetscPrintf(dd.gcomm, "---------------------BDDC stats-------------------------------\n"));
1060   PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom               : %8" PetscInt_FMT "\n", ndofs));
1061   if (reason < 0) {
1062     PetscCall(PetscPrintf(dd.gcomm, "Number of iterations                       : %8" PetscInt_FMT "\n", its));
1063     PetscCall(PetscPrintf(dd.gcomm, "Converged reason                           : %s\n", KSPConvergedReasons[reason]));
1064   }
1065   if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1066   PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator        : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1067   if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1068   PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1069   if (testfetidp) {
1070     Vec fetidp_solution_all = NULL, fetidp_solution = NULL, fetidp_rhs = NULL;
1071 
1072     PetscCall(VecDuplicate(bddc_solution, &fetidp_solution_all));
1073     if (!dd.testkspfetidp) {
1074       /* assemble fetidp rhs on the space of Lagrange multipliers */
1075       PetscCall(KSPGetOperators(KSPwithFETIDP, &F, NULL));
1076       PetscCall(MatCreateVecs(F, &fetidp_solution, &fetidp_rhs));
1077       PetscCall(PCBDDCMatFETIDPGetRHS(F, bddc_rhs, fetidp_rhs));
1078       PetscCall(VecSet(fetidp_solution, 0.0));
1079       /* test ksp with FETIDP */
1080       PetscCall(KSPSolve(KSPwithFETIDP, fetidp_rhs, fetidp_solution));
1081       /* assemble fetidp solution on physical domain */
1082       PetscCall(PCBDDCMatFETIDPGetSolution(F, fetidp_solution, fetidp_solution_all));
1083     } else {
1084       KSP kspF;
1085       PetscCall(KSPSolve(KSPwithFETIDP, bddc_rhs, fetidp_solution_all));
1086       PetscCall(KSPFETIDPGetInnerKSP(KSPwithFETIDP, &kspF));
1087       PetscCall(KSPGetOperators(kspF, &F, NULL));
1088     }
1089     PetscCall(MatGetSize(F, &ndofs, NULL));
1090     PetscCall(KSPGetIterationNumber(KSPwithFETIDP, &its));
1091     PetscCall(KSPGetConvergedReason(KSPwithFETIDP, &reason));
1092     PetscCall(KSPComputeExtremeSingularValues(KSPwithFETIDP, &maxeig, &mineig));
1093     /* check FETIDP sol */
1094     if (dd.pure_neumann) {
1095       PetscCall(VecSum(fetidp_solution_all, &scalar_value));
1096       scalar_value = -scalar_value / (PetscScalar)ndofs;
1097       PetscCall(VecShift(fetidp_solution_all, scalar_value));
1098     }
1099     PetscCall(VecAXPY(fetidp_solution_all, -1.0, exact_solution));
1100     PetscCall(VecNorm(fetidp_solution_all, NORM_INFINITY, &norm));
1101     PetscCall(PetscPrintf(dd.gcomm, "------------------FETI-DP stats-------------------------------\n"));
1102     PetscCall(PetscPrintf(dd.gcomm, "Number of degrees of freedom               : %8" PetscInt_FMT "\n", ndofs));
1103     if (reason < 0) {
1104       PetscCall(PetscPrintf(dd.gcomm, "Number of iterations                       : %8" PetscInt_FMT "\n", its));
1105       PetscCall(PetscPrintf(dd.gcomm, "Converged reason                           : %s\n", KSPConvergedReasons[reason]));
1106     }
1107     if (0.95 <= mineig && mineig <= 1.05) mineig = 1.0;
1108     PetscCall(PetscPrintf(dd.gcomm, "Eigenvalues preconditioned operator        : %1.1e %1.1e\n", (double)PetscFloorReal(100. * mineig) / 100., (double)PetscCeilReal(100. * maxeig) / 100.));
1109     if (norm > 1.e-1 || reason < 0) PetscCall(PetscPrintf(dd.gcomm, "Error between exact and computed solution : %1.2e\n", (double)norm));
1110     PetscCall(PetscPrintf(dd.gcomm, "--------------------------------------------------------------\n"));
1111     PetscCall(VecDestroy(&fetidp_solution));
1112     PetscCall(VecDestroy(&fetidp_solution_all));
1113     PetscCall(VecDestroy(&fetidp_rhs));
1114   }
1115   PetscCall(KSPDestroy(&KSPwithFETIDP));
1116   PetscCall(VecDestroy(&exact_solution));
1117   PetscCall(VecDestroy(&bddc_solution));
1118   PetscCall(VecDestroy(&bddc_rhs));
1119   PetscCall(MatDestroy(&A));
1120   PetscCall(KSPDestroy(&KSPwithBDDC));
1121   /* Quit PETSc */
1122   PetscCall(PetscFinalize());
1123   return 0;
1124 }
1125 
1126 /*TEST
1127 
1128  testset:
1129    nsize: 4
1130    args: -nex 7 -physical_pc_bddc_coarse_eqs_per_proc 3 -physical_pc_bddc_switch_static
1131    output_file: output/ex59_bddc_fetidp_1.out
1132    test:
1133      suffix: bddc_fetidp_1
1134    test:
1135      requires: viennacl
1136      suffix: bddc_fetidp_1_viennacl
1137      args: -subdomain_mat_type aijviennacl
1138    test:
1139      requires: cuda
1140      suffix: bddc_fetidp_1_cuda
1141      args: -subdomain_mat_type aijcusparse -physical_pc_bddc_dirichlet_pc_factor_mat_solver_type cusparse
1142 
1143  testset:
1144    nsize: 4
1145    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10
1146    requires: !single
1147    test:
1148      suffix: bddc_fetidp_2
1149    test:
1150      suffix: bddc_fetidp_3
1151      args: -npz 1 -nez 1
1152    test:
1153      suffix: bddc_fetidp_4
1154      args: -npz 1 -nez 1 -physical_pc_bddc_use_change_of_basis -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_deluxe_singlemat -fluxes_fetidp_ksp_type cg
1155 
1156  testset:
1157    nsize: 8
1158    suffix: bddc_fetidp_approximate
1159    args: -npx 2 -npy 2 -npz 2 -p 2 -nex 8 -ney 7 -nez 9 -physical_ksp_max_it 20 -subdomain_mat_type aij -physical_pc_bddc_switch_static -physical_pc_bddc_dirichlet_approximate -physical_pc_bddc_neumann_approximate -physical_pc_bddc_dirichlet_pc_type gamg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_type cg -physical_pc_bddc_dirichlet_pc_gamg_esteig_ksp_max_it 10 -physical_pc_bddc_dirichlet_mg_levels_ksp_max_it 3 -physical_pc_bddc_neumann_pc_type sor -physical_pc_bddc_neumann_approximate_scale -testfetidp 0
1160 
1161  testset:
1162    nsize: 4
1163    args: -npx 2 -npy 2 -nex 6 -ney 6 -fluxes_ksp_max_it 10 -physical_ksp_max_it 10 -physical_ksp_view -physical_pc_bddc_levels 1
1164    filter: grep -v "variant HERMITIAN"
1165    requires: !single
1166    test:
1167      suffix: bddc_fetidp_ml_1
1168      args: -physical_pc_bddc_coarsening_ratio 1
1169    test:
1170      suffix: bddc_fetidp_ml_2
1171      args: -physical_pc_bddc_coarsening_ratio 2 -mat_partitioning_type average
1172    test:
1173      suffix: bddc_fetidp_ml_3
1174      args: -physical_pc_bddc_coarsening_ratio 4
1175 
1176  testset:
1177    nsize: 9
1178    args: -npx 3 -npy 3 -p 2 -nex 6 -ney 6 -physical_pc_bddc_deluxe_singlemat -physical_sub_schurs_mat_solver_type petsc -physical_pc_bddc_use_deluxe_scaling -physical_pc_bddc_graph_maxcount 1 -physical_pc_bddc_levels 3 -physical_pc_bddc_coarsening_ratio 2 -physical_pc_bddc_coarse_ksp_type gmres
1179    output_file: output/ex59_bddc_fetidp_ml_eqlimit.out
1180    test:
1181      suffix: bddc_fetidp_ml_eqlimit_1
1182      args: -physical_pc_bddc_coarse_eqs_limit 31 -mat_partitioning_type average -physical_pc_bddc_coarse_pc_bddc_graph_maxcount 1
1183    test:
1184      suffix: bddc_fetidp_ml_eqlimit_2
1185      args: -physical_pc_bddc_coarse_eqs_limit 46
1186 
1187 TEST*/
1188