xref: /petsc/src/snes/tests/ex13.c (revision 47dfd8b8ae57d3d37841277916f2fbbf6797bf4d)
1 static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\
2 We solve the Poisson problem in a rectangular domain\n\
3 using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n";
4 
5 #include <petscdmplex.h>
6 #include <petscsnes.h>
7 #include <petscds.h>
8 #include <petscconvest.h>
9 #if defined(PETSC_HAVE_AMGX)
10 #include <amgx_c.h>
11 #endif
12 
13 typedef struct {
14   PetscInt  nit;    /* Number of benchmark iterations */
15   PetscBool strong; /* Do not integrate the Laplacian by parts */
16 } AppCtx;
17 
18 static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) {
19   PetscInt d;
20   *u = 0.0;
21   for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0 * PETSC_PI * x[d]);
22   return 0;
23 }
24 
25 static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) {
26   PetscInt d;
27   for (d = 0; d < dim; ++d) f0[0] += -4.0 * PetscSqr(PETSC_PI) * PetscSinReal(2.0 * PETSC_PI * x[d]);
28 }
29 
30 static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) {
31   PetscInt d;
32   for (d = 0; d < dim; ++d) f1[d] = u_x[d];
33 }
34 
35 static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) {
36   PetscInt d;
37   for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
38 }
39 
40 static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) {
41   *u = PetscSqr(x[0]) + PetscSqr(x[1]);
42   return 0;
43 }
44 
/* Strong-form residual: f0 = -Delta u + 4, which vanishes at the exact solution
   quadratic_u (u = x^2 + y^2, Delta u = 4).
   NOTE(review): this indexes the 2-jet of u, which requires -dm_ds_jet_degree 2
   (see the "strong" test below): u_x[0..dim) appears to hold the gradient and
   u_x[dim..dim+dim*dim) the Hessian in row-major order, so u_x[dim + d*dim + d]
   is the diagonal entry d^2u/dx_d^2 — confirm against PetscDS jet layout. */
static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) {
  PetscInt d;
  /* Subtract the Laplacian: sum of diagonal second derivatives from the jet */
  for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d * dim + d];
  /* Add the forcing term f = 4 */
  f0[0] += 4.0;
}
50 
51 static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) {
52   PetscFunctionBeginUser;
53   options->nit    = 10;
54   options->strong = PETSC_FALSE;
55   PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");
56   PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL));
57   PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL));
58   PetscOptionsEnd();
59   PetscFunctionReturn(0);
60 }
61 
/* Create the problem mesh: a DMPLEX configured entirely from command-line
   options (-dm_plex_dim, -dm_plex_box_faces, -dm_refine, ...), with the
   application context attached and an optional -dm_view dump. */
static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm) {
  PetscFunctionBeginUser;
  PetscCall(DMCreate(comm, dm));
  PetscCall(DMSetType(*dm, DMPLEX));
  /* Shape, refinement, and distribution all come from the options database */
  PetscCall(DMSetFromOptions(*dm));
  PetscCall(DMSetApplicationContext(*dm, user));
  PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view"));
  PetscFunctionReturn(0);
}
71 
72 static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user) {
73   PetscDS        ds;
74   DMLabel        label;
75   const PetscInt id = 1;
76 
77   PetscFunctionBeginUser;
78   PetscCall(DMGetDS(dm, &ds));
79   PetscCall(DMGetLabel(dm, "marker", &label));
80   if (user->strong) {
81     PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL));
82     PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user));
83     PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))quadratic_u, NULL, user, NULL));
84   } else {
85     PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u));
86     PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu));
87     PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user));
88     PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))trig_u, NULL, user, NULL));
89   }
90   PetscFunctionReturn(0);
91 }
92 
93 static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user) {
94   DM             cdm = dm;
95   PetscFE        fe;
96   DMPolytopeType ct;
97   PetscBool      simplex;
98   PetscInt       dim, cStart;
99   char           prefix[PETSC_MAX_PATH_LEN];
100 
101   PetscFunctionBeginUser;
102   PetscCall(DMGetDimension(dm, &dim));
103   PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL));
104   PetscCall(DMPlexGetCellType(dm, cStart, &ct));
105   simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct) + 1 ? PETSC_TRUE : PETSC_FALSE; // false
106   /* Create finite element */
107   PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name));
108   PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe));
109   PetscCall(PetscObjectSetName((PetscObject)fe, name));
110   /* Set discretization and boundary conditions for each mesh */
111   PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
112   PetscCall(DMCreateDS(dm));
113   PetscCall((*setup)(dm, user));
114   while (cdm) {
115     PetscCall(DMCopyDisc(dm, cdm));
116     /* TODO: Check whether the boundary of coarse meshes is marked */
117     PetscCall(DMGetCoarseDM(cdm, &cdm));
118   }
119   PetscCall(PetscFEDestroy(&fe));
120   PetscFunctionReturn(0);
121 }
122 
123 int main(int argc, char **argv) {
124   DM             dm;   /* Problem specification */
125   SNES           snes; /* Nonlinear solver */
126   Vec            u;    /* Solutions */
127   AppCtx         user; /* User-defined work context */
128   PetscLogDouble time;
129   Mat            Amat;
130 
131   PetscFunctionBeginUser;
132   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
133   PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user));
134   /* system */
135   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
136   PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm));
137   PetscCall(SNESSetDM(snes, dm));
138   PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user));
139   PetscCall(DMCreateGlobalVector(dm, &u));
140   PetscCall(SNESSetFromOptions(snes));
141   PetscCall(PetscObjectSetName((PetscObject)u, "potential"));
142   PetscCall(DMPlexSetSNESLocalFEM(dm, &user, &user, &user));
143   PetscCall(DMSNESCheckFromOptions(snes, u));
144   PetscCall(PetscTime(&time));
145   PetscCall(SNESSetUp(snes));
146 #if defined(PETSC_HAVE_AMGX)
147   KSP                   ksp;
148   PC                    pc;
149   PetscBool             flg;
150   AMGX_resources_handle rsc;
151   PetscCall(SNESGetKSP(snes, &ksp));
152   PetscCall(KSPGetPC(ksp, &pc));
153   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCAMGX, &flg));
154   if (flg) {
155     PetscCall(PCAmgXGetResources(pc, (void *)&rsc));
156     /* do ... with resource */
157   }
158 #endif
159   PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL));
160   PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
161   PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
162   PetscCall(SNESSolve(snes, NULL, u));
163   PetscCall(PetscTimeSubtract(&time));
164   /* Benchmark system */
165   if (user.nit) {
166     Vec      b;
167     PetscInt i;
168 #if defined(PETSC_USE_LOG)
169     PetscLogStage kspstage;
170 #endif
171     PetscCall(PetscLogStageRegister("Solve only", &kspstage));
172     PetscCall(PetscLogStagePush(kspstage));
173     PetscCall(SNESGetSolution(snes, &u));
174     PetscCall(SNESGetFunction(snes, &b, NULL, NULL));
175     for (i = 0; i < user.nit; i++) {
176       PetscCall(VecZeroEntries(u));
177       PetscCall(SNESSolve(snes, NULL, u));
178     }
179     PetscCall(PetscLogStagePop());
180   }
181   PetscCall(SNESGetSolution(snes, &u));
182   PetscCall(VecViewFromOptions(u, NULL, "-potential_view"));
183   /* Cleanup */
184   PetscCall(VecDestroy(&u));
185   PetscCall(SNESDestroy(&snes));
186   PetscCall(DMDestroy(&dm));
187   PetscCall(PetscFinalize());
188   return 0;
189 }
190 
191 /*TEST
192 
193   test:
194     suffix: strong
195     requires: triangle
196     args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong
197 
198   test:
199     suffix: bench
200     nsize: 4
201     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -dm_view -ksp_monitor \
202        -benchmark_it 1 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_dim 3 -ksp_converged_reason \
203        -ksp_norm_type unpreconditioned -ksp_rtol 1.e-6 -ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 \
204        -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev  -mg_levels_pc_type jacobi -pc_gamg_coarse_eq_limit 200 \
205        -pc_gamg_coarse_grid_layout_type compact -pc_gamg_esteig_ksp_max_it 5 -pc_gamg_process_eq_limit 200 \
206        -pc_gamg_repartition false -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -pc_gamg_threshold_scale .5 \
207        -pc_gamg_type agg -pc_type gamg -petscpartitioner_simple_node_grid 1,2,1 -petscpartitioner_simple_process_grid 2,1,1 \
208        -petscpartitioner_type simple -potential_petscspace_degree 2 -snes_lag_jacobian -2 -snes_max_it 1 -snes_rtol 1.e-8 -snes_type ksponly -use_gpu_aware_mpi true
209 
210   testset:
211     nsize: 4
212     output_file: output/ex13_comparison.out
213     args: -dm_plex_dim 2 -benchmark_it 10 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \
214       -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple  \
215       -dm_plex_simplex 0 -snes_type ksponly -dm_view -ksp_type cg -pc_type gamg -pc_gamg_process_eq_limit 400 \
216       -ksp_norm_type unpreconditioned -ksp_converged_reason
217     test:
218       suffix: comparison
219     test:
220       suffix: cuda
221       requires: cuda
222       args: -dm_mat_type aijcusparse -dm_vec_type cuda
223     test:
224       suffix: kokkos
225       requires: sycl kokkos_kernels
226       args: -dm_mat_type aijkokkos -dm_vec_type kokkos
227     test:
228       suffix: aijmkl_comp
229       requires: mkl_sparse
230       args: -dm_mat_type aijmkl
231 
232   test:
233     suffix: aijmkl_seq
234     nsize: 1
235     requires: mkl_sparse
236     TODO: broken (INDEFINITE PC)
237     args: -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_refine 1 -petscpartitioner_type simple -potential_petscspace_degree 1 -dm_plex_simplex 0 \
238           -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_threshold -1 -pc_gamg_square_graph 10 -pc_gamg_process_eq_limit 400 \
239           -pc_gamg_reuse_interpolation -pc_gamg_coarse_eq_limit 10 -pc_gamg_esteig_ksp_type cg -ksp_type cg -ksp_norm_type unpreconditioned \
240           -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl -dm_vec_type standard
241 
242   testset:
243     requires: cuda amgx
244     filter: grep -v Built | grep -v "AMGX version" | grep -v "CUDA Runtime"
245     output_file: output/ex13_amgx.out
246     args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \
247           -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose false
248     nsize: 4
249     test:
250       suffix: amgx
251       args: -dm_mat_type aijcusparse -dm_vec_type cuda
252     test:
253       suffix: amgx_cpu
254       args: -dm_mat_type aij
255 
256 TEST*/
257