1 static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\ 2 We solve the Poisson problem in a rectangular domain\n\ 3 using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n"; 4 5 #include <petscdmplex.h> 6 #include <petscsnes.h> 7 #include <petscds.h> 8 #include <petscconvest.h> 9 #if defined(PETSC_HAVE_AMGX) 10 #include <amgx_c.h> 11 #endif 12 13 typedef struct { 14 PetscInt nit; /* Number of benchmark iterations */ 15 PetscBool strong; /* Do not integrate the Laplacian by parts */ 16 } AppCtx; 17 18 static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) { 19 PetscInt d; 20 *u = 0.0; 21 for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0 * PETSC_PI * x[d]); 22 return 0; 23 } 24 25 static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) { 26 PetscInt d; 27 for (d = 0; d < dim; ++d) f0[0] += -4.0 * PetscSqr(PETSC_PI) * PetscSinReal(2.0 * PETSC_PI * x[d]); 28 } 29 30 static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) { 31 PetscInt d; 32 for (d = 0; d < dim; ++d) f1[d] = u_x[d]; 33 } 34 35 static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) { 36 PetscInt d; 37 for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0; 38 } 39 40 static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) { 41 *u = PetscSqr(x[0]) + PetscSqr(x[1]); 42 return 0; 43 } 44 45 static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) { 46 PetscInt d; 47 for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d * dim + d]; 48 f0[0] += 4.0; 49 } 50 51 static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) { 52 PetscFunctionBeginUser; 53 options->nit = 10; 54 options->strong = PETSC_FALSE; 55 PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX"); 56 PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL)); 57 PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL)); 58 PetscOptionsEnd(); 59 PetscFunctionReturn(0); 60 } 61 62 static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm) { 63 PetscFunctionBeginUser; 64 PetscCall(DMCreate(comm, dm)); 65 PetscCall(DMSetType(*dm, DMPLEX)); 66 PetscCall(DMSetFromOptions(*dm)); 67 PetscCall(DMSetApplicationContext(*dm, user)); 68 PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view")); 69 PetscFunctionReturn(0); 70 } 71 72 static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user) { 73 PetscDS ds; 74 DMLabel label; 75 const PetscInt id = 1; 76 77 PetscFunctionBeginUser; 78 PetscCall(DMGetDS(dm, &ds)); 79 PetscCall(DMGetLabel(dm, "marker", &label)); 80 if (user->strong) { 81 PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL)); 82 PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user)); 83 PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))quadratic_u, NULL, user, NULL)); 84 } else { 85 PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u)); 86 PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu)); 87 PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user)); 88 PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))trig_u, NULL, user, NULL)); 89 } 90 PetscFunctionReturn(0); 91 } 92 93 static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user) { 94 DM cdm = dm; 95 PetscFE fe; 96 DMPolytopeType ct; 97 PetscBool simplex; 98 PetscInt dim, cStart; 99 char prefix[PETSC_MAX_PATH_LEN]; 100 101 PetscFunctionBeginUser; 102 PetscCall(DMGetDimension(dm, &dim)); 103 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL)); 104 PetscCall(DMPlexGetCellType(dm, cStart, &ct)); 105 simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct) + 1 ? PETSC_TRUE : PETSC_FALSE; // false 106 /* Create finite element */ 107 PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name)); 108 PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe)); 109 PetscCall(PetscObjectSetName((PetscObject)fe, name)); 110 /* Set discretization and boundary conditions for each mesh */ 111 PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe)); 112 PetscCall(DMCreateDS(dm)); 113 PetscCall((*setup)(dm, user)); 114 while (cdm) { 115 PetscCall(DMCopyDisc(dm, cdm)); 116 /* TODO: Check whether the boundary of coarse meshes is marked */ 117 PetscCall(DMGetCoarseDM(cdm, &cdm)); 118 } 119 PetscCall(PetscFEDestroy(&fe)); 120 PetscFunctionReturn(0); 121 } 122 123 int main(int argc, char **argv) { 124 DM dm; /* Problem specification */ 125 SNES snes; /* Nonlinear solver */ 126 Vec u; /* Solutions */ 127 AppCtx user; /* User-defined work context */ 128 PetscLogDouble time; 129 Mat Amat; 130 131 PetscFunctionBeginUser; 132 PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 133 PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user)); 134 /* system */ 135 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 136 PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm)); 137 PetscCall(SNESSetDM(snes, dm)); 138 PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user)); 139 PetscCall(DMCreateGlobalVector(dm, &u)); 140 PetscCall(SNESSetFromOptions(snes)); 141 PetscCall(PetscObjectSetName((PetscObject)u, "potential")); 142 PetscCall(DMPlexSetSNESLocalFEM(dm, &user, &user, &user)); 143 PetscCall(DMSNESCheckFromOptions(snes, u)); 144 PetscCall(PetscTime(&time)); 145 PetscCall(SNESSetUp(snes)); 146 #if defined(PETSC_HAVE_AMGX) 147 KSP ksp; 148 PC pc; 149 PetscBool flg; 150 AMGX_resources_handle rsc; 151 PetscCall(SNESGetKSP(snes, &ksp)); 152 PetscCall(KSPGetPC(ksp, &pc)); 153 PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCAMGX, &flg)); 154 if (flg) { 155 PetscCall(PCAmgXGetResources(pc, (void *)&rsc)); 156 /* do ... with resource */ 157 } 158 #endif 159 PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL)); 160 PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE)); 161 PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE)); 162 PetscCall(SNESSolve(snes, NULL, u)); 163 PetscCall(PetscTimeSubtract(&time)); 164 /* Benchmark system */ 165 if (user.nit) { 166 Vec b; 167 PetscInt i; 168 #if defined(PETSC_USE_LOG) 169 PetscLogStage kspstage; 170 #endif 171 PetscCall(PetscLogStageRegister("Solve only", &kspstage)); 172 PetscCall(PetscLogStagePush(kspstage)); 173 PetscCall(SNESGetSolution(snes, &u)); 174 PetscCall(SNESGetFunction(snes, &b, NULL, NULL)); 175 for (i = 0; i < user.nit; i++) { 176 PetscCall(VecZeroEntries(u)); 177 PetscCall(SNESSolve(snes, NULL, u)); 178 } 179 PetscCall(PetscLogStagePop()); 180 } 181 PetscCall(SNESGetSolution(snes, &u)); 182 PetscCall(VecViewFromOptions(u, NULL, "-potential_view")); 183 /* Cleanup */ 184 PetscCall(VecDestroy(&u)); 185 PetscCall(SNESDestroy(&snes)); 186 PetscCall(DMDestroy(&dm)); 187 PetscCall(PetscFinalize()); 188 return 0; 189 } 190 191 /*TEST 192 193 test: 194 suffix: strong 195 requires: triangle 196 args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong 197 198 test: 199 suffix: bench 200 nsize: 4 201 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -dm_view -ksp_monitor \ 202 -benchmark_it 1 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_dim 3 -ksp_converged_reason \ 203 -ksp_norm_type unpreconditioned -ksp_rtol 1.e-6 -ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 \ 204 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -mg_levels_pc_type jacobi -pc_gamg_coarse_eq_limit 200 \ 205 -pc_gamg_coarse_grid_layout_type compact -pc_gamg_esteig_ksp_max_it 5 -pc_gamg_process_eq_limit 200 \ 206 -pc_gamg_repartition false -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -pc_gamg_threshold_scale .5 \ 207 -pc_gamg_type agg -pc_type gamg -petscpartitioner_simple_node_grid 1,2,1 -petscpartitioner_simple_process_grid 2,1,1 \ 208 -petscpartitioner_type simple -potential_petscspace_degree 2 -snes_lag_jacobian -2 -snes_max_it 1 -snes_rtol 1.e-8 -snes_type ksponly -use_gpu_aware_mpi true 209 210 testset: 211 nsize: 4 212 output_file: output/ex13_comparison.out 213 args: -dm_plex_dim 2 -benchmark_it 10 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 214 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple \ 215 -dm_plex_simplex 0 -snes_type ksponly -dm_view -ksp_type cg -pc_type gamg -pc_gamg_process_eq_limit 400 \ 216 -ksp_norm_type unpreconditioned -ksp_converged_reason 217 test: 218 suffix: comparison 219 test: 220 suffix: cuda 221 requires: cuda 222 args: -dm_mat_type aijcusparse -dm_vec_type cuda 223 test: 224 suffix: kokkos 225 requires: sycl kokkos_kernels 226 args: -dm_mat_type aijkokkos -dm_vec_type kokkos 227 test: 228 suffix: aijmkl_comp 229 requires: mkl_sparse 230 args: -dm_mat_type aijmkl 231 232 test: 233 suffix: aijmkl_seq 234 nsize: 1 235 requires: mkl_sparse 236 TODO: broken (INDEFINITE PC) 237 args: -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_refine 1 -petscpartitioner_type simple -potential_petscspace_degree 1 -dm_plex_simplex 0 \ 238 -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_threshold -1 -pc_gamg_square_graph 10 -pc_gamg_process_eq_limit 400 \ 239 -pc_gamg_reuse_interpolation -pc_gamg_coarse_eq_limit 10 -pc_gamg_esteig_ksp_type cg -ksp_type cg -ksp_norm_type unpreconditioned \ 240 -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl -dm_vec_type standard 241 242 testset: 243 requires: cuda amgx 244 filter: grep -v Built | grep -v "AMGX version" | grep -v "CUDA Runtime" 245 output_file: output/ex13_amgx.out 246 args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \ 247 -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose false 248 nsize: 4 249 test: 250 suffix: amgx 251 args: -dm_mat_type aijcusparse -dm_vec_type cuda 252 test: 253 suffix: amgx_cpu 254 args: -dm_mat_type aij 255 256 TEST*/ 257