1 static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\ 2 We solve the Poisson problem in a rectangular domain\n\ 3 using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n"; 4 5 #include <petscdmplex.h> 6 #include <petscsnes.h> 7 #include <petscds.h> 8 #include <petscconvest.h> 9 10 typedef struct { 11 PetscInt nit; /* Number of benchmark iterations */ 12 PetscBool strong; /* Do not integrate the Laplacian by parts */ 13 } AppCtx; 14 15 static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 16 { 17 PetscInt d; 18 *u = 0.0; 19 for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0*PETSC_PI*x[d]); 20 return 0; 21 } 22 23 static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 24 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 25 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 26 PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 27 { 28 PetscInt d; 29 for (d = 0; d < dim; ++d) f0[0] += -4.0*PetscSqr(PETSC_PI)*PetscSinReal(2.0*PETSC_PI*x[d]); 30 } 31 32 static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 33 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 34 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 35 PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 36 { 37 PetscInt d; 38 for (d = 0; d < dim; ++d) f1[d] = u_x[d]; 39 } 40 41 static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, 42 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 43 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 44 PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 45 { 46 PetscInt d; 47 for (d = 0; d < dim; ++d) g3[d*dim+d] = 1.0; 48 } 49 50 static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 51 { 52 *u = PetscSqr(x[0]) + PetscSqr(x[1]); 53 return 0; 54 } 55 56 static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 57 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 58 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 59 PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 60 { 61 PetscInt d; 62 for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d*dim+d]; 63 f0[0] += 4.0; 64 } 65 66 static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) 67 { 68 PetscErrorCode ierr; 69 70 PetscFunctionBeginUser; 71 options->nit = 10; 72 options->strong = PETSC_FALSE; 73 ierr = PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");CHKERRQ(ierr); 74 ierr = PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL);CHKERRQ(ierr); 75 ierr = PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL);CHKERRQ(ierr); 76 ierr = PetscOptionsEnd(); 77 PetscFunctionReturn(0); 78 } 79 80 static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm) 81 { 82 PetscErrorCode ierr; 83 84 PetscFunctionBeginUser; 85 /* Create box mesh */ 86 ierr = DMPlexCreateBoxMesh(comm, 2, PETSC_TRUE, NULL, NULL, NULL, NULL, PETSC_TRUE, dm);CHKERRQ(ierr); 87 /* TODO: This should be pulled into the library */ 88 { 89 char convType[256]; 90 PetscBool flg; 91 92 ierr = PetscOptionsBegin(comm, "", "Mesh conversion options", "DMPLEX");CHKERRQ(ierr); 93 ierr = PetscOptionsFList("-dm_plex_convert_type","Convert DMPlex to another format","ex12",DMList,DMPLEX,convType,256,&flg);CHKERRQ(ierr); 94 ierr = PetscOptionsEnd(); 95 if (flg) { 96 DM dmConv; 97 98 ierr = DMConvert(*dm,convType,&dmConv);CHKERRQ(ierr); 99 if (dmConv) { 100 ierr = DMDestroy(dm);CHKERRQ(ierr); 101 *dm = dmConv; 102 } 103 } 104 } 105 ierr = DMLocalizeCoordinates(*dm);CHKERRQ(ierr); 106 107 ierr = PetscObjectSetName((PetscObject) *dm, "Mesh");CHKERRQ(ierr); 108 ierr = DMSetApplicationContext(*dm, user);CHKERRQ(ierr); 109 ierr = DMSetFromOptions(*dm);CHKERRQ(ierr); 110 ierr = DMViewFromOptions(*dm, NULL, "-dm_view");CHKERRQ(ierr); 111 PetscFunctionReturn(0); 112 } 113 114 static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user) 115 { 116 PetscDS ds; 117 DMLabel label; 118 const PetscInt id = 1; 119 PetscErrorCode ierr; 120 121 PetscFunctionBeginUser; 122 ierr = DMGetDS(dm, &ds);CHKERRQ(ierr); 123 ierr = DMGetLabel(dm, "marker", &label);CHKERRQ(ierr); 124 if (user->strong) { 125 ierr = PetscDSSetResidual(ds, 0, f0_strong_u, NULL);CHKERRQ(ierr); 126 ierr = PetscDSSetExactSolution(ds, 0, quadratic_u, user);CHKERRQ(ierr); 127 ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void)) quadratic_u, NULL, user, NULL);CHKERRQ(ierr); 128 } else { 129 ierr = PetscDSSetResidual(ds, 0, f0_trig_u, f1_u);CHKERRQ(ierr); 130 ierr = PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu);CHKERRQ(ierr); 131 ierr = PetscDSSetExactSolution(ds, 0, trig_u, user);CHKERRQ(ierr); 132 ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void)) trig_u, NULL, user, NULL);CHKERRQ(ierr); 133 } 134 PetscFunctionReturn(0); 135 } 136 137 static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user) 138 { 139 DM cdm = dm; 140 PetscFE fe; 141 DMPolytopeType ct; 142 PetscBool simplex; 143 PetscInt dim, cStart; 144 char prefix[PETSC_MAX_PATH_LEN]; 145 PetscErrorCode ierr; 146 147 PetscFunctionBeginUser; 148 ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr); 149 ierr = DMPlexGetHeightStratum(dm, 0, &cStart, NULL);CHKERRQ(ierr); 150 ierr = DMPlexGetCellType(dm, cStart, &ct);CHKERRQ(ierr); 151 simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct)+1 ? PETSC_TRUE : PETSC_FALSE; 152 /* Create finite element */ 153 ierr = PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name);CHKERRQ(ierr); 154 ierr = PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe);CHKERRQ(ierr); 155 ierr = PetscObjectSetName((PetscObject) fe, name);CHKERRQ(ierr); 156 /* Set discretization and boundary conditions for each mesh */ 157 ierr = DMSetField(dm, 0, NULL, (PetscObject) fe);CHKERRQ(ierr); 158 ierr = DMCreateDS(dm);CHKERRQ(ierr); 159 ierr = (*setup)(dm, user);CHKERRQ(ierr); 160 while (cdm) { 161 ierr = DMCopyDisc(dm,cdm);CHKERRQ(ierr); 162 /* TODO: Check whether the boundary of coarse meshes is marked */ 163 ierr = DMGetCoarseDM(cdm, &cdm);CHKERRQ(ierr); 164 } 165 ierr = PetscFEDestroy(&fe);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 int main(int argc, char **argv) 170 { 171 DM dm; /* Problem specification */ 172 SNES snes; /* Nonlinear solver */ 173 Vec u; /* Solutions */ 174 AppCtx user; /* User-defined work context */ 175 PetscErrorCode ierr; 176 177 ierr = PetscInitialize(&argc, &argv, NULL,help);if (ierr) return ierr; 178 ierr = ProcessOptions(PETSC_COMM_WORLD, &user);CHKERRQ(ierr); 179 /* Primal system */ 180 ierr = SNESCreate(PETSC_COMM_WORLD, &snes);CHKERRQ(ierr); 181 ierr = CreateMesh(PETSC_COMM_WORLD, &user, &dm);CHKERRQ(ierr); 182 ierr = SNESSetDM(snes, dm);CHKERRQ(ierr); 183 ierr = SetupDiscretization(dm, "potential", SetupPrimalProblem, &user);CHKERRQ(ierr); 184 ierr = DMCreateGlobalVector(dm, &u);CHKERRQ(ierr); 185 ierr = VecSet(u, 0.0);CHKERRQ(ierr); 186 ierr = PetscObjectSetName((PetscObject) u, "potential");CHKERRQ(ierr); 187 ierr = DMPlexSetSNESLocalFEM(dm, &user, &user, &user);CHKERRQ(ierr); 188 ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 189 ierr = DMSNESCheckFromOptions(snes, u);CHKERRQ(ierr); 190 ierr = SNESSolve(snes, NULL, u);CHKERRQ(ierr); 191 /* Benchmark system */ 192 if (user.nit) { 193 #if defined(PETSC_USE_LOG) 194 PetscLogStage kspstage,pcstage; 195 #endif 196 KSP ksp; 197 PC pc; 198 Mat A,P; 199 Vec b; 200 PetscInt i; 201 ierr = PetscOptionsClearValue(NULL,"-ksp_monitor");CHKERRQ(ierr); 202 ierr = SNESGetKSP(snes, &ksp);CHKERRQ(ierr); 203 ierr = SNESGetSolution(snes, &u);CHKERRQ(ierr); 204 ierr = SNESGetJacobian(snes, &A, &P, NULL, NULL);CHKERRQ(ierr); 205 ierr = VecSet(u, 0.0);CHKERRQ(ierr); 206 ierr = SNESGetFunction(snes, &b, NULL, NULL);CHKERRQ(ierr); 207 ierr = SNESComputeFunction(snes, u, b);CHKERRQ(ierr); 208 ierr = SNESComputeJacobian(snes, u, A, P);CHKERRQ(ierr); 209 ierr = KSPGetPC(ksp, &pc);CHKERRQ(ierr); 210 ierr = PetscLogStageRegister("PCSetUp", &pcstage);CHKERRQ(ierr); 211 ierr = PetscLogStagePush(pcstage);CHKERRQ(ierr); 212 ierr = PCSetUp(pc);CHKERRQ(ierr); 213 ierr = PetscLogStagePop();CHKERRQ(ierr); 214 ierr = PetscLogStageRegister("KSP Solve only", &kspstage);CHKERRQ(ierr); 215 ierr = PetscLogStagePush(kspstage);CHKERRQ(ierr); 216 for (i=0;i<user.nit;i++) { 217 ierr = VecZeroEntries(u);CHKERRQ(ierr); 218 ierr = KSPSolve(ksp, b, u);CHKERRQ(ierr); 219 } 220 ierr = PetscLogStagePop();CHKERRQ(ierr); 221 } 222 ierr = SNESGetSolution(snes, &u);CHKERRQ(ierr); 223 ierr = VecViewFromOptions(u, NULL, "-potential_view");CHKERRQ(ierr); 224 /* Cleanup */ 225 ierr = VecDestroy(&u);CHKERRQ(ierr); 226 ierr = SNESDestroy(&snes);CHKERRQ(ierr); 227 ierr = DMDestroy(&dm);CHKERRQ(ierr); 228 ierr = PetscFinalize(); 229 return ierr; 230 } 231 232 /*TEST 233 234 test: 235 suffix: strong 236 requires: triangle 237 args: -dm_plex_box_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check \ 238 -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong 239 240 test: 241 suffix: bench 242 nsize: 4 243 args: -dm_plex_box_dim 3 -dm_plex_box_simplex 0 -dm_plex_box_faces 2,2,8 -dm_refine 1 -dm_distribute \ 244 -petscpartitioner_type simple -petscpartitioner_simple_process_grid 1,1,2 -petscpartitioner_simple_node_grid 1,1,2 \ 245 -potential_petscspace_degree 2 -ksp_type cg -pc_type gamg -benchmark_it 1 -dm_view -snes_rtol 1.e-4 246 247 test: 248 suffix: comparison 249 nsize: 4 250 args: -dm_plex_box_dim 2 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 251 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -dm_distribute -petscpartitioner_type simple \ 252 -dm_plex_box_simplex 0 -snes_monitor_short -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_process_eq_limit 400 -ksp_norm_type unpreconditioned \ 253 -pc_gamg_coarse_eq_limit 10 -snes_converged_reason -ksp_converged_reason -snes_rtol 1.e-4 254 255 test: 256 suffix: cuda 257 nsize: 4 258 requires: cuda 259 output_file: output/ex13_comparison.out 260 args: -dm_plex_box_dim 2 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 261 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -dm_distribute -petscpartitioner_type simple \ 262 -dm_plex_box_simplex 0 -snes_monitor_short -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_process_eq_limit 400 -ksp_norm_type unpreconditioned \ 263 -pc_gamg_coarse_eq_limit 10 -snes_converged_reason -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijcusparse -dm_vec_type cuda 264 265 test: 266 suffix: kokkos_comp 267 nsize: 4 268 requires: kokkos_kernels 269 output_file: output/ex13_comparison.out 270 args: -dm_plex_box_dim 2 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 271 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -dm_distribute -petscpartitioner_type simple \ 272 -dm_plex_box_simplex 0 -snes_monitor_short -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_process_eq_limit 400 -ksp_norm_type unpreconditioned \ 273 -pc_gamg_coarse_eq_limit 10 -snes_converged_reason -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijkokkos -dm_vec_type kokkos 274 275 test: 276 nsize: 4 277 requires: kokkos_kernels 278 suffix: kokkos 279 args: -dm_plex_box_dim 2 -dm_plex_box_faces 2,8 -dm_distribute -petscpartitioner_type simple -petscpartitioner_simple_process_grid 2,1 \ 280 -petscpartitioner_simple_node_grid 2,1 -dm_plex_box_simplex 0 -potential_petscspace_degree 1 -dm_refine 1 -ksp_type cg -pc_type gamg -ksp_norm_type unpreconditioned \ 281 -mg_levels_esteig_ksp_type cg -mg_levels_pc_type jacobi -ksp_converged_reason -snes_monitor_short -snes_rtol 1.e-4 -dm_view -dm_mat_type aijkokkos -dm_vec_type kokkos 282 283 test: 284 suffix: aijmkl_comp 285 nsize: 4 286 requires: mkl 287 output_file: output/ex13_comparison.out 288 args: -dm_plex_box_dim 2 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 289 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -dm_distribute -petscpartitioner_type simple \ 290 -dm_plex_box_simplex 0 -snes_monitor_short -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_process_eq_limit 400 -ksp_norm_type unpreconditioned \ 291 -pc_gamg_coarse_eq_limit 10 -snes_converged_reason -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl 292 293 test: 294 suffix: aijmkl_seq 295 nsize: 1 296 requires: mkl 297 TODO: broken (INDEFINITE PC) 298 args: -dm_plex_box_dim 3 -dm_plex_box_faces 4,4,4 -dm_refine 1 -petscpartitioner_type simple -potential_petscspace_degree 1 -dm_distribute -dm_plex_box_simplex 0 -snes_monitor_short -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_sym_graph 0 -pc_gamg_threshold -1 -pc_gamg_square_graph 10 -pc_gamg_process_eq_limit 400 -pc_gamg_reuse_interpolation -pc_gamg_coarse_eq_limit 10 -mg_levels_esteig_ksp_type cg -mg_levels_pc_type jacobi -ksp_type cg -ksp_norm_type unpreconditioned -snes_converged_reason -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl -dm_vec_type standard 299 300 TEST*/ 301