1 static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\ 2 We solve the Poisson problem in a rectangular domain\n\ 3 using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n"; 4 5 #include <petscdmplex.h> 6 #include <petscsnes.h> 7 #include <petscds.h> 8 #include <petscconvest.h> 9 10 typedef struct { 11 PetscInt nit; /* Number of benchmark iterations */ 12 PetscBool strong; /* Do not integrate the Laplacian by parts */ 13 } AppCtx; 14 15 static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 16 { 17 PetscInt d; 18 *u = 0.0; 19 for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0*PETSC_PI*x[d]); 20 return 0; 21 } 22 23 static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 24 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 25 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 26 PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 27 { 28 PetscInt d; 29 for (d = 0; d < dim; ++d) f0[0] += -4.0*PetscSqr(PETSC_PI)*PetscSinReal(2.0*PETSC_PI*x[d]); 30 } 31 32 static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 33 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 34 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 35 PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 36 { 37 PetscInt d; 38 for (d = 0; d < dim; ++d) f1[d] = u_x[d]; 39 } 40 41 static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, 42 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 43 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 44 PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 45 { 46 PetscInt d; 47 for (d = 0; d < dim; ++d) g3[d*dim+d] = 1.0; 48 } 49 50 static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 51 { 52 *u = PetscSqr(x[0]) + PetscSqr(x[1]); 53 return 0; 54 } 55 56 static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 57 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 58 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 59 PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 60 { 61 PetscInt d; 62 for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d*dim+d]; 63 f0[0] += 4.0; 64 } 65 66 static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) 67 { 68 69 PetscFunctionBeginUser; 70 options->nit = 10; 71 options->strong = PETSC_FALSE; 72 PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX"); 73 PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL)); 74 PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL)); 75 PetscOptionsEnd(); 76 PetscFunctionReturn(0); 77 } 78 79 static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm) 80 { 81 PetscFunctionBeginUser; 82 PetscCall(DMCreate(comm, dm)); 83 PetscCall(DMSetType(*dm, DMPLEX)); 84 PetscCall(DMSetFromOptions(*dm)); 85 PetscCall(DMSetApplicationContext(*dm, user)); 86 PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view")); 87 PetscFunctionReturn(0); 88 } 89 90 static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user) 91 { 92 PetscDS ds; 93 DMLabel label; 94 const PetscInt id = 1; 95 96 PetscFunctionBeginUser; 97 PetscCall(DMGetDS(dm, &ds)); 98 PetscCall(DMGetLabel(dm, "marker", &label)); 99 if (user->strong) { 100 PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL)); 101 PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user)); 102 PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void)) quadratic_u, NULL, user, NULL)); 103 } else { 104 PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u)); 105 PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu)); 106 PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user)); 107 PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void)) trig_u, NULL, user, NULL)); 108 } 109 PetscFunctionReturn(0); 110 } 111 112 static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user) 113 { 114 DM cdm = dm; 115 PetscFE fe; 116 DMPolytopeType ct; 117 PetscBool simplex; 118 PetscInt dim, cStart; 119 char prefix[PETSC_MAX_PATH_LEN]; 120 121 PetscFunctionBeginUser; 122 PetscCall(DMGetDimension(dm, &dim)); 123 PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL)); 124 PetscCall(DMPlexGetCellType(dm, cStart, &ct)); 125 simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct)+1 ? PETSC_TRUE : PETSC_FALSE; // false 126 /* Create finite element */ 127 PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name)); 128 PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe)); 129 PetscCall(PetscObjectSetName((PetscObject) fe, name)); 130 /* Set discretization and boundary conditions for each mesh */ 131 PetscCall(DMSetField(dm, 0, NULL, (PetscObject) fe)); 132 PetscCall(DMCreateDS(dm)); 133 PetscCall((*setup)(dm, user)); 134 while (cdm) { 135 PetscCall(DMCopyDisc(dm,cdm)); 136 /* TODO: Check whether the boundary of coarse meshes is marked */ 137 PetscCall(DMGetCoarseDM(cdm, &cdm)); 138 } 139 PetscCall(PetscFEDestroy(&fe)); 140 PetscFunctionReturn(0); 141 } 142 143 int main(int argc, char **argv) 144 { 145 DM dm; /* Problem specification */ 146 SNES snes; /* Nonlinear solver */ 147 Vec u; /* Solutions */ 148 AppCtx user; /* User-defined work context */ 149 PetscLogDouble time; 150 Mat Amat; 151 152 PetscCall(PetscInitialize(&argc, &argv, NULL,help)); 153 PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user)); 154 /* system */ 155 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 156 PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm)); 157 PetscCall(SNESSetDM(snes, dm)); 158 PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user)); 159 PetscCall(DMCreateGlobalVector(dm, &u)); 160 PetscCall(SNESSetFromOptions(snes)); 161 PetscCall(PetscObjectSetName((PetscObject) u, "potential")); 162 PetscCall(DMPlexSetSNESLocalFEM(dm, &user, &user, &user)); 163 PetscCall(DMSNESCheckFromOptions(snes, u)); 164 PetscCall(PetscTime(&time)); 165 PetscCall(SNESSetUp(snes)); 166 PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL)); 167 PetscCall(MatSetOption(Amat,MAT_SPD,PETSC_TRUE)); 168 PetscCall(MatSetOption(Amat,MAT_SPD_ETERNAL,PETSC_TRUE)); 169 PetscCall(SNESSolve(snes, NULL, u)); 170 PetscCall(PetscTimeSubtract(&time)); 171 // PetscCall(PetscPrintf(PETSC_COMM_WORLD,"First Solve time: %g\n",-time)); 172 /* Benchmark system */ 173 if (user.nit) { 174 Vec b; 175 PetscInt i; 176 #if defined(PETSC_USE_LOG) 177 PetscLogStage kspstage; 178 #endif 179 PetscCall(PetscLogStageRegister("Solve only", &kspstage)); 180 PetscCall(PetscLogStagePush(kspstage)); 181 PetscCall(SNESGetSolution(snes, &u)); 182 PetscCall(SNESGetFunction(snes, &b, NULL, NULL)); 183 for (i=0;i<user.nit;i++) { 184 PetscCall(VecZeroEntries(u)); 185 PetscCall(SNESSolve(snes, NULL, u)); 186 } 187 PetscCall(PetscLogStagePop()); 188 } 189 PetscCall(SNESGetSolution(snes, &u)); 190 PetscCall(VecViewFromOptions(u, NULL, "-potential_view")); 191 /* Cleanup */ 192 PetscCall(VecDestroy(&u)); 193 PetscCall(SNESDestroy(&snes)); 194 PetscCall(DMDestroy(&dm)); 195 PetscCall(PetscFinalize()); 196 return 0; 197 } 198 199 /*TEST 200 201 test: 202 suffix: strong 203 requires: triangle 204 args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong 205 206 test: 207 suffix: bench 208 nsize: 4 209 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -dm_view -ksp_monitor \ 210 -benchmark_it 1 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_dim 3 -ksp_converged_reason \ 211 -ksp_norm_type unpreconditioned -ksp_rtol 1.e-6 -ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 \ 212 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -mg_levels_pc_type jacobi -pc_gamg_coarse_eq_limit 200 \ 213 -pc_gamg_coarse_grid_layout_type compact -pc_gamg_esteig_ksp_max_it 5 -pc_gamg_process_eq_limit 200 \ 214 -pc_gamg_repartition false -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -pc_gamg_threshold_scale .5 \ 215 -pc_gamg_type agg -pc_type gamg -petscpartitioner_simple_node_grid 1,2,1 -petscpartitioner_simple_process_grid 2,1,1 \ 216 -petscpartitioner_type simple -potential_petscspace_degree 2 -snes_lag_jacobian -2 -snes_max_it 1 -snes_rtol 1.e-8 -snes_type ksponly -use_gpu_aware_mpi true 217 218 testset: 219 nsize: 4 220 output_file: output/ex13_comparison.out 221 args: -dm_plex_dim 2 -benchmark_it 10 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 222 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple \ 223 -dm_plex_simplex 0 -snes_type ksponly -dm_view -ksp_type cg -pc_type gamg -pc_gamg_process_eq_limit 400 \ 224 -ksp_norm_type unpreconditioned -ksp_converged_reason 225 test: 226 suffix: comparison 227 test: 228 suffix: cuda 229 requires: cuda 230 args: -dm_mat_type aijcusparse -dm_vec_type cuda 231 test: 232 suffix: kokkos 233 requires: !sycl kokkos_kernels 234 args: -dm_mat_type aijkokkos -dm_vec_type kokkos 235 test: 236 suffix: aijmkl_comp 237 requires: mkl_sparse 238 args: -dm_mat_type aijmkl 239 240 test: 241 suffix: aijmkl_seq 242 nsize: 1 243 requires: mkl_sparse 244 TODO: broken (INDEFINITE PC) 245 args: -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_refine 1 -petscpartitioner_type simple -potential_petscspace_degree 1 -dm_plex_simplex 0 \ 246 -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_threshold -1 -pc_gamg_square_graph 10 -pc_gamg_process_eq_limit 400 \ 247 -pc_gamg_reuse_interpolation -pc_gamg_coarse_eq_limit 10 -pc_gamg_esteig_ksp_type cg -ksp_type cg -ksp_norm_type unpreconditioned \ 248 -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl -dm_vec_type standard 249 250 TEST*/ 251