static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\
We solve the Poisson problem in a rectangular domain\n\
using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n";

#include <petscdmplex.h>
#include <petscsnes.h>
#include <petscds.h>
#include <petscconvest.h>
#if defined(PETSC_HAVE_AMGX)
#include <amgx_c.h>
#endif

typedef struct {
  PetscInt  nit;    /* Number of benchmark iterations */
  PetscBool strong; /* Do not integrate the Laplacian by parts */
} AppCtx;

/* Exact solution for the weak form: u = sum_d sin(2 pi x_d) */
static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
{
  PetscInt d;

  *u = 0.0;
  for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0 * PETSC_PI * x[d]);
  return PETSC_SUCCESS;
}

/* Residual f0 = -f, where f = 4 pi^2 sum_d sin(2 pi x_d) is the forcing for -Delta u = f */
static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f0[0] += -4.0 * PetscSqr(PETSC_PI) * PetscSinReal(2.0 * PETSC_PI * x[d]);
}

/* Residual f1 = grad u, tested against grad v after integrating the Laplacian by parts */
static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f1[d] = u_x[d];
}

/* Jacobian g3 is the identity tensor, matching the weak Laplacian in f1_u */
static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
}

/* Exact solution for the strong form: u = x^2 + y^2, so Delta u = 4 */
static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
{
  *u = PetscSqr(x[0]) + PetscSqr(x[1]);
  return PETSC_SUCCESS;
}

/* Strong form residual -Delta u + 4: second derivatives come from the jet (-dm_ds_jet_degree 2),
   so u_x holds the gradient (dim entries) followed by the Hessian */
static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
{
  PetscInt d;
  for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d * dim + d];
  f0[0] += 4.0;
}

static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options)
{
  PetscFunctionBeginUser;
  options->nit    = 10;
  options->strong = PETSC_FALSE;
  PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");
  PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL));
  PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL));
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
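/* Create the mesh from command line options (-dm_plex_dim, -dm_plex_box_faces, -dm_refine, ...),
   then randomly perturb its vertex coordinates so the cells are no longer affine images of the
   reference cell and the general-geometry assembly paths are exercised */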
static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
{
  PetscFunctionBeginUser;
  PetscCall(DMCreate(comm, dm));
  PetscCall(DMSetType(*dm, DMPLEX));
  PetscCall(DMSetFromOptions(*dm));
  PetscCall(DMSetApplicationContext(*dm, user));
  PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view"));
  { // perturb to get general coordinates
    Vec          coordinates;
    PetscScalar *coords;
    PetscInt     nloc, v;
    PetscRandom  rnd;
    PetscReal    del;

    PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &rnd));
    PetscCall(PetscRandomSetInterval(rnd, -PETSC_SQRT_MACHINE_EPSILON, PETSC_SQRT_MACHINE_EPSILON));
    PetscCall(PetscRandomSetFromOptions(rnd));
    PetscCall(DMGetCoordinatesLocal(*dm, &coordinates));
    PetscCall(VecGetArray(coordinates, &coords));
    PetscCall(VecGetLocalSize(coordinates, &nloc));
    for (v = 0; v < nloc; ++v) {
      PetscCall(PetscRandomGetValueReal(rnd, &del));
      coords[v] += del * coords[v]; /* relative perturbation of O(sqrt(machine epsilon)) */
    }
    PetscCall(VecRestoreArray(coordinates, &coords));
    PetscCall(PetscRandomDestroy(&rnd));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user)
{
  PetscDS        ds;
  DMLabel        label;
  const PetscInt id = 1;

  PetscFunctionBeginUser;
  PetscCall(DMGetDS(dm, &ds));
  PetscCall(DMGetLabel(dm, "marker", &label));
  if (user->strong) {
    PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL));
    PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user));
    PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (PetscVoidFn *)quadratic_u, NULL, user, NULL));
  } else {
    PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u));
    PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu));
    PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user));
    PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (PetscVoidFn *)trig_u, NULL, user, NULL));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
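/* Create a PetscFE for the single potential field (degree given by -potential_petscspace_degree),
   attach it to the DM, build the PetscDS with the supplied setup callback, and copy the
   discretization to any coarse DMs so a multigrid hierarchy sees the same problem */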
static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user)
{
  DM             cdm = dm;
  PetscFE        fe;
  DMPolytopeType ct;
  PetscBool      simplex;
  PetscInt       dim, cStart;
  char           prefix[PETSC_MAX_PATH_LEN];

  PetscFunctionBeginUser;
  PetscCall(DMGetDimension(dm, &dim));
  PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL));
  PetscCall(DMPlexGetCellType(dm, cStart, &ct));
  simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct) + 1 ? PETSC_TRUE : PETSC_FALSE; // a simplex has dim+1 vertices; false for the tensor cells used in the benchmark runs
  /* Create finite element */
  PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name));
  PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe));
  PetscCall(PetscObjectSetName((PetscObject)fe, name));
  /* Set discretization and boundary conditions for each mesh */
  PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
  PetscCall(DMCreateDS(dm));
  PetscCall((*setup)(dm, user));
  while (cdm) {
    PetscCall(DMCopyDisc(dm, cdm));
    /* TODO: Check whether the boundary of coarse meshes is marked */
    PetscCall(DMGetCoarseDM(cdm, &cdm));
  }
  PetscCall(PetscFEDestroy(&fe));
  PetscFunctionReturn(PETSC_SUCCESS);
}
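/* Set up and solve the system once, then, if -benchmark_it is positive, re-solve it that many
   times inside a separate "Solve only" log stage so -log_view reports solve time apart from setup */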
int main(int argc, char **argv)
{
  DM             dm;   /* Problem specification */
  SNES           snes; /* Nonlinear solver */
  Vec            u;    /* Solution */
  AppCtx         user; /* User-defined work context */
  PetscLogDouble time;
  Mat            Amat;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user));
  /* system */
  PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
  PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm));
  PetscCall(SNESSetDM(snes, dm));
  PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user));
  PetscCall(DMCreateGlobalVector(dm, &u));
  {
    PetscInt N;

    PetscCall(VecGetSize(u, &N));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number equations N = %" PetscInt_FMT "\n", N));
  }
  PetscCall(SNESSetFromOptions(snes));
  PetscCall(PetscObjectSetName((PetscObject)u, "potential"));
  PetscCall(DMPlexSetSNESLocalFEM(dm, PETSC_FALSE, &user));
  PetscCall(DMSNESCheckFromOptions(snes, u));
  PetscCall(PetscTime(&time));
  PetscCall(SNESSetUp(snes));
#if defined(PETSC_HAVE_AMGX)
  KSP                   ksp;
  PC                    pc;
  PetscBool             flg;
  AMGX_resources_handle rsc;

  PetscCall(SNESGetKSP(snes, &ksp));
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCAMGX, &flg));
  if (flg) {
    PetscCall(PCAmgXGetResources(pc, (void *)&rsc));
    /* do ... with resource */
  }
#endif
  PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL));
  /* Inform the solvers that the operator is, and will remain, symmetric positive definite */
  PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
  PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
  PetscCall(SNESSolve(snes, NULL, u));
  PetscCall(PetscTimeSubtract(&time));
  /* Benchmark system */
  if (user.nit) {
    Vec           b;
    PetscInt      i;
    PetscLogStage kspstage;

    PetscCall(PetscLogStageRegister("Solve only", &kspstage));
    PetscCall(PetscLogStagePush(kspstage));
    PetscCall(SNESGetSolution(snes, &u));
    PetscCall(SNESGetFunction(snes, &b, NULL, NULL));
    for (i = 0; i < user.nit; i++) {
      PetscCall(VecZeroEntries(u)); /* reset the initial guess so every solve does the same work */
      PetscCall(SNESSolve(snes, NULL, u));
    }
    PetscCall(PetscLogStagePop());
  }
  PetscCall(SNESGetSolution(snes, &u));
  PetscCall(VecViewFromOptions(u, NULL, "-potential_view"));
  /* Cleanup */
  PetscCall(VecDestroy(&u));
  PetscCall(SNESDestroy(&snes));
  PetscCall(DMDestroy(&dm));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

  test:
    suffix: strong
    requires: triangle
    args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong -pc_type jacobi

  testset:
    nsize: 4
    output_file: output/ex13_comparison.out
    args: -dm_plex_dim 3 -benchmark_it 2 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -petscpartitioner_simple_node_grid 1,1,1 -petscpartitioner_simple_process_grid 2,2,1 -potential_petscspace_degree 2 -petscpartitioner_type simple -snes_type ksponly -dm_view -ksp_type cg -ksp_rtol 1e-12 -snes_lag_jacobian -2 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -pc_type gamg -pc_gamg_process_eq_limit 200 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_esteig_ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.05 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_square_graph true -pc_gamg_threshold 0.04 -pc_gamg_threshold_scale .25 -pc_gamg_aggressive_coarsening 2 -pc_gamg_mis_k_minimum_degree_ordering true -ksp_monitor -ksp_norm_type unpreconditioned
    test:
      suffix: comparison
    test:
      suffix: cuda
      requires: cuda
      args: -dm_mat_type aijcusparse -dm_vec_type cuda
    test:
      suffix: kokkos
      requires: kokkos_kernels
      args: -dm_mat_type aijkokkos -dm_vec_type kokkos
    test:
      suffix: kokkos_sycl
      requires: sycl kokkos_kernels
      args: -dm_mat_type aijkokkos -dm_vec_type kokkos
    test:
      suffix: aijmkl_comp
      requires: mkl_sparse
      args: -dm_mat_type aijmkl

  testset:
    requires: cuda amgx
    filter: grep -v Built | grep -v "AMGX version" | grep -v "CUDA Runtime"
    output_file: output/ex13_amgx.out
    args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \
          -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose false
    nsize: 4
    test:
      suffix: amgx
      args: -dm_mat_type aijcusparse -dm_vec_type cuda
    test:
      suffix: amgx_cpu
      args: -dm_mat_type aij

TEST*/