1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 // libCEED + PETSc Example: CEED BPs 18 // 19 // This example demonstrates a simple usage of libCEED with PETSc to solve the 20 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 21 // 22 // The code uses higher level communication protocols in DMPlex. 23 // 24 // Build with: 25 // 26 // make bps [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 27 // 28 // Sample runs: 29 // 30 // ./bps -problem bp1 -degree 3 31 // ./bps -problem bp2 -ceed /cpu/self -degree 3 32 // ./bps -problem bp3 -ceed /gpu/occa -degree 3 33 // ./bps -problem bp4 -ceed /cpu/occa -degree 3 34 // ./bps -problem bp5 -ceed /omp/occa -degree 3 35 // ./bps -problem bp6 -ceed /ocl/occa -degree 3 36 // 37 //TESTARGS -ceed {ceed_resource} -test -problem bp5 -degree 3 38 39 /// @file 40 /// CEED BPs example using PETSc with DMPlex 41 /// See bpsraw.c for a "raw" implementation using a structured grid. 42 const char help[] = "Solve CEED BPs using PETSc with DMPlex\n"; 43 44 #include <stdbool.h> 45 #include <string.h> 46 #include <petscksp.h> 47 #include <petscdmplex.h> 48 #include <ceed.h> 49 #include "setup.h" 50 51 // ----------------------------------------------------------------------------- 52 // Utilities 53 // ----------------------------------------------------------------------------- 54 55 // Utility function, compute three factors of an integer 56 static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 57 for (PetscInt d=0,sizeleft=size; d<3; d++) { 58 PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 59 while (try * (sizeleft / try) != sizeleft) try++; 60 m[reverse ? 2-d : d] = try; 61 sizeleft /= try; 62 } 63 } 64 65 static int Max3(const PetscInt a[3]) { 66 return PetscMax(a[0], PetscMax(a[1], a[2])); 67 } 68 69 static int Min3(const PetscInt a[3]) { 70 return PetscMin(a[0], PetscMin(a[1], a[2])); 71 } 72 73 // ----------------------------------------------------------------------------- 74 // Parameter structure for running problems 75 // ----------------------------------------------------------------------------- 76 typedef struct RunParams_ *RunParams; 77 struct RunParams_ { 78 MPI_Comm comm; 79 PetscBool test_mode, benchmark_mode, read_mesh, userlnodes, setmemtyperequest, 80 petschavecuda, write_solution; 81 char *filename, *ceedresource; 82 PetscInt localnodes, degree, qextra, dim, ncompu, *melem; 83 bpType bpchoice; 84 CeedMemType memtyperequested; 85 PetscLogStage solvestage; 86 }; 87 88 // ----------------------------------------------------------------------------- 89 // Main body of program, called in a loop for performance benchmarking purposes 90 // ----------------------------------------------------------------------------- 91 92 static PetscErrorCode Run(RunParams rp) { 93 PetscInt ierr; 94 double my_rt_start, my_rt, rt_min, rt_max; 95 PetscInt xlsize, lsize, gsize; 96 PetscScalar *r; 97 Vec X, Xloc, rhs, rhsloc; 98 Mat matO; 99 KSP ksp; 100 DM dm; 101 UserO userO; 102 Ceed ceed; 103 CeedData ceeddata; 104 CeedQFunction qferror; 105 CeedOperator operror; 106 CeedVector rhsceed, target; 107 108 PetscFunctionBeginUser; 109 // Setup DM 110 if (rp->read_mesh) { 111 ierr = DMPlexCreateFromFile(PETSC_COMM_WORLD, rp->filename, PETSC_TRUE, &dm); 112 CHKERRQ(ierr); 113 } else { 114 if (rp->userlnodes) { 115 // Find a nicely composite number of elements no less than global nodes 116 PetscMPIInt size; 117 ierr = MPI_Comm_size(rp->comm, &size); CHKERRQ(ierr); 118 for (PetscInt gelem = 119 PetscMax(1, size * rp->localnodes / PetscPowInt(rp->degree, rp->dim)); 120 ; 121 gelem++) { 122 Split3(gelem, rp->melem, true); 123 if (Max3(rp->melem) / Min3(rp->melem) <= 2) break; 124 } 125 } 126 ierr = DMPlexCreateBoxMesh(PETSC_COMM_WORLD, rp->dim, PETSC_FALSE, rp->melem, 127 NULL, NULL, NULL, PETSC_TRUE, &dm); CHKERRQ(ierr); 128 } 129 130 { 131 DM dmDist = NULL; 132 PetscPartitioner part; 133 134 ierr = DMPlexGetPartitioner(dm, &part); CHKERRQ(ierr); 135 ierr = PetscPartitionerSetFromOptions(part); CHKERRQ(ierr); 136 ierr = DMPlexDistribute(dm, 0, NULL, &dmDist); CHKERRQ(ierr); 137 if (dmDist) { 138 ierr = DMDestroy(&dm); CHKERRQ(ierr); 139 dm = dmDist; 140 } 141 } 142 143 // Set up libCEED 144 CeedInit(rp->ceedresource, &ceed); 145 CeedMemType memtypebackend; 146 CeedGetPreferredMemType(ceed, &memtypebackend); 147 148 // Check memtype compatibility 149 if (!rp->setmemtyperequest) 150 rp->memtyperequested = memtypebackend; 151 else if (!rp->petschavecuda && rp->memtyperequested == CEED_MEM_DEVICE) 152 SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP_SYS, 153 "PETSc was not built with CUDA. " 154 "Requested MemType CEED_MEM_DEVICE is not supported.", NULL); 155 156 // Create DM 157 ierr = SetupDMByDegree(dm, rp->degree, rp->ncompu, rp->bpchoice); 158 CHKERRQ(ierr); 159 160 // Create vectors 161 if (rp->memtyperequested == CEED_MEM_DEVICE) { 162 ierr = DMSetVecType(dm, VECCUDA); CHKERRQ(ierr); 163 } 164 ierr = DMCreateGlobalVector(dm, &X); CHKERRQ(ierr); 165 ierr = VecGetLocalSize(X, &lsize); CHKERRQ(ierr); 166 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 167 ierr = DMCreateLocalVector(dm, &Xloc); CHKERRQ(ierr); 168 ierr = VecGetSize(Xloc, &xlsize); CHKERRQ(ierr); 169 ierr = VecDuplicate(X, &rhs); CHKERRQ(ierr); 170 171 // Operator 172 ierr = PetscMalloc1(1, &userO); CHKERRQ(ierr); 173 ierr = MatCreateShell(rp->comm, lsize, lsize, gsize, gsize, 174 userO, &matO); CHKERRQ(ierr); 175 ierr = MatShellSetOperation(matO, MATOP_MULT, 176 (void(*)(void))MatMult_Ceed); CHKERRQ(ierr); 177 ierr = MatShellSetOperation(matO, MATOP_GET_DIAGONAL, 178 (void(*)(void))MatGetDiag); CHKERRQ(ierr); 179 if (rp->memtyperequested == CEED_MEM_DEVICE) { 180 ierr = MatShellSetVecType(matO, VECCUDA); CHKERRQ(ierr); 181 } 182 183 // Print summary 184 if (!rp->test_mode) { 185 PetscInt P = rp->degree + 1, Q = P + rp->qextra; 186 187 const char *usedresource; 188 CeedGetResource(ceed, &usedresource); 189 190 VecType vectype; 191 ierr = VecGetType(X, &vectype); CHKERRQ(ierr); 192 193 PetscInt cStart, cEnd; 194 ierr = DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd); CHKERRQ(ierr); 195 ierr = PetscPrintf(rp->comm, 196 "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" 197 " PETSc:\n" 198 " PETSc Vec Type : %s\n" 199 " libCEED:\n" 200 " libCEED Backend : %s\n" 201 " libCEED Backend MemType : %s\n" 202 " libCEED User Requested MemType : %s\n" 203 " Mesh:\n" 204 " Number of 1D Basis Nodes (P) : %d\n" 205 " Number of 1D Quadrature Points (Q) : %d\n" 206 " Global nodes : %D\n" 207 " Local Elements : %D\n" 208 " Owned nodes : %D\n" 209 " DoF per node : %D\n", 210 rp->bpchoice+1, vectype, usedresource, 211 CeedMemTypes[memtypebackend], 212 (rp->setmemtyperequest) ? 213 CeedMemTypes[rp->memtyperequested] : "none", 214 P, Q, gsize/rp->ncompu, cEnd - cStart, lsize/rp->ncompu, 215 rp->ncompu); 216 CHKERRQ(ierr); 217 } 218 219 // Create RHS vector 220 ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 221 ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 222 if (rp->memtyperequested == CEED_MEM_HOST) { 223 ierr = VecGetArray(rhsloc, &r); CHKERRQ(ierr); 224 } else { 225 ierr = VecCUDAGetArray(rhsloc, &r); CHKERRQ(ierr); 226 } 227 CeedVectorCreate(ceed, xlsize, &rhsceed); 228 CeedVectorSetArray(rhsceed, rp->memtyperequested, CEED_USE_POINTER, r); 229 230 ierr = PetscMalloc1(1, &ceeddata); CHKERRQ(ierr); 231 ierr = SetupLibceedByDegree(dm, ceed, rp->degree, rp->dim, rp->qextra, 232 rp->ncompu, gsize, xlsize, rp->bpchoice, ceeddata, 233 true, rhsceed, &target); CHKERRQ(ierr); 234 235 // Gather RHS 236 CeedVectorSyncArray(rhsceed, rp->memtyperequested); 237 if (rp->memtyperequested == CEED_MEM_HOST) { 238 ierr = VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); 239 } else { 240 ierr = VecCUDARestoreArray(rhsloc, &r); CHKERRQ(ierr); 241 } 242 ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 243 ierr = DMLocalToGlobalBegin(dm, rhsloc, ADD_VALUES, rhs); CHKERRQ(ierr); 244 ierr = DMLocalToGlobalEnd(dm, rhsloc, ADD_VALUES, rhs); CHKERRQ(ierr); 245 CeedVectorDestroy(&rhsceed); 246 247 // Create the error Q-function 248 CeedQFunctionCreateInterior(ceed, 1, bpOptions[rp->bpchoice].error, 249 bpOptions[rp->bpchoice].errorfname, &qferror); 250 CeedQFunctionAddInput(qferror, "u", rp->ncompu, CEED_EVAL_INTERP); 251 CeedQFunctionAddInput(qferror, "true_soln", rp->ncompu, CEED_EVAL_NONE); 252 CeedQFunctionAddOutput(qferror, "error", rp->ncompu, CEED_EVAL_NONE); 253 254 // Create the error operator 255 CeedOperatorCreate(ceed, qferror, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, 256 &operror); 257 CeedOperatorSetField(operror, "u", ceeddata->Erestrictu, 258 ceeddata->basisu, CEED_VECTOR_ACTIVE); 259 CeedOperatorSetField(operror, "true_soln", ceeddata->Erestrictui, 260 CEED_BASIS_COLLOCATED, target); 261 CeedOperatorSetField(operror, "error", ceeddata->Erestrictui, 262 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 263 264 // Set up Mat 265 userO->comm = rp->comm; 266 userO->dm = dm; 267 userO->Xloc = Xloc; 268 ierr = VecDuplicate(Xloc, &userO->Yloc); CHKERRQ(ierr); 269 userO->xceed = ceeddata->xceed; 270 userO->yceed = ceeddata->yceed; 271 userO->op = ceeddata->opapply; 272 userO->ceed = ceed; 273 userO->memtype = rp->memtyperequested; 274 if (rp->memtyperequested == CEED_MEM_HOST) { 275 userO->VecGetArray = VecGetArray; 276 userO->VecGetArrayRead = VecGetArrayRead; 277 userO->VecRestoreArray = VecRestoreArray; 278 userO->VecRestoreArrayRead = VecRestoreArrayRead; 279 } else { 280 userO->VecGetArray = VecCUDAGetArray; 281 userO->VecGetArrayRead = VecCUDAGetArrayRead; 282 userO->VecRestoreArray = VecCUDARestoreArray; 283 userO->VecRestoreArrayRead = VecCUDARestoreArrayRead; 284 } 285 286 ierr = KSPCreate(rp->comm, &ksp); CHKERRQ(ierr); 287 { 288 PC pc; 289 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 290 if (rp->bpchoice == CEED_BP1 || rp->bpchoice == CEED_BP2) { 291 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 292 ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 293 } else { 294 ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 295 } 296 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 297 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 298 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 299 PETSC_DEFAULT); CHKERRQ(ierr); 300 } 301 ierr = KSPSetOperators(ksp, matO, matO); CHKERRQ(ierr); 302 303 // First run, if benchmarking 304 if (rp->benchmark_mode) { 305 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 306 CHKERRQ(ierr); 307 my_rt_start = MPI_Wtime(); 308 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 309 my_rt = MPI_Wtime() - my_rt_start; 310 ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, rp->comm); 311 CHKERRQ(ierr); 312 // Set maxits based on first iteration timing 313 if (my_rt > 0.02) { 314 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); 315 CHKERRQ(ierr); 316 } else { 317 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); 318 CHKERRQ(ierr); 319 } 320 } 321 ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 322 323 // Timed solve 324 ierr = VecZeroEntries(X); CHKERRQ(ierr); 325 ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); 326 327 // -- Performance logging 328 ierr = PetscLogStagePush(rp->solvestage); CHKERRQ(ierr); 329 330 // -- Solve 331 my_rt_start = MPI_Wtime(); 332 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 333 my_rt = MPI_Wtime() - my_rt_start; 334 335 // -- Performance logging 336 ierr = PetscLogStagePop(); 337 338 // Output results 339 { 340 KSPType ksptype; 341 KSPConvergedReason reason; 342 PetscReal rnorm; 343 PetscInt its; 344 ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 345 ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 346 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 347 ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 348 if (!rp->test_mode || reason < 0 || rnorm > 1e-8) { 349 ierr = PetscPrintf(rp->comm, 350 " KSP:\n" 351 " KSP Type : %s\n" 352 " KSP Convergence : %s\n" 353 " Total KSP Iterations : %D\n" 354 " Final rnorm : %e\n", 355 ksptype, KSPConvergedReasons[reason], its, 356 (double)rnorm); CHKERRQ(ierr); 357 } 358 if (!rp->test_mode) { 359 ierr = PetscPrintf(rp->comm," Performance:\n"); CHKERRQ(ierr); 360 } 361 { 362 PetscReal maxerror; 363 ierr = ComputeErrorMax(userO, operror, X, target, &maxerror); 364 CHKERRQ(ierr); 365 PetscReal tol = 5e-2; 366 if (!rp->test_mode || maxerror > tol) { 367 ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, rp->comm); 368 CHKERRQ(ierr); 369 ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, rp->comm); 370 CHKERRQ(ierr); 371 ierr = PetscPrintf(rp->comm, 372 " Pointwise Error (max) : %e\n" 373 " CG Solve Time : %g (%g) sec\n", 374 (double)maxerror, rt_max, rt_min); CHKERRQ(ierr); 375 } 376 } 377 if (rp->benchmark_mode && (!rp->test_mode)) { 378 ierr = PetscPrintf(rp->comm, 379 " DoFs/Sec in CG : %g (%g) million\n", 380 1e-6*gsize*its/rt_max, 381 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 382 } 383 } 384 385 if (rp->write_solution) { 386 PetscViewer vtkviewersoln; 387 388 ierr = PetscViewerCreate(rp->comm, &vtkviewersoln); CHKERRQ(ierr); 389 ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr); 390 ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr); 391 ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr); 392 ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr); 393 } 394 395 // Cleanup 396 ierr = VecDestroy(&X); CHKERRQ(ierr); 397 ierr = VecDestroy(&Xloc); CHKERRQ(ierr); 398 ierr = VecDestroy(&userO->Yloc); CHKERRQ(ierr); 399 ierr = MatDestroy(&matO); CHKERRQ(ierr); 400 ierr = PetscFree(userO); CHKERRQ(ierr); 401 ierr = CeedDataDestroy(0, ceeddata); CHKERRQ(ierr); 402 ierr = DMDestroy(&dm); CHKERRQ(ierr); 403 404 ierr = VecDestroy(&rhs); CHKERRQ(ierr); 405 ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 406 ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 407 CeedVectorDestroy(&target); 408 CeedQFunctionDestroy(&qferror); 409 CeedOperatorDestroy(&operror); 410 CeedDestroy(&ceed); 411 PetscFunctionReturn(0); 412 } 413 414 int main(int argc, char **argv) { 415 PetscInt ierr, commsize; 416 RunParams rp; 417 MPI_Comm comm, splitcomm; 418 PetscLogStage solvestage; 419 char filename[PETSC_MAX_PATH_LEN], 420 ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 421 422 PetscInt degree = 3, qextra, dim = 3, melem[3] = {3, 3, 3}, 423 ncompu = 1, localnodes, maxdegree = 8, totproc, procpercompnode, maxdofsnode = 3 * PetscPowInt(2, 20); 424 PetscBool test_mode, benchmark_mode, read_mesh, write_solution, 425 userlnodes = PETSC_FALSE; 426 CeedMemType memtyperequested; 427 bpType bpchoice; 428 429 // Check PETSc CUDA support 430 PetscBool petschavecuda, setmemtyperequest = PETSC_FALSE; 431 // *INDENT-OFF* 432 #ifdef PETSC_HAVE_CUDA 433 petschavecuda = PETSC_TRUE; 434 #else 435 petschavecuda = PETSC_FALSE; 436 #endif 437 // *INDENT-ON* 438 439 // Initialize PETSc 440 ierr = PetscInitialize(&argc, &argv, NULL, help); 441 if (ierr) return ierr; 442 comm = PETSC_COMM_WORLD; 443 ierr = MPI_Comm_size(comm, &commsize); 444 if (ierr != MPI_SUCCESS) return ierr; 445 // Determine number of processes per compute node 446 ierr = MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, 447 &splitcomm); 448 if (ierr != MPI_SUCCESS) return ierr; 449 ierr = MPI_Comm_size(splitcomm, &procpercompnode); 450 if (ierr != MPI_SUCCESS) return ierr; 451 452 // Read command line options 453 ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); 454 CHKERRQ(ierr); 455 bpchoice = CEED_BP1; 456 ierr = PetscOptionsEnum("-problem", 457 "CEED benchmark problem to solve", NULL, 458 bpTypes, (PetscEnum)bpchoice, (PetscEnum *)&bpchoice, 459 NULL); CHKERRQ(ierr); 460 ncompu = bpOptions[bpchoice].ncompu; 461 test_mode = PETSC_FALSE; 462 ierr = PetscOptionsBool("-test", 463 "Testing mode (do not print unless error is large)", 464 NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); 465 benchmark_mode = PETSC_FALSE; 466 ierr = PetscOptionsBool("-benchmark", 467 "Benchmarking mode (prints benchmark statistics)", 468 NULL, benchmark_mode, &benchmark_mode, NULL); 469 CHKERRQ(ierr); 470 write_solution = PETSC_FALSE; 471 ierr = PetscOptionsBool("-write_solution", 472 "Write solution for visualization", 473 NULL, write_solution, &write_solution, NULL); 474 CHKERRQ(ierr); 475 degree = test_mode ? 3 : 2; 476 ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", 477 NULL, degree, °ree, NULL); CHKERRQ(ierr); 478 qextra = bpOptions[bpchoice].qextra; 479 ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", 480 NULL, qextra, &qextra, NULL); CHKERRQ(ierr); 481 ierr = PetscOptionsString("-ceed", "CEED resource specifier", 482 NULL, ceedresource, ceedresource, 483 sizeof(ceedresource), NULL); CHKERRQ(ierr); 484 read_mesh = PETSC_FALSE; 485 ierr = PetscOptionsString("-mesh", "Read mesh from file", NULL, 486 filename, filename, sizeof(filename), &read_mesh); 487 CHKERRQ(ierr); 488 if (!read_mesh) { 489 PetscInt tmp = dim; 490 ierr = PetscOptionsIntArray("-cells", "Number of cells per dimension", NULL, 491 melem, &tmp, NULL); CHKERRQ(ierr); 492 } 493 memtyperequested = petschavecuda ? CEED_MEM_DEVICE : CEED_MEM_HOST; 494 ierr = PetscOptionsEnum("-memtype", 495 "CEED MemType requested", NULL, 496 memTypes, (PetscEnum)memtyperequested, 497 (PetscEnum *)&memtyperequested, &setmemtyperequest); 498 CHKERRQ(ierr); 499 localnodes = 1000; 500 ierr = PetscOptionsInt("-local_nodes", 501 "Target number of locally owned nodes per process", 502 NULL, localnodes, &localnodes, &userlnodes); 503 CHKERRQ(ierr); 504 ierr = PetscOptionsInt("-max_degree", 505 "Max degree to run in benchmark mode", 506 NULL, maxdegree, &maxdegree, NULL); CHKERRQ(ierr); 507 totproc = commsize; 508 ierr = PetscOptionsInt("-n", 509 "Total number of processors across all compute nodes", 510 NULL, totproc, &totproc, NULL); CHKERRQ(ierr); 511 ierr = PetscOptionsInt("-p", 512 "Number of processors per node", 513 NULL, procpercompnode, &procpercompnode, NULL); 514 CHKERRQ(ierr); 515 516 ierr = PetscOptionsEnd(); CHKERRQ(ierr); 517 518 // Register PETSc logging stage 519 ierr = PetscLogStageRegister("Solve Stage", &solvestage); CHKERRQ(ierr); 520 521 // Setup all parameters needed in Run() 522 ierr = PetscMalloc1(1, &rp); CHKERRQ(ierr); 523 rp->comm = comm; 524 rp->test_mode = test_mode; 525 rp->benchmark_mode = benchmark_mode; 526 rp->read_mesh = read_mesh; 527 rp->userlnodes = userlnodes; 528 rp->setmemtyperequest = setmemtyperequest; 529 rp->petschavecuda = petschavecuda; 530 rp->write_solution = write_solution; 531 rp->filename = filename; 532 rp->ceedresource = ceedresource; 533 rp->localnodes = localnodes; 534 rp->degree = degree; 535 rp->qextra = qextra; 536 rp->dim = dim; 537 rp->ncompu = ncompu; 538 rp->melem = melem; 539 rp->bpchoice = bpchoice; 540 rp->memtyperequested = memtyperequested; 541 rp->solvestage = solvestage; 542 543 if (benchmark_mode) { 544 PetscInt maxlocnodes = maxdofsnode / procpercompnode; 545 // In benchmark_mode, call the main body of the program in a loop 546 for (PetscInt p = 1; p <= maxdegree; p++) { 547 for (PetscInt e = 1; e * PetscPowInt(degree, 3) <= maxlocnodes; e *= 2) { 548 rp->degree = p; 549 rp->localnodes = e * PetscPowInt(degree, 3); 550 ierr = Run(rp); CHKERRQ(ierr); 551 } 552 } 553 } 554 else { // Othersise run program only once 555 ierr = Run(rp); CHKERRQ(ierr); 556 } 557 558 // Clear memory 559 ierr = PetscFree(rp); CHKERRQ(ierr); 560 561 return PetscFinalize(); 562 } 563