1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 // libCEED + PETSc Example: CEED BPs 18 // 19 // This example demonstrates a simple usage of libCEED with PETSc to solve the 20 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 21 // 22 // The code uses higher level communication protocols in DMPlex. 23 // 24 // Build with: 25 // 26 // make bps [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 27 // 28 // Sample runs: 29 // 30 // ./bps -problem bp1 -degree 3 31 // ./bps -problem bp2 -ceed /cpu/self -degree 3 32 // ./bps -problem bp3 -ceed /gpu/occa -degree 3 33 // ./bps -problem bp4 -ceed /cpu/occa -degree 3 34 // ./bps -problem bp5 -ceed /omp/occa -degree 3 35 // ./bps -problem bp6 -ceed /ocl/occa -degree 3 36 // 37 //TESTARGS -ceed {ceed_resource} -test -problem bp5 -degree 3 38 39 /// @file 40 /// CEED BPs example using PETSc with DMPlex 41 /// See bpsraw.c for a "raw" implementation using a structured grid. 42 const char help[] = "Solve CEED BPs using PETSc with DMPlex\n"; 43 44 #include <stdbool.h> 45 #include <string.h> 46 #include <petscksp.h> 47 #include <petscdmplex.h> 48 #include <ceed.h> 49 #include "setup.h" 50 51 // ----------------------------------------------------------------------------- 52 // Utilities 53 // ----------------------------------------------------------------------------- 54 55 // Utility function, compute three factors of an integer 56 static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 57 for (PetscInt d=0,sizeleft=size; d<3; d++) { 58 PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 59 while (try * (sizeleft / try) != sizeleft) try++; 60 m[reverse ? 2-d : d] = try; 61 sizeleft /= try; 62 } 63 } 64 65 static int Max3(const PetscInt a[3]) { 66 return PetscMax(a[0], PetscMax(a[1], a[2])); 67 } 68 69 static int Min3(const PetscInt a[3]) { 70 return PetscMin(a[0], PetscMin(a[1], a[2])); 71 } 72 73 // ----------------------------------------------------------------------------- 74 // Parameter structure for running problems 75 // ----------------------------------------------------------------------------- 76 typedef struct RunParams_ *RunParams; 77 struct RunParams_ { 78 MPI_Comm comm; 79 PetscBool test_mode, benchmark_mode, read_mesh, userlnodes, setmemtyperequest, 80 petschavecuda, write_solution; 81 char *filename, *ceedresource, *hostname; 82 PetscInt localnodes, degree, qextra, dim, ncompu, *melem; 83 PetscMPIInt rankspernode; 84 bpType bpchoice; 85 CeedMemType memtyperequested; 86 PetscLogStage solvestage; 87 }; 88 89 // ----------------------------------------------------------------------------- 90 // Main body of program, called in a loop for performance benchmarking purposes 91 // ----------------------------------------------------------------------------- 92 93 static PetscErrorCode Run(RunParams rp) { 94 PetscInt ierr; 95 double my_rt_start, my_rt, rt_min, rt_max; 96 PetscInt xlsize, lsize, gsize; 97 PetscScalar *r; 98 Vec X, Xloc, rhs, rhsloc; 99 Mat matO; 100 KSP ksp; 101 DM dm; 102 UserO userO; 103 Ceed ceed; 104 CeedData ceeddata; 105 CeedQFunction qferror; 106 CeedOperator operror; 107 CeedVector rhsceed, target; 108 109 PetscFunctionBeginUser; 110 // Setup DM 111 if (rp->read_mesh) { 112 ierr = DMPlexCreateFromFile(PETSC_COMM_WORLD, rp->filename, PETSC_TRUE, &dm); 113 CHKERRQ(ierr); 114 } else { 115 if (rp->userlnodes) { 116 // Find a nicely composite number of elements no less than global nodes 117 PetscMPIInt size; 118 ierr = MPI_Comm_size(rp->comm, &size); CHKERRQ(ierr); 119 for (PetscInt gelem = 120 PetscMax(1, size * rp->localnodes / PetscPowInt(rp->degree, rp->dim)); 121 ; 122 gelem++) { 123 Split3(gelem, rp->melem, true); 124 if (Max3(rp->melem) / Min3(rp->melem) <= 2) break; 125 } 126 } 127 ierr = DMPlexCreateBoxMesh(PETSC_COMM_WORLD, rp->dim, PETSC_FALSE, rp->melem, 128 NULL, NULL, NULL, PETSC_TRUE, &dm); CHKERRQ(ierr); 129 } 130 131 { 132 DM dmDist = NULL; 133 PetscPartitioner part; 134 135 ierr = DMPlexGetPartitioner(dm, &part); CHKERRQ(ierr); 136 ierr = PetscPartitionerSetFromOptions(part); CHKERRQ(ierr); 137 ierr = DMPlexDistribute(dm, 0, NULL, &dmDist); CHKERRQ(ierr); 138 if (dmDist) { 139 ierr = DMDestroy(&dm); CHKERRQ(ierr); 140 dm = dmDist; 141 } 142 } 143 144 // Set up libCEED 145 CeedInit(rp->ceedresource, &ceed); 146 CeedMemType memtypebackend; 147 CeedGetPreferredMemType(ceed, &memtypebackend); 148 149 // Check memtype compatibility 150 if (!rp->setmemtyperequest) 151 rp->memtyperequested = memtypebackend; 152 else if (!rp->petschavecuda && rp->memtyperequested == CEED_MEM_DEVICE) 153 SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP_SYS, 154 "PETSc was not built with CUDA. " 155 "Requested MemType CEED_MEM_DEVICE is not supported.", NULL); 156 157 // Create DM 158 ierr = SetupDMByDegree(dm, rp->degree, rp->ncompu, rp->bpchoice); 159 CHKERRQ(ierr); 160 161 // Create vectors 162 if (rp->memtyperequested == CEED_MEM_DEVICE) { 163 ierr = DMSetVecType(dm, VECCUDA); CHKERRQ(ierr); 164 } 165 ierr = DMCreateGlobalVector(dm, &X); CHKERRQ(ierr); 166 ierr = VecGetLocalSize(X, &lsize); CHKERRQ(ierr); 167 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 168 ierr = DMCreateLocalVector(dm, &Xloc); CHKERRQ(ierr); 169 ierr = VecGetSize(Xloc, &xlsize); CHKERRQ(ierr); 170 ierr = VecDuplicate(X, &rhs); CHKERRQ(ierr); 171 172 // Operator 173 ierr = PetscMalloc1(1, &userO); CHKERRQ(ierr); 174 ierr = MatCreateShell(rp->comm, lsize, lsize, gsize, gsize, 175 userO, &matO); CHKERRQ(ierr); 176 ierr = MatShellSetOperation(matO, MATOP_MULT, 177 (void(*)(void))MatMult_Ceed); CHKERRQ(ierr); 178 ierr = MatShellSetOperation(matO, MATOP_GET_DIAGONAL, 179 (void(*)(void))MatGetDiag); CHKERRQ(ierr); 180 if (rp->memtyperequested == CEED_MEM_DEVICE) { 181 ierr = MatShellSetVecType(matO, VECCUDA); CHKERRQ(ierr); 182 } 183 184 // Print summary 185 if (!rp->test_mode) { 186 PetscInt P = rp->degree + 1, Q = P + rp->qextra; 187 188 const char *usedresource; 189 CeedGetResource(ceed, &usedresource); 190 191 VecType vectype; 192 ierr = VecGetType(X, &vectype); CHKERRQ(ierr); 193 194 PetscInt cStart, cEnd; 195 ierr = DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd); CHKERRQ(ierr); 196 PetscMPIInt comm_size; 197 ierr = MPI_Comm_size(rp->comm, &comm_size); CHKERRQ(ierr); 198 ierr = PetscPrintf(rp->comm, 199 "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" 200 " MPI:\n" 201 " Hostname : %s\n" 202 " Total ranks : %d\n" 203 " Ranks per compute node : %d\n" 204 " PETSc:\n" 205 " PETSc Vec Type : %s\n" 206 " libCEED:\n" 207 " libCEED Backend : %s\n" 208 " libCEED Backend MemType : %s\n" 209 " libCEED User Requested MemType : %s\n" 210 " Mesh:\n" 211 " Number of 1D Basis Nodes (P) : %d\n" 212 " Number of 1D Quadrature Points (Q) : %d\n" 213 " Global nodes : %D\n" 214 " Local Elements : %D\n" 215 " Owned nodes : %D\n" 216 " DoF per node : %D\n", 217 rp->bpchoice+1, 218 rp->hostname, 219 comm_size, rp->rankspernode, 220 vectype, usedresource, 221 CeedMemTypes[memtypebackend], 222 (rp->setmemtyperequest) ? 223 CeedMemTypes[rp->memtyperequested] : "none", 224 P, Q, gsize/rp->ncompu, cEnd - cStart, lsize/rp->ncompu, 225 rp->ncompu); 226 CHKERRQ(ierr); 227 } 228 229 // Create RHS vector 230 ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 231 ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 232 if (rp->memtyperequested == CEED_MEM_HOST) { 233 ierr = VecGetArray(rhsloc, &r); CHKERRQ(ierr); 234 } else { 235 ierr = VecCUDAGetArray(rhsloc, &r); CHKERRQ(ierr); 236 } 237 CeedVectorCreate(ceed, xlsize, &rhsceed); 238 CeedVectorSetArray(rhsceed, rp->memtyperequested, CEED_USE_POINTER, r); 239 240 ierr = PetscMalloc1(1, &ceeddata); CHKERRQ(ierr); 241 ierr = SetupLibceedByDegree(dm, ceed, rp->degree, rp->dim, rp->qextra, 242 rp->ncompu, gsize, xlsize, rp->bpchoice, ceeddata, 243 true, rhsceed, &target); CHKERRQ(ierr); 244 245 // Gather RHS 246 CeedVectorSyncArray(rhsceed, rp->memtyperequested); 247 if (rp->memtyperequested == CEED_MEM_HOST) { 248 ierr = VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); 249 } else { 250 ierr = VecCUDARestoreArray(rhsloc, &r); CHKERRQ(ierr); 251 } 252 ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 253 ierr = DMLocalToGlobalBegin(dm, rhsloc, ADD_VALUES, rhs); CHKERRQ(ierr); 254 ierr = DMLocalToGlobalEnd(dm, rhsloc, ADD_VALUES, rhs); CHKERRQ(ierr); 255 CeedVectorDestroy(&rhsceed); 256 257 // Create the error Q-function 258 CeedQFunctionCreateInterior(ceed, 1, bpOptions[rp->bpchoice].error, 259 bpOptions[rp->bpchoice].errorfname, &qferror); 260 CeedQFunctionAddInput(qferror, "u", rp->ncompu, CEED_EVAL_INTERP); 261 CeedQFunctionAddInput(qferror, "true_soln", rp->ncompu, CEED_EVAL_NONE); 262 CeedQFunctionAddOutput(qferror, "error", rp->ncompu, CEED_EVAL_NONE); 263 264 // Create the error operator 265 CeedOperatorCreate(ceed, qferror, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, 266 &operror); 267 CeedOperatorSetField(operror, "u", ceeddata->Erestrictu, 268 ceeddata->basisu, CEED_VECTOR_ACTIVE); 269 CeedOperatorSetField(operror, "true_soln", ceeddata->Erestrictui, 270 CEED_BASIS_COLLOCATED, target); 271 CeedOperatorSetField(operror, "error", ceeddata->Erestrictui, 272 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 273 274 // Set up Mat 275 userO->comm = rp->comm; 276 userO->dm = dm; 277 userO->Xloc = Xloc; 278 ierr = VecDuplicate(Xloc, &userO->Yloc); CHKERRQ(ierr); 279 userO->xceed = ceeddata->xceed; 280 userO->yceed = ceeddata->yceed; 281 userO->op = ceeddata->opapply; 282 userO->ceed = ceed; 283 userO->memtype = rp->memtyperequested; 284 if (rp->memtyperequested == CEED_MEM_HOST) { 285 userO->VecGetArray = VecGetArray; 286 userO->VecGetArrayRead = VecGetArrayRead; 287 userO->VecRestoreArray = VecRestoreArray; 288 userO->VecRestoreArrayRead = VecRestoreArrayRead; 289 } else { 290 userO->VecGetArray = VecCUDAGetArray; 291 userO->VecGetArrayRead = VecCUDAGetArrayRead; 292 userO->VecRestoreArray = VecCUDARestoreArray; 293 userO->VecRestoreArrayRead = VecCUDARestoreArrayRead; 294 } 295 296 ierr = KSPCreate(rp->comm, &ksp); CHKERRQ(ierr); 297 { 298 PC pc; 299 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 300 if (rp->bpchoice == CEED_BP1 || rp->bpchoice == CEED_BP2) { 301 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 302 ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 303 } else { 304 ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 305 } 306 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 307 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 308 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 309 PETSC_DEFAULT); CHKERRQ(ierr); 310 } 311 ierr = KSPSetOperators(ksp, matO, matO); CHKERRQ(ierr); 312 313 // First run, if benchmarking 314 if (rp->benchmark_mode) { 315 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 316 CHKERRQ(ierr); 317 my_rt_start = MPI_Wtime(); 318 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 319 my_rt = MPI_Wtime() - my_rt_start; 320 ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, rp->comm); 321 CHKERRQ(ierr); 322 // Set maxits based on first iteration timing 323 if (my_rt > 0.02) { 324 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); 325 CHKERRQ(ierr); 326 } else { 327 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); 328 CHKERRQ(ierr); 329 } 330 } 331 ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 332 333 // Timed solve 334 ierr = VecZeroEntries(X); CHKERRQ(ierr); 335 ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); 336 337 // -- Performance logging 338 ierr = PetscLogStagePush(rp->solvestage); CHKERRQ(ierr); 339 340 // -- Solve 341 my_rt_start = MPI_Wtime(); 342 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 343 my_rt = MPI_Wtime() - my_rt_start; 344 345 // -- Performance logging 346 ierr = PetscLogStagePop(); 347 348 // Output results 349 { 350 KSPType ksptype; 351 KSPConvergedReason reason; 352 PetscReal rnorm; 353 PetscInt its; 354 ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 355 ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 356 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 357 ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 358 if (!rp->test_mode || reason < 0 || rnorm > 1e-8) { 359 ierr = PetscPrintf(rp->comm, 360 " KSP:\n" 361 " KSP Type : %s\n" 362 " KSP Convergence : %s\n" 363 " Total KSP Iterations : %D\n" 364 " Final rnorm : %e\n", 365 ksptype, KSPConvergedReasons[reason], its, 366 (double)rnorm); CHKERRQ(ierr); 367 } 368 if (!rp->test_mode) { 369 ierr = PetscPrintf(rp->comm," Performance:\n"); CHKERRQ(ierr); 370 } 371 { 372 PetscReal maxerror; 373 ierr = ComputeErrorMax(userO, operror, X, target, &maxerror); 374 CHKERRQ(ierr); 375 PetscReal tol = 5e-2; 376 if (!rp->test_mode || maxerror > tol) { 377 ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, rp->comm); 378 CHKERRQ(ierr); 379 ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, rp->comm); 380 CHKERRQ(ierr); 381 ierr = PetscPrintf(rp->comm, 382 " Pointwise Error (max) : %e\n" 383 " CG Solve Time : %g (%g) sec\n", 384 (double)maxerror, rt_max, rt_min); CHKERRQ(ierr); 385 } 386 } 387 if (rp->benchmark_mode && (!rp->test_mode)) { 388 ierr = PetscPrintf(rp->comm, 389 " DoFs/Sec in CG : %g (%g) million\n", 390 1e-6*gsize*its/rt_max, 391 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 392 } 393 } 394 395 if (rp->write_solution) { 396 PetscViewer vtkviewersoln; 397 398 ierr = PetscViewerCreate(rp->comm, &vtkviewersoln); CHKERRQ(ierr); 399 ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr); 400 ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr); 401 ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr); 402 ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr); 403 } 404 405 // Cleanup 406 ierr = VecDestroy(&X); CHKERRQ(ierr); 407 ierr = VecDestroy(&Xloc); CHKERRQ(ierr); 408 ierr = VecDestroy(&userO->Yloc); CHKERRQ(ierr); 409 ierr = MatDestroy(&matO); CHKERRQ(ierr); 410 ierr = PetscFree(userO); CHKERRQ(ierr); 411 ierr = CeedDataDestroy(0, ceeddata); CHKERRQ(ierr); 412 ierr = DMDestroy(&dm); CHKERRQ(ierr); 413 414 ierr = VecDestroy(&rhs); CHKERRQ(ierr); 415 ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 416 ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 417 CeedVectorDestroy(&target); 418 CeedQFunctionDestroy(&qferror); 419 CeedOperatorDestroy(&operror); 420 CeedDestroy(&ceed); 421 PetscFunctionReturn(0); 422 } 423 424 int main(int argc, char **argv) { 425 PetscInt ierr, commsize; 426 RunParams rp; 427 MPI_Comm comm; 428 PetscLogStage solvestage; 429 char filename[PETSC_MAX_PATH_LEN]; 430 char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 431 char hostname[PETSC_MAX_PATH_LEN]; 432 433 PetscInt qextra, dim = 3, melem[3] = {3, 3, 3}, ncompu = 1; 434 PetscInt num_degrees = 30, degree[30] = {}, num_localnodes = 2, localnodes[2] = {}; 435 PetscMPIInt rankspernode; 436 PetscBool test_mode, benchmark_mode, read_mesh, write_solution, 437 userlnodes = PETSC_FALSE, degree_set; 438 CeedMemType memtyperequested; 439 bpType bpchoice; 440 441 // Check PETSc CUDA support 442 PetscBool petschavecuda, setmemtyperequest = PETSC_FALSE; 443 // *INDENT-OFF* 444 #ifdef PETSC_HAVE_CUDA 445 petschavecuda = PETSC_TRUE; 446 #else 447 petschavecuda = PETSC_FALSE; 448 #endif 449 // *INDENT-ON* 450 451 // Initialize PETSc 452 ierr = PetscInitialize(&argc, &argv, NULL, help); 453 if (ierr) return ierr; 454 comm = PETSC_COMM_WORLD; 455 ierr = MPI_Comm_size(comm, &commsize); 456 if (ierr != MPI_SUCCESS) return ierr; 457 #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) 458 { 459 MPI_Comm splitcomm; 460 ierr = MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, 461 &splitcomm); 462 CHKERRQ(ierr); 463 ierr = MPI_Comm_size(splitcomm, &rankspernode); CHKERRQ(ierr); 464 ierr = MPI_Comm_free(&splitcomm); CHKERRQ(ierr); 465 } 466 #else 467 rankspernode = -1; // Unknown 468 #endif 469 470 // Read command line options 471 ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); 472 CHKERRQ(ierr); 473 bpchoice = CEED_BP1; 474 ierr = PetscOptionsEnum("-problem", "CEED benchmark problem to solve", NULL, 475 bpTypes, (PetscEnum)bpchoice, (PetscEnum *)&bpchoice, 476 NULL); CHKERRQ(ierr); 477 ncompu = bpOptions[bpchoice].ncompu; 478 test_mode = PETSC_FALSE; 479 ierr = PetscOptionsBool("-test", 480 "Testing mode (do not print unless error is large)", 481 NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); 482 benchmark_mode = PETSC_FALSE; 483 ierr = PetscOptionsBool("-benchmark", 484 "Benchmarking mode (prints benchmark statistics)", 485 NULL, benchmark_mode, &benchmark_mode, NULL); 486 CHKERRQ(ierr); 487 write_solution = PETSC_FALSE; 488 ierr = PetscOptionsBool("-write_solution", "Write solution for visualization", 489 NULL, write_solution, &write_solution, NULL); 490 CHKERRQ(ierr); 491 degree[0] = test_mode ? 3 : 2; 492 ierr = PetscOptionsIntArray("-degree", 493 "Polynomial degree of tensor product basis", NULL, 494 degree, &num_degrees, °ree_set); CHKERRQ(ierr); 495 if (!degree_set) 496 num_degrees = 1; 497 qextra = bpOptions[bpchoice].qextra; 498 ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", NULL, 499 qextra, &qextra, NULL); CHKERRQ(ierr); 500 ierr = PetscOptionsString("-ceed", "CEED resource specifier", NULL, 501 ceedresource, 502 ceedresource, sizeof(ceedresource), NULL); CHKERRQ(ierr); 503 ierr = PetscGetHostName(hostname, sizeof hostname); CHKERRQ(ierr); 504 ierr = PetscOptionsString("-hostname", "Hostname for output", NULL, hostname, 505 hostname, sizeof(hostname), NULL); CHKERRQ(ierr); 506 read_mesh = PETSC_FALSE; 507 ierr = PetscOptionsString("-mesh", "Read mesh from file", NULL, filename, 508 filename, sizeof(filename), &read_mesh); 509 CHKERRQ(ierr); 510 if (!read_mesh) { 511 PetscInt tmp = dim; 512 ierr = PetscOptionsIntArray("-cells", "Number of cells per dimension", NULL, 513 melem, &tmp, NULL); CHKERRQ(ierr); 514 } 515 memtyperequested = petschavecuda ? CEED_MEM_DEVICE : CEED_MEM_HOST; 516 ierr = PetscOptionsEnum("-memtype", "CEED MemType requested", NULL, memTypes, 517 (PetscEnum)memtyperequested, 518 (PetscEnum *)&memtyperequested, &setmemtyperequest); 519 CHKERRQ(ierr); 520 localnodes[0] = 1000; 521 ierr = PetscOptionsIntArray("-local_nodes", 522 "Target number of locally owned nodes per " 523 "process (single value or min,max)", 524 NULL, localnodes, &num_localnodes, &userlnodes); 525 CHKERRQ(ierr); 526 if (num_localnodes < 2) 527 localnodes[1] = 2 * localnodes[0]; 528 if (benchmark_mode && !degree_set) { 529 PetscInt maxdegree = 8; 530 ierr = PetscOptionsInt("-max_degree", "Max degree to run in benchmark mode", 531 NULL, maxdegree, &maxdegree, NULL); 532 CHKERRQ(ierr); 533 for (PetscInt i = 0; i < maxdegree; i++) 534 degree[i] = i + 1; 535 num_degrees = maxdegree; 536 } 537 { 538 PetscBool flg; 539 PetscInt p = rankspernode; 540 ierr = PetscOptionsInt("-p", "Number of MPI ranks per node", NULL, 541 p, &p, &flg); 542 CHKERRQ(ierr); 543 if (flg) rankspernode = p; 544 } 545 546 ierr = PetscOptionsEnd(); 547 CHKERRQ(ierr); 548 549 // Register PETSc logging stage 550 ierr = PetscLogStageRegister("Solve Stage", &solvestage); 551 CHKERRQ(ierr); 552 553 // Setup all parameters needed in Run() 554 ierr = PetscMalloc1(1, &rp); 555 CHKERRQ(ierr); 556 rp->comm = comm; 557 rp->test_mode = test_mode; 558 rp->benchmark_mode = benchmark_mode; 559 rp->read_mesh = read_mesh; 560 rp->userlnodes = userlnodes; 561 rp->setmemtyperequest = setmemtyperequest; 562 rp->petschavecuda = petschavecuda; 563 rp->write_solution = write_solution; 564 rp->filename = filename; 565 rp->ceedresource = ceedresource; 566 rp->hostname = hostname; 567 rp->qextra = qextra; 568 rp->dim = dim; 569 rp->ncompu = ncompu; 570 rp->melem = melem; 571 rp->bpchoice = bpchoice; 572 rp->memtyperequested = memtyperequested; 573 rp->solvestage = solvestage; 574 rp->rankspernode = rankspernode; 575 576 for (PetscInt d = 0; d < num_degrees; d++) { 577 PetscInt deg = degree[d]; 578 for (PetscInt n = localnodes[0]; n < localnodes[1]; n *= 2) { 579 rp->degree = deg; 580 rp->localnodes = n; 581 ierr = Run(rp); CHKERRQ(ierr); 582 } 583 } 584 585 // Clear memory 586 ierr = PetscFree(rp); CHKERRQ(ierr); 587 588 return PetscFinalize(); 589 } 590