1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 // libCEED + PETSc Example: CEED BPs 18 // 19 // This example demonstrates a simple usage of libCEED with PETSc to solve the 20 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 21 // 22 // The code uses higher level communication protocols in DMPlex. 23 // 24 // Build with: 25 // 26 // make bps [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 27 // 28 // Sample runs: 29 // 30 // ./bps -problem bp1 -degree 3 31 // ./bps -problem bp2 -ceed /cpu/self -degree 3 32 // ./bps -problem bp3 -ceed /gpu/occa -degree 3 33 // ./bps -problem bp4 -ceed /cpu/occa -degree 3 34 // ./bps -problem bp5 -ceed /omp/occa -degree 3 35 // ./bps -problem bp6 -ceed /ocl/occa -degree 3 36 // 37 //TESTARGS -ceed {ceed_resource} -test -problem bp5 -degree 3 38 39 /// @file 40 /// CEED BPs example using PETSc with DMPlex 41 /// See bpsraw.c for a "raw" implementation using a structured grid. 42 const char help[] = "Solve CEED BPs using PETSc with DMPlex\n"; 43 44 #include <stdbool.h> 45 #include <string.h> 46 #include <petscksp.h> 47 #include <petscdmplex.h> 48 #include <ceed.h> 49 #include "setup.h" 50 51 // ----------------------------------------------------------------------------- 52 // Utilities 53 // ----------------------------------------------------------------------------- 54 55 // Utility function, compute three factors of an integer 56 static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 57 for (PetscInt d=0,sizeleft=size; d<3; d++) { 58 PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 59 while (try * (sizeleft / try) != sizeleft) try++; 60 m[reverse ? 2-d : d] = try; 61 sizeleft /= try; 62 } 63 } 64 65 static int Max3(const PetscInt a[3]) { 66 return PetscMax(a[0], PetscMax(a[1], a[2])); 67 } 68 69 static int Min3(const PetscInt a[3]) { 70 return PetscMin(a[0], PetscMin(a[1], a[2])); 71 } 72 73 // ----------------------------------------------------------------------------- 74 // Parameter structure for running problems 75 // ----------------------------------------------------------------------------- 76 typedef struct RunParams_ *RunParams; 77 struct RunParams_ { 78 MPI_Comm comm; 79 PetscBool test_mode, read_mesh, userlnodes, setmemtyperequest, 80 petschavecuda, write_solution; 81 char *filename, *hostname; 82 PetscInt localnodes, degree, qextra, dim, ncompu, *melem; 83 PetscInt ksp_max_it_clip[2]; 84 PetscMPIInt rankspernode; 85 bpType bpchoice; 86 CeedMemType memtyperequested; 87 PetscLogStage solvestage; 88 }; 89 90 // ----------------------------------------------------------------------------- 91 // Main body of program, called in a loop for performance benchmarking purposes 92 // ----------------------------------------------------------------------------- 93 static PetscErrorCode RunWithDM(RunParams rp, DM dm, 94 const char *ceedresource) { 95 PetscErrorCode ierr; 96 double my_rt_start, my_rt, rt_min, rt_max; 97 PetscInt xlsize, lsize, gsize; 98 PetscScalar *r; 99 Vec X, Xloc, rhs, rhsloc; 100 Mat matO; 101 KSP ksp; 102 UserO userO; 103 Ceed ceed; 104 CeedData ceeddata; 105 CeedQFunction qferror; 106 CeedOperator operror; 107 CeedVector rhsceed, target; 108 109 PetscFunctionBeginUser; 110 // Set up libCEED 111 CeedInit(ceedresource, &ceed); 112 CeedMemType memtypebackend; 113 CeedGetPreferredMemType(ceed, &memtypebackend); 114 115 // Check memtype compatibility 116 if (!rp->setmemtyperequest) 117 rp->memtyperequested = memtypebackend; 118 else if (!rp->petschavecuda && rp->memtyperequested == CEED_MEM_DEVICE) 119 SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP_SYS, 120 "PETSc was not built with CUDA. " 121 "Requested MemType CEED_MEM_DEVICE is not supported.", NULL); 122 123 // Create vectors 124 if (rp->memtyperequested == CEED_MEM_DEVICE) { 125 ierr = DMSetVecType(dm, VECCUDA); CHKERRQ(ierr); 126 } 127 ierr = DMCreateGlobalVector(dm, &X); CHKERRQ(ierr); 128 ierr = VecGetLocalSize(X, &lsize); CHKERRQ(ierr); 129 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 130 ierr = DMCreateLocalVector(dm, &Xloc); CHKERRQ(ierr); 131 ierr = VecGetSize(Xloc, &xlsize); CHKERRQ(ierr); 132 ierr = VecDuplicate(X, &rhs); CHKERRQ(ierr); 133 134 // Operator 135 ierr = PetscMalloc1(1, &userO); CHKERRQ(ierr); 136 ierr = MatCreateShell(rp->comm, lsize, lsize, gsize, gsize, 137 userO, &matO); CHKERRQ(ierr); 138 ierr = MatShellSetOperation(matO, MATOP_MULT, 139 (void(*)(void))MatMult_Ceed); CHKERRQ(ierr); 140 ierr = MatShellSetOperation(matO, MATOP_GET_DIAGONAL, 141 (void(*)(void))MatGetDiag); CHKERRQ(ierr); 142 if (rp->memtyperequested == CEED_MEM_DEVICE) { 143 ierr = MatShellSetVecType(matO, VECCUDA); CHKERRQ(ierr); 144 } 145 146 // Print summary 147 if (!rp->test_mode) { 148 PetscInt P = rp->degree + 1, Q = P + rp->qextra; 149 150 const char *usedresource; 151 CeedGetResource(ceed, &usedresource); 152 153 VecType vectype; 154 ierr = VecGetType(X, &vectype); CHKERRQ(ierr); 155 156 PetscInt cStart, cEnd; 157 ierr = DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd); CHKERRQ(ierr); 158 PetscMPIInt comm_size; 159 ierr = MPI_Comm_size(rp->comm, &comm_size); CHKERRQ(ierr); 160 ierr = PetscPrintf(rp->comm, 161 "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" 162 " MPI:\n" 163 " Hostname : %s\n" 164 " Total ranks : %d\n" 165 " Ranks per compute node : %d\n" 166 " PETSc:\n" 167 " PETSc Vec Type : %s\n" 168 " libCEED:\n" 169 " libCEED Backend : %s\n" 170 " libCEED Backend MemType : %s\n" 171 " libCEED User Requested MemType : %s\n" 172 " Mesh:\n" 173 " Number of 1D Basis Nodes (P) : %d\n" 174 " Number of 1D Quadrature Points (Q) : %d\n" 175 " Global nodes : %D\n" 176 " Local Elements : %D\n" 177 " Owned nodes : %D\n" 178 " DoF per node : %D\n", 179 rp->bpchoice+1, 180 rp->hostname, 181 comm_size, rp->rankspernode, 182 vectype, usedresource, 183 CeedMemTypes[memtypebackend], 184 (rp->setmemtyperequest) ? 185 CeedMemTypes[rp->memtyperequested] : "none", 186 P, Q, gsize/rp->ncompu, cEnd - cStart, lsize/rp->ncompu, 187 rp->ncompu); 188 CHKERRQ(ierr); 189 } 190 191 // Create RHS vector 192 ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 193 ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 194 if (rp->memtyperequested == CEED_MEM_HOST) { 195 ierr = VecGetArray(rhsloc, &r); CHKERRQ(ierr); 196 } else { 197 ierr = VecCUDAGetArray(rhsloc, &r); CHKERRQ(ierr); 198 } 199 CeedVectorCreate(ceed, xlsize, &rhsceed); 200 CeedVectorSetArray(rhsceed, rp->memtyperequested, CEED_USE_POINTER, r); 201 202 ierr = PetscMalloc1(1, &ceeddata); CHKERRQ(ierr); 203 ierr = SetupLibceedByDegree(dm, ceed, rp->degree, rp->dim, rp->qextra, 204 rp->ncompu, gsize, xlsize, rp->bpchoice, ceeddata, 205 true, rhsceed, &target); CHKERRQ(ierr); 206 207 // Gather RHS 208 CeedVectorSyncArray(rhsceed, rp->memtyperequested); 209 if (rp->memtyperequested == CEED_MEM_HOST) { 210 ierr = VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); 211 } else { 212 ierr = VecCUDARestoreArray(rhsloc, &r); CHKERRQ(ierr); 213 } 214 ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 215 ierr = DMLocalToGlobalBegin(dm, rhsloc, ADD_VALUES, rhs); CHKERRQ(ierr); 216 ierr = DMLocalToGlobalEnd(dm, rhsloc, ADD_VALUES, rhs); CHKERRQ(ierr); 217 CeedVectorDestroy(&rhsceed); 218 219 // Create the error Q-function 220 CeedQFunctionCreateInterior(ceed, 1, bpOptions[rp->bpchoice].error, 221 bpOptions[rp->bpchoice].errorfname, &qferror); 222 CeedQFunctionAddInput(qferror, "u", rp->ncompu, CEED_EVAL_INTERP); 223 CeedQFunctionAddInput(qferror, "true_soln", rp->ncompu, CEED_EVAL_NONE); 224 CeedQFunctionAddOutput(qferror, "error", rp->ncompu, CEED_EVAL_NONE); 225 226 // Create the error operator 227 CeedOperatorCreate(ceed, qferror, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, 228 &operror); 229 CeedOperatorSetField(operror, "u", ceeddata->Erestrictu, 230 ceeddata->basisu, CEED_VECTOR_ACTIVE); 231 CeedOperatorSetField(operror, "true_soln", ceeddata->Erestrictui, 232 CEED_BASIS_COLLOCATED, target); 233 CeedOperatorSetField(operror, "error", ceeddata->Erestrictui, 234 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 235 236 // Set up Mat 237 userO->comm = rp->comm; 238 userO->dm = dm; 239 userO->Xloc = Xloc; 240 ierr = VecDuplicate(Xloc, &userO->Yloc); CHKERRQ(ierr); 241 userO->xceed = ceeddata->xceed; 242 userO->yceed = ceeddata->yceed; 243 userO->op = ceeddata->opapply; 244 userO->ceed = ceed; 245 userO->memtype = rp->memtyperequested; 246 if (rp->memtyperequested == CEED_MEM_HOST) { 247 userO->VecGetArray = VecGetArray; 248 userO->VecGetArrayRead = VecGetArrayRead; 249 userO->VecRestoreArray = VecRestoreArray; 250 userO->VecRestoreArrayRead = VecRestoreArrayRead; 251 } else { 252 userO->VecGetArray = VecCUDAGetArray; 253 userO->VecGetArrayRead = VecCUDAGetArrayRead; 254 userO->VecRestoreArray = VecCUDARestoreArray; 255 userO->VecRestoreArrayRead = VecCUDARestoreArrayRead; 256 } 257 258 ierr = KSPCreate(rp->comm, &ksp); CHKERRQ(ierr); 259 { 260 PC pc; 261 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 262 if (rp->bpchoice == CEED_BP1 || rp->bpchoice == CEED_BP2) { 263 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 264 ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 265 } else { 266 ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 267 } 268 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 269 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 270 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 271 PETSC_DEFAULT); CHKERRQ(ierr); 272 } 273 ierr = KSPSetOperators(ksp, matO, matO); CHKERRQ(ierr); 274 275 // First run's performance log is not considered for benchmarking purposes 276 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 277 CHKERRQ(ierr); 278 my_rt_start = MPI_Wtime(); 279 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 280 my_rt = MPI_Wtime() - my_rt_start; 281 ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, rp->comm); 282 CHKERRQ(ierr); 283 // Set maxits based on first iteration timing 284 if (my_rt > 0.02) { 285 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 286 rp->ksp_max_it_clip[0]); 287 CHKERRQ(ierr); 288 } else { 289 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 290 rp->ksp_max_it_clip[1]); 291 CHKERRQ(ierr); 292 } 293 ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 294 295 // Timed solve 296 ierr = VecZeroEntries(X); CHKERRQ(ierr); 297 ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); 298 299 // -- Performance logging 300 ierr = PetscLogStagePush(rp->solvestage); CHKERRQ(ierr); 301 302 // -- Solve 303 my_rt_start = MPI_Wtime(); 304 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 305 my_rt = MPI_Wtime() - my_rt_start; 306 307 // -- Performance logging 308 ierr = PetscLogStagePop(); 309 310 // Output results 311 { 312 KSPType ksptype; 313 KSPConvergedReason reason; 314 PetscReal rnorm; 315 PetscInt its; 316 ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 317 ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 318 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 319 ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 320 if (!rp->test_mode || reason < 0 || rnorm > 1e-8) { 321 ierr = PetscPrintf(rp->comm, 322 " KSP:\n" 323 " KSP Type : %s\n" 324 " KSP Convergence : %s\n" 325 " Total KSP Iterations : %D\n" 326 " Final rnorm : %e\n", 327 ksptype, KSPConvergedReasons[reason], its, 328 (double)rnorm); CHKERRQ(ierr); 329 } 330 if (!rp->test_mode) { 331 ierr = PetscPrintf(rp->comm," Performance:\n"); CHKERRQ(ierr); 332 } 333 { 334 PetscReal maxerror; 335 ierr = ComputeErrorMax(userO, operror, X, target, &maxerror); 336 CHKERRQ(ierr); 337 PetscReal tol = 5e-2; 338 if (!rp->test_mode || maxerror > tol) { 339 ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, rp->comm); 340 CHKERRQ(ierr); 341 ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, rp->comm); 342 CHKERRQ(ierr); 343 ierr = PetscPrintf(rp->comm, 344 " Pointwise Error (max) : %e\n" 345 " CG Solve Time : %g (%g) sec\n", 346 (double)maxerror, rt_max, rt_min); CHKERRQ(ierr); 347 } 348 } 349 if (!rp->test_mode) { 350 ierr = PetscPrintf(rp->comm, 351 " DoFs/Sec in CG : %g (%g) million\n", 352 1e-6*gsize*its/rt_max, 353 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 354 } 355 } 356 357 if (rp->write_solution) { 358 PetscViewer vtkviewersoln; 359 360 ierr = PetscViewerCreate(rp->comm, &vtkviewersoln); CHKERRQ(ierr); 361 ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr); 362 ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr); 363 ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr); 364 ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr); 365 } 366 367 // Cleanup 368 ierr = VecDestroy(&X); CHKERRQ(ierr); 369 ierr = VecDestroy(&Xloc); CHKERRQ(ierr); 370 ierr = VecDestroy(&userO->Yloc); CHKERRQ(ierr); 371 ierr = MatDestroy(&matO); CHKERRQ(ierr); 372 ierr = PetscFree(userO); CHKERRQ(ierr); 373 ierr = CeedDataDestroy(0, ceeddata); CHKERRQ(ierr); 374 375 ierr = VecDestroy(&rhs); CHKERRQ(ierr); 376 ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 377 ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 378 CeedVectorDestroy(&target); 379 CeedQFunctionDestroy(&qferror); 380 CeedOperatorDestroy(&operror); 381 CeedDestroy(&ceed); 382 PetscFunctionReturn(0); 383 } 384 385 static PetscErrorCode Run(RunParams rp, 386 PetscInt num_resources, char *const *ceedresources, 387 PetscInt num_bpchoices, const bpType *bpchoices) { 388 PetscInt ierr; 389 DM dm; 390 391 PetscFunctionBeginUser; 392 // Setup DM 393 if (rp->read_mesh) { 394 ierr = DMPlexCreateFromFile(PETSC_COMM_WORLD, rp->filename, PETSC_TRUE, &dm); 395 CHKERRQ(ierr); 396 } else { 397 if (rp->userlnodes) { 398 // Find a nicely composite number of elements no less than global nodes 399 PetscMPIInt size; 400 ierr = MPI_Comm_size(rp->comm, &size); CHKERRQ(ierr); 401 for (PetscInt gelem = 402 PetscMax(1, size * rp->localnodes / PetscPowInt(rp->degree, rp->dim)); 403 ; 404 gelem++) { 405 Split3(gelem, rp->melem, true); 406 if (Max3(rp->melem) / Min3(rp->melem) <= 2) break; 407 } 408 } 409 ierr = DMPlexCreateBoxMesh(PETSC_COMM_WORLD, rp->dim, PETSC_FALSE, rp->melem, 410 NULL, NULL, NULL, PETSC_TRUE, &dm); CHKERRQ(ierr); 411 } 412 413 { 414 DM dmDist = NULL; 415 PetscPartitioner part; 416 417 ierr = DMPlexGetPartitioner(dm, &part); CHKERRQ(ierr); 418 ierr = PetscPartitionerSetFromOptions(part); CHKERRQ(ierr); 419 ierr = DMPlexDistribute(dm, 0, NULL, &dmDist); CHKERRQ(ierr); 420 if (dmDist) { 421 ierr = DMDestroy(&dm); CHKERRQ(ierr); 422 dm = dmDist; 423 } 424 } 425 426 for (PetscInt b = 0; b < num_bpchoices; b++) { 427 DM dm_deg; 428 PetscInt qextra = rp->qextra; 429 rp->bpchoice = bpchoices[b]; 430 rp->ncompu = bpOptions[rp->bpchoice].ncompu; 431 rp->qextra = qextra < 0 ? bpOptions[rp->bpchoice].qextra : qextra; 432 ierr = DMClone(dm, &dm_deg); CHKERRQ(ierr); 433 // Create DM 434 ierr = SetupDMByDegree(dm_deg, rp->degree, rp->ncompu, rp->bpchoice); 435 CHKERRQ(ierr); 436 for (PetscInt r = 0; r < num_resources; r++) { 437 ierr = RunWithDM(rp, dm_deg, ceedresources[r]); CHKERRQ(ierr); 438 } 439 ierr = DMDestroy(&dm_deg); CHKERRQ(ierr); 440 rp->qextra = qextra; 441 } 442 443 ierr = DMDestroy(&dm); CHKERRQ(ierr); 444 PetscFunctionReturn(0); 445 } 446 447 int main(int argc, char **argv) { 448 PetscInt ierr, commsize; 449 RunParams rp; 450 MPI_Comm comm; 451 char filename[PETSC_MAX_PATH_LEN]; 452 char *ceedresources[30]; 453 PetscInt num_ceedresources = 30; 454 char hostname[PETSC_MAX_PATH_LEN]; 455 456 PetscInt dim = 3, melem[3] = {3, 3, 3}; 457 PetscInt num_degrees = 30, degree[30] = {}, num_localnodes = 2, localnodes[2] = {}; 458 PetscMPIInt rankspernode; 459 PetscBool degree_set; 460 bpType bpchoices[10]; 461 PetscInt num_bpchoices = 10; 462 463 // Check PETSc CUDA support 464 PetscBool petschavecuda; 465 // *INDENT-OFF* 466 #ifdef PETSC_HAVE_CUDA 467 petschavecuda = PETSC_TRUE; 468 #else 469 petschavecuda = PETSC_FALSE; 470 #endif 471 // *INDENT-ON* 472 473 // Initialize PETSc 474 ierr = PetscInitialize(&argc, &argv, NULL, help); 475 if (ierr) return ierr; 476 comm = PETSC_COMM_WORLD; 477 ierr = MPI_Comm_size(comm, &commsize); 478 if (ierr != MPI_SUCCESS) return ierr; 479 #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) 480 { 481 MPI_Comm splitcomm; 482 ierr = MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, 483 &splitcomm); 484 CHKERRQ(ierr); 485 ierr = MPI_Comm_size(splitcomm, &rankspernode); CHKERRQ(ierr); 486 ierr = MPI_Comm_free(&splitcomm); CHKERRQ(ierr); 487 } 488 #else 489 rankspernode = -1; // Unknown 490 #endif 491 492 // Setup all parameters needed in Run() 493 ierr = PetscMalloc1(1, &rp); CHKERRQ(ierr); 494 rp->comm = comm; 495 496 // Read command line options 497 ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); 498 CHKERRQ(ierr); 499 { 500 PetscBool set; 501 ierr = PetscOptionsEnumArray("-problem", "CEED benchmark problem to solve", 502 NULL, 503 bpTypes, (PetscEnum *)bpchoices, &num_bpchoices, &set); 504 CHKERRQ(ierr); 505 if (!set) { 506 bpchoices[0] = CEED_BP1; 507 num_bpchoices = 1; 508 } 509 } 510 rp->test_mode = PETSC_FALSE; 511 ierr = PetscOptionsBool("-test", 512 "Testing mode (do not print unless error is large)", 513 NULL, rp->test_mode, &rp->test_mode, NULL); CHKERRQ(ierr); 514 rp->write_solution = PETSC_FALSE; 515 ierr = PetscOptionsBool("-write_solution", "Write solution for visualization", 516 NULL, rp->write_solution, &rp->write_solution, NULL); 517 CHKERRQ(ierr); 518 degree[0] = rp->test_mode ? 3 : 2; 519 ierr = PetscOptionsIntArray("-degree", 520 "Polynomial degree of tensor product basis", NULL, 521 degree, &num_degrees, °ree_set); CHKERRQ(ierr); 522 if (!degree_set) 523 num_degrees = 1; 524 rp->qextra = PETSC_DECIDE; 525 ierr = PetscOptionsInt("-qextra", 526 "Number of extra quadrature points (-1 for auto)", NULL, 527 rp->qextra, &rp->qextra, NULL); CHKERRQ(ierr); 528 { 529 PetscBool set; 530 ierr = PetscOptionsStringArray("-ceed", 531 "CEED resource specifier (comma-separated list)", NULL, 532 ceedresources, &num_ceedresources, &set); CHKERRQ(ierr); 533 if (!set) { 534 ierr = PetscStrallocpy( "/cpu/self", &ceedresources[0]); CHKERRQ(ierr); 535 num_ceedresources = 1; 536 } 537 } 538 ierr = PetscGetHostName(hostname, sizeof hostname); CHKERRQ(ierr); 539 ierr = PetscOptionsString("-hostname", "Hostname for output", NULL, hostname, 540 hostname, sizeof(hostname), NULL); CHKERRQ(ierr); 541 rp->read_mesh = PETSC_FALSE; 542 ierr = PetscOptionsString("-mesh", "Read mesh from file", NULL, filename, 543 filename, sizeof(filename), &rp->read_mesh); 544 CHKERRQ(ierr); 545 rp->filename = filename; 546 if (!rp->read_mesh) { 547 PetscInt tmp = dim; 548 ierr = PetscOptionsIntArray("-cells", "Number of cells per dimension", NULL, 549 melem, &tmp, NULL); CHKERRQ(ierr); 550 } 551 rp->memtyperequested = petschavecuda ? CEED_MEM_DEVICE : CEED_MEM_HOST; 552 ierr = PetscOptionsEnum("-memtype", "CEED MemType requested", NULL, memTypes, 553 (PetscEnum)rp->memtyperequested, 554 (PetscEnum *)&rp->memtyperequested, &rp->setmemtyperequest); 555 CHKERRQ(ierr); 556 localnodes[0] = 1000; 557 ierr = PetscOptionsIntArray("-local_nodes", 558 "Target number of locally owned nodes per " 559 "process (single value or min,max)", 560 NULL, localnodes, &num_localnodes, &rp->userlnodes); 561 CHKERRQ(ierr); 562 if (num_localnodes < 2) 563 localnodes[1] = 2 * localnodes[0]; 564 { 565 PetscInt two = 2; 566 rp->ksp_max_it_clip[0] = 5; 567 rp->ksp_max_it_clip[1] = 20; 568 ierr = PetscOptionsIntArray("-ksp_max_it_clip", 569 "Min and max number of iterations to use during benchmarking", 570 NULL, rp->ksp_max_it_clip, &two, NULL); CHKERRQ(ierr); 571 } 572 if (!degree_set) { 573 PetscInt maxdegree = 8; 574 ierr = PetscOptionsInt("-max_degree", 575 "Range of degrees [1, maxdegree] to run with", 576 NULL, maxdegree, &maxdegree, NULL); 577 CHKERRQ(ierr); 578 for (PetscInt i = 0; i < maxdegree; i++) 579 degree[i] = i + 1; 580 num_degrees = maxdegree; 581 } 582 { 583 PetscBool flg; 584 PetscInt p = rankspernode; 585 ierr = PetscOptionsInt("-p", "Number of MPI ranks per node", NULL, 586 p, &p, &flg); 587 CHKERRQ(ierr); 588 if (flg) rankspernode = p; 589 } 590 591 ierr = PetscOptionsEnd(); 592 CHKERRQ(ierr); 593 594 // Register PETSc logging stage 595 ierr = PetscLogStageRegister("Solve Stage", &rp->solvestage); 596 CHKERRQ(ierr); 597 598 rp->petschavecuda = petschavecuda; 599 rp->hostname = hostname; 600 rp->dim = dim; 601 rp->melem = melem; 602 rp->rankspernode = rankspernode; 603 604 for (PetscInt d = 0; d < num_degrees; d++) { 605 PetscInt deg = degree[d]; 606 for (PetscInt n = localnodes[0]; n < localnodes[1]; n *= 2) { 607 rp->degree = deg; 608 rp->localnodes = n; 609 ierr = Run(rp, num_ceedresources, ceedresources, 610 num_bpchoices, bpchoices); CHKERRQ(ierr); 611 } 612 } 613 // Clear memory 614 ierr = PetscFree(rp); CHKERRQ(ierr); 615 for (PetscInt i=0; i<num_ceedresources; i++) { 616 ierr = PetscFree(ceedresources[i]); CHKERRQ(ierr); 617 } 618 return PetscFinalize(); 619 } 620