// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights // reserved. See files LICENSE and NOTICE for details. // // This file is part of CEED, a collection of benchmarks, miniapps, software // libraries and APIs for efficient high-order finite element and spectral // element discretizations for exascale applications. For more information and // source code availability see http://github.com/ceed. // // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, // a collaborative effort of two U.S. Department of Energy organizations (Office // of Science and the National Nuclear Security Administration) responsible for // the planning and preparation of a capable exascale ecosystem, including // software, applications, hardware, advanced system engineering and early // testbed platforms, in support of the nation's exascale computing imperative. // libCEED + PETSc Example: CEED BPs // // This example demonstrates a simple usage of libCEED with PETSc to solve the // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. // // The code is intentionally "raw", using only low-level communication // primitives. // // Build with: // // make bpsraw [PETSC_DIR=] [CEED_DIR=] // // Sample runs: // // ./bpsraw -problem bp1 // ./bpsraw -problem bp2 -ceed /cpu/self // ./bpsraw -problem bp3 -ceed /gpu/occa // ./bpsraw -problem bp4 -ceed /cpu/occa // ./bpsraw -problem bp5 -ceed /omp/occa // ./bpsraw -problem bp6 -ceed /ocl/occa // //TESTARGS -ceed {ceed_resource} -test -problem bp2 -degree 5 -qextra 5 /// @file /// CEED BPs example using PETSc /// See bps.c for an implementation using DMPlex unstructured grids. 
// NOTE(review): This region (everything after the license header) is a
// corrupted extraction of a libCEED + PETSc "CEED BPs" example. Two kinds of
// damage are visible:
//   1. All newlines were collapsed onto a handful of very long lines, so any
//      '//' comment now comments out ALL code that follows it on that line.
//   2. An HTML-sanitization pass appears to have run over the text:
//      entities were decoded ("&ltog" -> "<og", "&gtogD" -> ">ogD",
//      "&degree" -> "°ree" below) and tag-like spans (a '<' followed by a
//      letter, up to the next '>') were stripped -- e.g. every system
//      '#include' target is missing, and loop headers such as
//      "for (PetscInt i=0; i<p[0]..." were fused with much later code
//      ("iltog", "icomm", "d=mnodes[0]").
// The code is preserved byte-for-byte; the comments inserted between the
// original physical lines describe each region and flag the damage. Recover
// the authoritative text from the upstream libCEED repository before editing.
//
// Region 1: 'help' string; #include directives whose <...> targets were eaten
// (presumably the PETSc and CEED public headers plus stdbool/string -- TODO
// confirm against upstream); then the local BP-specific qfunction headers
// (quoted includes survived intact). Helper functions:
//   Split3      - factor a count into a near-cubic 3D grid: for each
//                 dimension takes the ceiling of the cube/square root of what
//                 remains, bumped up to the next exact divisor ('try' is a
//                 valid C identifier, though unfortunate).
//   Max3 / Min3 - max / min of a PetscInt[3].
//   GlobalNodes - per-dimension owned node count: degree*melem[d], plus one
//                 extra node on the last rank of each dimension.
//   GlobalStart - truncated: the "// Dumb brute-force..." comment swallows
//                 the rest of the physical line, and the stripped span
//                 "i<p..." jumps straight into the middle of MatMult_Mass
//                 (whose signature and opening are lost). The visible tail is
//                 MatMult_Mass's body: global-to-local scatter of X, zero
//                 Yloc, wrap both in CEED vectors (CEED_USE_POINTER, host
//                 memory), apply user->op, then begin the additive
//                 local-to-global gather of Y.
const char help[] = "Solve CEED BPs using PETSc\n"; #include #include #include #include #include "qfunctions/bps/common.h" #include "qfunctions/bps/bp1.h" #include "qfunctions/bps/bp2.h" #include "qfunctions/bps/bp3.h" #include "qfunctions/bps/bp4.h" static void Split3(PetscInt size, PetscInt m[3], bool reverse) { for (PetscInt d=0,sizeleft=size; d<3; d++) { PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); while (try * (sizeleft / try) != sizeleft) try++; m[reverse ? 2-d : d] = try; sizeleft /= try; } } static PetscInt Max3(const PetscInt a[3]) { return PetscMax(a[0], PetscMax(a[1], a[2])); } static PetscInt Min3(const PetscInt a[3]) { return PetscMin(a[0], PetscMin(a[1], a[2])); } static void GlobalNodes(const PetscInt p[3], const PetscInt irank[3], PetscInt degree, const PetscInt melem[3], PetscInt mnodes[3]) { for (int d=0; d<3; d++) mnodes[d] = degree*melem[d] + (irank[d] == p[d]-1); } static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3], PetscInt degree, const PetscInt melem[3]) { PetscInt start = 0; // Dumb brute-force is easier to read for (PetscInt i=0; iltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); CHKERRQ(ierr); ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); CHKERRQ(ierr); ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); CeedOperatorApply(user->op, user->xceed, user->yceed, CEED_REQUEST_IMMEDIATE); ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); if (Y) { ierr = VecZeroEntries(Y); CHKERRQ(ierr); ierr = VecScatterBegin(user->ltog, user->Yloc, Y,
// Region 2: tail of MatMult_Mass (finish the ADD_VALUES local-to-global
// gather of Y -- guarded by "if (Y)" above, so a NULL Y skips the gather).
// Then MatMult_Diff, textually intact: the action of the Laplacian with
// Dirichlet BCs -- global-to-local scatter via ltog0 (the scatter excluding
// Dirichlet nodes), CeedOperatorApply of user->op, then a two-part
// local-to-global: gtogD first copies the Dirichlet values of X into Y
// (INSERT_VALUES), and ltog0 adds the interior contributions. Beware: the
// "// This function uses libCEED..." comment swallows the rest of this
// physical line, so MatMult_Diff is currently dead text. The line ends inside
// the declaration of ComputeErrorMax (continues on the next line).
ADD_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); } PetscFunctionReturn(0); } // This function uses libCEED to compute the action of the Laplacian with // Dirichlet boundary conditions static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) { PetscErrorCode ierr; User user; PetscScalar *x, *y; PetscFunctionBeginUser; ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); // Global-to-local ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); CHKERRQ(ierr); ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); CHKERRQ(ierr); ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); // Setup CEED vectors ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); // Apply CEED operator CeedOperatorApply(user->op, user->xceed, user->yceed, CEED_REQUEST_IMMEDIATE); ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); // Restore PETSc vectors ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); // Local-to-global ierr = VecZeroEntries(Y); CHKERRQ(ierr); ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); PetscFunctionReturn(0); } // This function calculates the error in the final solution static PetscErrorCode ComputeErrorMax(User user, CeedOperator op_error, Vec X, CeedVector target, PetscReal *maxerror) { PetscErrorCode
// Region 3: remainder of ComputeErrorMax -- scatter X to the local vector,
// apply op_error into a freshly created collocated_error CEED vector, then
// reduce the max pointwise error. The reduction loop is damaged by the tag
// stripping: "for (CeedInt i=0; icomm)..." has lost the loop bound, loop
// body, the CeedVectorRestoreArrayRead, and (presumably) an MPI_Allreduce on
// user->comm -- TODO recover from upstream. Note also that the return of
// VecRestoreArrayRead here is not assigned to ierr before CHKERRQ (checks a
// stale value) -- a genuine upstream-visible nit, but unfixable here without
// trustworthy surrounding text. Then main() begins: variable declarations,
// PetscInitialize, and the start of option parsing.
ierr; PetscScalar *x; CeedVector collocated_error; CeedInt length; PetscFunctionBeginUser; CeedVectorGetLength(target, &length); CeedVectorCreate(user->ceed, length, &collocated_error); // Global-to-local ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); CHKERRQ(ierr); ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); CHKERRQ(ierr); // Setup CEED vector ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); // Apply CEED operator CeedOperatorApply(op_error, user->xceed, collocated_error, CEED_REQUEST_IMMEDIATE); // Restore PETSc vector VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); // Reduce max error *maxerror = 0; const CeedScalar *e; CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e); for (CeedInt i=0; icomm); CHKERRQ(ierr); // Cleanup CeedVectorDestroy(&collocated_error); PetscFunctionReturn(0); } int main(int argc, char **argv) { PetscInt ierr; MPI_Comm comm; char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; double my_rt_start, my_rt, rt_min, rt_max; PetscInt degree, qextra, localnodes, localelem, melem[3], mnodes[3], p[3], irank[3], lnodes[3], lsize, ncompu = 1; PetscScalar *r; PetscBool test_mode, benchmark_mode, write_solution; PetscMPIInt size, rank; Vec X, Xloc, rhs, rhsloc; Mat mat; KSP ksp; VecScatter ltog, ltog0, gtogD; User user; Ceed ceed; CeedBasis basisx, basisu; CeedElemRestriction Erestrictx, Erestrictu, Erestrictxi, Erestrictui, Erestrictqdi; CeedQFunction qf_setupgeo, qf_setuprhs, qf_apply, qf_error; CeedOperator op_setupgeo, op_setuprhs, op_apply, op_error; CeedVector xcoord, qdata, rhsceed, target; CeedInt P, Q; const CeedInt dim = 3, ncompx = 3; bpType bpChoice; ierr = PetscInitialize(&argc, &argv, NULL, help); if (ierr) return ierr; comm = PETSC_COMM_WORLD; // Read command line options ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL);
// Region 4: option parsing (-problem, -test, -benchmark, -write_solution,
// -degree, -qextra, -ceed, -local). Entity-decoding damage is visible here:
// "°ree" in the -degree PetscOptionsInt call was evidently "&degree"
// before "&deg" was decoded to the degree sign. After PetscOptionsEnd: basis
// sizes P = degree+1, Q = P+qextra; Split3 factors the MPI size into a
// process grid and a nicely composite local element count. The line then hits
// a very large stripped span ("for (int d=0,rankleft=rank; d=mnodes[0],
// ii=..."): the rank-location loop, the entire mesh/node bookkeeping, the
// Vec/VecScatter creation (ltog, ltog0, gtogD), and ALL CEED basis /
// restriction / qfunction / operator setup between here and the User-context
// assignment are missing -- recover from upstream.
CHKERRQ(ierr); bpChoice = CEED_BP1; ierr = PetscOptionsEnum("-problem", "CEED benchmark problem to solve", NULL, bpTypes, (PetscEnum)bpChoice, (PetscEnum *)&bpChoice, NULL); CHKERRQ(ierr); ncompu = bpOptions[bpChoice].ncompu; test_mode = PETSC_FALSE; ierr = PetscOptionsBool("-test", "Testing mode (do not print unless error is large)", NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); benchmark_mode = PETSC_FALSE; ierr = PetscOptionsBool("-benchmark", "Benchmarking mode (prints benchmark statistics)", NULL, benchmark_mode, &benchmark_mode, NULL); CHKERRQ(ierr); write_solution = PETSC_FALSE; ierr = PetscOptionsBool("-write_solution", "Write solution for visualization", NULL, write_solution, &write_solution, NULL); CHKERRQ(ierr); degree = test_mode ? 3 : 1; ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", NULL, degree, °ree, NULL); CHKERRQ(ierr); qextra = bpOptions[bpChoice].qextra; ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", NULL, qextra, &qextra, NULL); CHKERRQ(ierr); ierr = PetscOptionsString("-ceed", "CEED resource specifier", NULL, ceedresource, ceedresource, sizeof(ceedresource), NULL); CHKERRQ(ierr); localnodes = 1000; ierr = PetscOptionsInt("-local", "Target number of locally owned nodes per process", NULL, localnodes, &localnodes, NULL); CHKERRQ(ierr); ierr = PetscOptionsEnd(); CHKERRQ(ierr); P = degree + 1; Q = P + qextra; // Determine size of process grid ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); Split3(size, p, false); // Find a nicely composite number of elements no less than localnodes for (localelem = PetscMax(1, localnodes / (degree*degree*degree)); ; localelem++) { Split3(localelem, melem, true); if (Max3(melem) / Min3(melem) <= 2) break; } // Find my location in the process grid ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); for (int d=0,rankleft=rank; d=mnodes[0], ii=i-ir*mnodes[0], i=mnodes[1], jj=j-jr*mnodes[1], j=mnodes[2], kk=k-kr*mnodes[2], kcomm = comm; user->ltog = ltog; if
// Region 5: fill the remaining User-context fields (ltog0/gtogD only for the
// diffusion BPs), duplicate Xloc for Yloc, create the xceed/yceed work
// vectors of size lsize*ncompu; create the MatShell sized by the owned node
// counts and wire MATOP_MULT to MatMult_Mass (BP1/BP2) or MatMult_Diff
// (BP3+); assemble qdata and the RHS by applying op_setupgeo and op_setuprhs
// to the coordinate vector, then gather the local RHS into the global rhs;
// begin KSP configuration (Jacobi rowsum PC for the mass BPs, PCNONE
// otherwise; CG with KSP_NORM_NATURAL).
(bpChoice != CEED_BP1 && bpChoice != CEED_BP2) { user->ltog0 = ltog0; user->gtogD = gtogD; } user->Xloc = Xloc; ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr); CeedVectorCreate(ceed, lsize*ncompu, &user->xceed); CeedVectorCreate(ceed, lsize*ncompu, &user->yceed); user->op = op_apply; user->qdata = qdata; user->ceed = ceed; ierr = MatCreateShell(comm, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr); if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass); CHKERRQ(ierr); } else { ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff); CHKERRQ(ierr); } ierr = MatCreateVecs(mat, &rhs, NULL); CHKERRQ(ierr); // Get RHS vector ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); ierr = VecGetArray(rhsloc, &r); CHKERRQ(ierr); CeedVectorSetArray(rhsceed, CEED_MEM_HOST, CEED_USE_POINTER, r); // Setup qdata, rhs, and target CeedOperatorApply(op_setupgeo, xcoord, qdata, CEED_REQUEST_IMMEDIATE); CeedOperatorApply(op_setuprhs, xcoord, rhsceed, CEED_REQUEST_IMMEDIATE); ierr = CeedVectorSyncArray(rhsceed, CEED_MEM_HOST); CHKERRQ(ierr); CeedVectorDestroy(&xcoord); // Gather RHS ierr = VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); ierr = VecZeroEntries(rhs); CHKERRQ(ierr); ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); CeedVectorDestroy(&rhsceed); ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); { PC pc; ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); } else { ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); } ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL);
// Region 6: set the KSP tolerances, honor runtime -ksp_* options, attach the
// shell matrix; in benchmark mode do a 1-iteration calibration solve and use
// its timing to pick maxits (5 if an iteration costs > 0.02 s, else 20); then
// the barrier-synchronized, wall-clock-timed KSPSolve and the convergence /
// performance report (skipped in test mode unless the solve diverged or the
// residual is large). Note "gsize" appears in the report but its definition
// lies in the stripped span of Region 4.
CHKERRQ(ierr); ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT); CHKERRQ(ierr); } ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr); // First run, if benchmarking if (benchmark_mode) { ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); CHKERRQ(ierr); my_rt_start = MPI_Wtime(); ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); my_rt = MPI_Wtime() - my_rt_start; ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm); CHKERRQ(ierr); // Set maxits based on first iteration timing if (my_rt > 0.02) { ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); CHKERRQ(ierr); } else { ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); CHKERRQ(ierr); } } // Timed solve ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); my_rt_start = MPI_Wtime(); ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); my_rt = MPI_Wtime() - my_rt_start; { KSPType ksptype; KSPConvergedReason reason; PetscReal rnorm; PetscInt its; ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); if (!test_mode || reason < 0 || rnorm > 1e-8) { ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm); CHKERRQ(ierr); ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); CHKERRQ(ierr); ierr = PetscPrintf(comm, " KSP:\n" " KSP Type : %s\n" " KSP Convergence : %s\n" " Total KSP Iterations : %D\n" " Final rnorm : %e\n", ksptype, KSPConvergedReasons[reason], its, (double)rnorm); CHKERRQ(ierr); ierr = PetscPrintf(comm, " Performance:\n" " CG Solve Time : %g (%g) sec\n" " DoFs/Sec in CG : %g (%g) million\n", rt_max, rt_min, 1e-6*gsize*its/rt_max, 1e-6*gsize*its/rt_min); CHKERRQ(ierr); } if (benchmark_mode && (!test_mode)) { ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm);
// Region 7: benchmark statistics; max pointwise error via ComputeErrorMax
// (reported unless in test mode with error below 5e-2); optional VTK output
// of the solution; then destruction of all PETSc and CEED objects. Entity
// damage is plain here: "VecScatterDestroy(<og)" / "(<og0)" / "(>ogD)" were
// evidently "&ltog" / "&ltog0" / "&gtogD" before "&lt"/"&gt" were decoded --
// restore the '&' address-of arguments when recovering the text.
CHKERRQ(ierr); ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); CHKERRQ(ierr); ierr = PetscPrintf(comm, " Performance:\n" " CG Solve Time : %g (%g) sec\n" " DoFs/Sec in CG : %g (%g) million\n", rt_max, rt_min, 1e-6*gsize*its/rt_max, 1e-6*gsize*its/rt_min); CHKERRQ(ierr); } } { PetscReal maxerror; ierr = ComputeErrorMax(user, op_error, X, target, &maxerror); CHKERRQ(ierr); PetscReal tol = 5e-2; if (!test_mode || maxerror > tol) { ierr = PetscPrintf(comm, " Pointwise Error (max) : %e\n", (double)maxerror); CHKERRQ(ierr); } } if (write_solution) { PetscViewer vtkviewersoln; ierr = PetscViewerCreate(comm, &vtkviewersoln); CHKERRQ(ierr); ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr); ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr); ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr); ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr); } ierr = VecDestroy(&rhs); CHKERRQ(ierr); ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); ierr = VecDestroy(&X); CHKERRQ(ierr); ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr); ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr); ierr = VecScatterDestroy(<og); CHKERRQ(ierr); ierr = VecScatterDestroy(<og0); CHKERRQ(ierr); ierr = VecScatterDestroy(>ogD); CHKERRQ(ierr); ierr = MatDestroy(&mat); CHKERRQ(ierr); ierr = KSPDestroy(&ksp); CHKERRQ(ierr); CeedVectorDestroy(&user->xceed); CeedVectorDestroy(&user->yceed); CeedVectorDestroy(&user->qdata); CeedVectorDestroy(&target); CeedOperatorDestroy(&op_setupgeo); CeedOperatorDestroy(&op_setuprhs); CeedOperatorDestroy(&op_apply); CeedOperatorDestroy(&op_error); CeedElemRestrictionDestroy(&Erestrictu); CeedElemRestrictionDestroy(&Erestrictx); CeedElemRestrictionDestroy(&Erestrictui); CeedElemRestrictionDestroy(&Erestrictxi); CeedElemRestrictionDestroy(&Erestrictqdi); CeedQFunctionDestroy(&qf_setupgeo); CeedQFunctionDestroy(&qf_setuprhs); CeedQFunctionDestroy(&qf_apply); CeedQFunctionDestroy(&qf_error); CeedBasisDestroy(&basisu); 
// Region 8: last CEED destructions, free the User context, PetscFinalize.
CeedBasisDestroy(&basisx); CeedDestroy(&ceed); ierr = PetscFree(user); CHKERRQ(ierr); return PetscFinalize(); }