// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16*6d69246aSJeremy L Thompson 17*6d69246aSJeremy L Thompson #ifndef _ceed_cuda_compile_h 18*6d69246aSJeremy L Thompson #define _ceed_cuda_compile_h 19*6d69246aSJeremy L Thompson 20*6d69246aSJeremy L Thompson #include <ceed/ceed.h> 21*6d69246aSJeremy L Thompson #include <cuda.h> 22*6d69246aSJeremy L Thompson #include <nvrtc.h> 23*6d69246aSJeremy L Thompson 24*6d69246aSJeremy L Thompson CEED_INTERN int CeedCompileCuda(Ceed ceed, const char *source, CUmodule *module, 25*6d69246aSJeremy L Thompson const CeedInt numopts, ...); 26*6d69246aSJeremy L Thompson 27*6d69246aSJeremy L Thompson CEED_INTERN int CeedGetKernelCuda(Ceed ceed, CUmodule module, const char *name, 28*6d69246aSJeremy L Thompson CUfunction *kernel); 29*6d69246aSJeremy L Thompson 30*6d69246aSJeremy L Thompson CEED_INTERN int CeedRunKernelCuda(Ceed ceed, CUfunction kernel, 31*6d69246aSJeremy L Thompson const int gridSize, 32*6d69246aSJeremy L Thompson const int blockSize, void **args); 33*6d69246aSJeremy L Thompson 34*6d69246aSJeremy L Thompson CEED_INTERN int CeedRunKernelAutoblockCuda(Ceed ceed, CUfunction kernel, 35*6d69246aSJeremy L Thompson size_t size, void **args); 36*6d69246aSJeremy L Thompson 37*6d69246aSJeremy L Thompson CEED_INTERN int CeedRunKernelDimCuda(Ceed ceed, CUfunction kernel, 38*6d69246aSJeremy L Thompson const int gridSize, 39*6d69246aSJeremy L Thompson const int blockSizeX, const int blockSizeY, 40*6d69246aSJeremy L Thompson const int blockSizeZ, void **args); 41*6d69246aSJeremy L Thompson 42*6d69246aSJeremy L Thompson CEED_INTERN int CeedRunKernelDimSharedCuda(Ceed ceed, CUfunction kernel, 43*6d69246aSJeremy L Thompson const int gridSize, const int blockSizeX, const int blockSizeY, 44*6d69246aSJeremy L Thompson const int blockSizeZ, const int sharedMemSize, void **args); 45*6d69246aSJeremy L Thompson 46*6d69246aSJeremy L Thompson #endif // _ceed_cuda_compile_h 47