// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
166d69246aSJeremy L Thompson 176d69246aSJeremy L Thompson #ifndef _ceed_cuda_compile_h 186d69246aSJeremy L Thompson #define _ceed_cuda_compile_h 196d69246aSJeremy L Thompson 206d69246aSJeremy L Thompson #include <ceed/ceed.h> 216d69246aSJeremy L Thompson #include <cuda.h> 226d69246aSJeremy L Thompson #include <nvrtc.h> 236d69246aSJeremy L Thompson 24*0d0321e0SJeremy L Thompson static inline CeedInt CeedDivUpInt(CeedInt numerator, CeedInt denominator) { 25*0d0321e0SJeremy L Thompson return (numerator + denominator - 1) / denominator; 26*0d0321e0SJeremy L Thompson } 27*0d0321e0SJeremy L Thompson 286d69246aSJeremy L Thompson CEED_INTERN int CeedCompileCuda(Ceed ceed, const char *source, CUmodule *module, 29*0d0321e0SJeremy L Thompson const CeedInt num_opts, ...); 306d69246aSJeremy L Thompson 316d69246aSJeremy L Thompson CEED_INTERN int CeedGetKernelCuda(Ceed ceed, CUmodule module, const char *name, 326d69246aSJeremy L Thompson CUfunction *kernel); 336d69246aSJeremy L Thompson 346d69246aSJeremy L Thompson CEED_INTERN int CeedRunKernelCuda(Ceed ceed, CUfunction kernel, 35*0d0321e0SJeremy L Thompson const int grid_size, 36*0d0321e0SJeremy L Thompson const int block_size, void **args); 376d69246aSJeremy L Thompson 386d69246aSJeremy L Thompson CEED_INTERN int CeedRunKernelAutoblockCuda(Ceed ceed, CUfunction kernel, 396d69246aSJeremy L Thompson size_t size, void **args); 406d69246aSJeremy L Thompson 416d69246aSJeremy L Thompson CEED_INTERN int CeedRunKernelDimCuda(Ceed ceed, CUfunction kernel, 42*0d0321e0SJeremy L Thompson const int grid_size, 43*0d0321e0SJeremy L Thompson const int block_size_x, const int block_size_y, 44*0d0321e0SJeremy L Thompson const int block_size_z, void **args); 456d69246aSJeremy L Thompson 466d69246aSJeremy L Thompson CEED_INTERN int CeedRunKernelDimSharedCuda(Ceed ceed, CUfunction kernel, 47*0d0321e0SJeremy L Thompson const int grid_size, const int block_size_x, const int block_size_y, 48*0d0321e0SJeremy L Thompson const int block_size_z, const 
int shared_mem_size, void **args); 496d69246aSJeremy L Thompson 506d69246aSJeremy L Thompson #endif // _ceed_cuda_compile_h 51