// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16*c532df63SYohann #include <ceed-backend.h> 17*c532df63SYohann #include <ceed.h> 18*c532df63SYohann #include <nvrtc.h> 19*c532df63SYohann #include <cuda.h> 20*c532df63SYohann #include <cuda_runtime.h> 21*c532df63SYohann 22*c532df63SYohann typedef struct { 23*c532df63SYohann CUmodule module; 24*c532df63SYohann CUfunction interp; 25*c532df63SYohann CUfunction grad; 26*c532df63SYohann CUfunction weight; 27*c532df63SYohann CeedScalar *d_interp1d; 28*c532df63SYohann CeedScalar *d_grad1d; 29*c532df63SYohann CeedScalar *d_qweight1d; 30*c532df63SYohann CeedScalar *c_B; 31*c532df63SYohann CeedScalar *c_G; 32*c532df63SYohann } CeedBasis_Cuda_shared; 33*c532df63SYohann 34*c532df63SYohann typedef struct { 35*c532df63SYohann } Ceed_Cuda_shared; 36*c532df63SYohann 37*c532df63SYohann CEED_INTERN int CeedBasisCreateTensorH1_Cuda_shared(CeedInt dim, CeedInt P1d, 38*c532df63SYohann CeedInt Q1d, 39*c532df63SYohann const CeedScalar *interp1d, 40*c532df63SYohann const CeedScalar *grad1d, 41*c532df63SYohann const CeedScalar *qref1d, 42*c532df63SYohann const CeedScalar *qweight1d, 43*c532df63SYohann CeedBasis basis); 44*c532df63SYohann 45*c532df63SYohann CEED_INTERN int CeedBasisCreateH1_Cuda_shared(CeedElemTopology, CeedInt, 46*c532df63SYohann CeedInt, 47*c532df63SYohann CeedInt, const CeedScalar *, 48*c532df63SYohann const CeedScalar *, const CeedScalar *, 49*c532df63SYohann const CeedScalar *, CeedBasis); 50