// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16*0d0321e0SJeremy L Thompson 17*0d0321e0SJeremy L Thompson #include <ceed/ceed.h> 18*0d0321e0SJeremy L Thompson #include <ceed/backend.h> 19*0d0321e0SJeremy L Thompson #include <cublas_v2.h> 20*0d0321e0SJeremy L Thompson #include <cuda.h> 21*0d0321e0SJeremy L Thompson #include <cuda_runtime.h> 22*0d0321e0SJeremy L Thompson #include <string.h> 23*0d0321e0SJeremy L Thompson #include "ceed-cuda-ref.h" 24*0d0321e0SJeremy L Thompson 25*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 26*0d0321e0SJeremy L Thompson // CUDA preferred MemType 27*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 28*0d0321e0SJeremy L Thompson static int CeedGetPreferredMemType_Cuda(CeedMemType *type) { 29*0d0321e0SJeremy L Thompson *type = CEED_MEM_DEVICE; 30*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 31*0d0321e0SJeremy L Thompson } 32*0d0321e0SJeremy L Thompson 33*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 34*0d0321e0SJeremy L Thompson // Get CUBLAS handle 35*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 36*0d0321e0SJeremy L Thompson int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle) { 37*0d0321e0SJeremy L Thompson int ierr; 38*0d0321e0SJeremy L Thompson Ceed_Cuda *data; 39*0d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 40*0d0321e0SJeremy L Thompson 41*0d0321e0SJeremy L Thompson if (!data->cublas_handle) { 42*0d0321e0SJeremy L Thompson ierr = cublasCreate(&data->cublas_handle); CeedChk_Cublas(ceed, ierr); 43*0d0321e0SJeremy L Thompson } 44*0d0321e0SJeremy L Thompson *handle = data->cublas_handle; 45*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 46*0d0321e0SJeremy L Thompson } 47*0d0321e0SJeremy L Thompson 48*0d0321e0SJeremy L Thompson 
//------------------------------------------------------------------------------ 49*0d0321e0SJeremy L Thompson // Backend Init 50*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 51*0d0321e0SJeremy L Thompson static int CeedInit_Cuda(const char *resource, Ceed ceed) { 52*0d0321e0SJeremy L Thompson int ierr; 53*0d0321e0SJeremy L Thompson 54*0d0321e0SJeremy L Thompson if (strcmp(resource, "/gpu/cuda/ref")) 55*0d0321e0SJeremy L Thompson // LCOV_EXCL_START 56*0d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 57*0d0321e0SJeremy L Thompson "Cuda backend cannot use resource: %s", resource); 58*0d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 59*0d0321e0SJeremy L Thompson ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr); 60*0d0321e0SJeremy L Thompson 61*0d0321e0SJeremy L Thompson Ceed_Cuda *data; 62*0d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 63*0d0321e0SJeremy L Thompson ierr = CeedSetData(ceed, data); CeedChkBackend(ierr); 64*0d0321e0SJeremy L Thompson ierr = CeedCudaInit(ceed, resource); CeedChkBackend(ierr); 65*0d0321e0SJeremy L Thompson 66*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "GetPreferredMemType", 67*0d0321e0SJeremy L Thompson CeedGetPreferredMemType_Cuda); CeedChkBackend(ierr); 68*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "VectorCreate", 69*0d0321e0SJeremy L Thompson CeedVectorCreate_Cuda); CeedChkBackend(ierr); 70*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1", 71*0d0321e0SJeremy L Thompson CeedBasisCreateTensorH1_Cuda); CeedChkBackend(ierr); 72*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateH1", 73*0d0321e0SJeremy L Thompson CeedBasisCreateH1_Cuda); CeedChkBackend(ierr); 74*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreate", 
75*0d0321e0SJeremy L Thompson CeedElemRestrictionCreate_Cuda); CeedChkBackend(ierr); 76*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, 77*0d0321e0SJeremy L Thompson "ElemRestrictionCreateBlocked", 78*0d0321e0SJeremy L Thompson CeedElemRestrictionCreateBlocked_Cuda); 79*0d0321e0SJeremy L Thompson CeedChkBackend(ierr); 80*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionCreate", 81*0d0321e0SJeremy L Thompson CeedQFunctionCreate_Cuda); CeedChkBackend(ierr); 82*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionContextCreate", 83*0d0321e0SJeremy L Thompson CeedQFunctionContextCreate_Cuda); CeedChkBackend(ierr); 84*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "OperatorCreate", 85*0d0321e0SJeremy L Thompson CeedOperatorCreate_Cuda); CeedChkBackend(ierr); 86*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "CompositeOperatorCreate", 87*0d0321e0SJeremy L Thompson CeedCompositeOperatorCreate_Cuda); CeedChkBackend(ierr); 88*0d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "Destroy", 89*0d0321e0SJeremy L Thompson CeedDestroy_Cuda); CeedChkBackend(ierr); 90*0d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 91*0d0321e0SJeremy L Thompson } 92*0d0321e0SJeremy L Thompson 93*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 94*0d0321e0SJeremy L Thompson // Backend Register 95*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 96*0d0321e0SJeremy L Thompson CEED_INTERN int CeedRegister_Cuda(void) { 97*0d0321e0SJeremy L Thompson return CeedRegister("/gpu/cuda/ref", CeedInit_Cuda, 40); 98*0d0321e0SJeremy L Thompson } 99*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 100