// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
160d0321e0SJeremy L Thompson 170d0321e0SJeremy L Thompson #include <ceed/ceed.h> 180d0321e0SJeremy L Thompson #include <ceed/backend.h> 190d0321e0SJeremy L Thompson #include <cublas_v2.h> 200d0321e0SJeremy L Thompson #include <cuda.h> 210d0321e0SJeremy L Thompson #include <cuda_runtime.h> 220d0321e0SJeremy L Thompson #include <string.h> 230d0321e0SJeremy L Thompson #include "ceed-cuda-ref.h" 240d0321e0SJeremy L Thompson 250d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 260d0321e0SJeremy L Thompson // CUDA preferred MemType 270d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 28*437930d1SJeremy L Thompson static int CeedGetPreferredMemType_Cuda(CeedMemType *mem_type) { 29*437930d1SJeremy L Thompson *mem_type = CEED_MEM_DEVICE; 300d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 310d0321e0SJeremy L Thompson } 320d0321e0SJeremy L Thompson 330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 340d0321e0SJeremy L Thompson // Get CUBLAS handle 350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 360d0321e0SJeremy L Thompson int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle) { 370d0321e0SJeremy L Thompson int ierr; 380d0321e0SJeremy L Thompson Ceed_Cuda *data; 390d0321e0SJeremy L Thompson ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr); 400d0321e0SJeremy L Thompson 410d0321e0SJeremy L Thompson if (!data->cublas_handle) { 420d0321e0SJeremy L Thompson ierr = cublasCreate(&data->cublas_handle); CeedChk_Cublas(ceed, ierr); 430d0321e0SJeremy L Thompson } 440d0321e0SJeremy L Thompson *handle = data->cublas_handle; 450d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 460d0321e0SJeremy L Thompson } 470d0321e0SJeremy L Thompson 480d0321e0SJeremy L Thompson 
//------------------------------------------------------------------------------ 490d0321e0SJeremy L Thompson // Backend Init 500d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 510d0321e0SJeremy L Thompson static int CeedInit_Cuda(const char *resource, Ceed ceed) { 520d0321e0SJeremy L Thompson int ierr; 530d0321e0SJeremy L Thompson 540d0321e0SJeremy L Thompson if (strcmp(resource, "/gpu/cuda/ref")) 550d0321e0SJeremy L Thompson // LCOV_EXCL_START 560d0321e0SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, 570d0321e0SJeremy L Thompson "Cuda backend cannot use resource: %s", resource); 580d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 590d0321e0SJeremy L Thompson ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr); 600d0321e0SJeremy L Thompson 610d0321e0SJeremy L Thompson Ceed_Cuda *data; 620d0321e0SJeremy L Thompson ierr = CeedCalloc(1, &data); CeedChkBackend(ierr); 630d0321e0SJeremy L Thompson ierr = CeedSetData(ceed, data); CeedChkBackend(ierr); 640d0321e0SJeremy L Thompson ierr = CeedCudaInit(ceed, resource); CeedChkBackend(ierr); 650d0321e0SJeremy L Thompson 660d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "GetPreferredMemType", 670d0321e0SJeremy L Thompson CeedGetPreferredMemType_Cuda); CeedChkBackend(ierr); 680d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "VectorCreate", 690d0321e0SJeremy L Thompson CeedVectorCreate_Cuda); CeedChkBackend(ierr); 700d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1", 710d0321e0SJeremy L Thompson CeedBasisCreateTensorH1_Cuda); CeedChkBackend(ierr); 720d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateH1", 730d0321e0SJeremy L Thompson CeedBasisCreateH1_Cuda); CeedChkBackend(ierr); 740d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreate", 750d0321e0SJeremy L Thompson 
CeedElemRestrictionCreate_Cuda); CeedChkBackend(ierr); 760d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, 770d0321e0SJeremy L Thompson "ElemRestrictionCreateBlocked", 780d0321e0SJeremy L Thompson CeedElemRestrictionCreateBlocked_Cuda); 790d0321e0SJeremy L Thompson CeedChkBackend(ierr); 800d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionCreate", 810d0321e0SJeremy L Thompson CeedQFunctionCreate_Cuda); CeedChkBackend(ierr); 820d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionContextCreate", 830d0321e0SJeremy L Thompson CeedQFunctionContextCreate_Cuda); CeedChkBackend(ierr); 840d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "OperatorCreate", 850d0321e0SJeremy L Thompson CeedOperatorCreate_Cuda); CeedChkBackend(ierr); 860d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "CompositeOperatorCreate", 870d0321e0SJeremy L Thompson CeedCompositeOperatorCreate_Cuda); CeedChkBackend(ierr); 880d0321e0SJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "Destroy", 890d0321e0SJeremy L Thompson CeedDestroy_Cuda); CeedChkBackend(ierr); 900d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 910d0321e0SJeremy L Thompson } 920d0321e0SJeremy L Thompson 930d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 940d0321e0SJeremy L Thompson // Backend Register 950d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 960d0321e0SJeremy L Thompson CEED_INTERN int CeedRegister_Cuda(void) { 970d0321e0SJeremy L Thompson return CeedRegister("/gpu/cuda/ref", CeedInit_Cuda, 40); 980d0321e0SJeremy L Thompson } 990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 100