13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 8*2b730f8bSJeremy L Thompson #include "ceed-cuda-ref.h" 9*2b730f8bSJeremy L Thompson 100d0321e0SJeremy L Thompson #include <ceed/backend.h> 11*2b730f8bSJeremy L Thompson #include <ceed/ceed.h> 120d0321e0SJeremy L Thompson #include <cublas_v2.h> 130d0321e0SJeremy L Thompson #include <cuda.h> 140d0321e0SJeremy L Thompson #include <cuda_runtime.h> 150d0321e0SJeremy L Thompson #include <string.h> 160d0321e0SJeremy L Thompson 170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 180d0321e0SJeremy L Thompson // CUDA preferred MemType 190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 20437930d1SJeremy L Thompson static int CeedGetPreferredMemType_Cuda(CeedMemType *mem_type) { 21437930d1SJeremy L Thompson *mem_type = CEED_MEM_DEVICE; 220d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 230d0321e0SJeremy L Thompson } 240d0321e0SJeremy L Thompson 250d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 260d0321e0SJeremy L Thompson // Get CUBLAS handle 270d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 280d0321e0SJeremy L Thompson int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle) { 290d0321e0SJeremy L Thompson Ceed_Cuda *data; 30*2b730f8bSJeremy L Thompson CeedCallBackend(CeedGetData(ceed, &data)); 310d0321e0SJeremy L Thompson 32*2b730f8bSJeremy L Thompson if (!data->cublas_handle) CeedCallCublas(ceed, cublasCreate(&data->cublas_handle)); 330d0321e0SJeremy L Thompson *handle = data->cublas_handle; 340d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 350d0321e0SJeremy L Thompson } 360d0321e0SJeremy L Thompson 370d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 380d0321e0SJeremy L Thompson // Backend Init 390d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 400d0321e0SJeremy L Thompson static int CeedInit_Cuda(const char *resource, Ceed ceed) { 41b11824b3SJeremy L Thompson char *resource_root; 42*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCudaGetResourceRoot(ceed, resource, &resource_root)); 43*2b730f8bSJeremy L Thompson if (strcmp(resource_root, "/gpu/cuda/ref")) { 440d0321e0SJeremy L Thompson // LCOV_EXCL_START 45*2b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "Cuda backend cannot use resource: %s", resource); 460d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 47*2b730f8bSJeremy L Thompson } 48*2b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&resource_root)); 49*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetDeterministic(ceed, true)); 500d0321e0SJeremy L Thompson 510d0321e0SJeremy L Thompson Ceed_Cuda *data; 52*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data)); 53*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetData(ceed, data)); 54*2b730f8bSJeremy L Thompson CeedCallBackend(CeedCudaInit(ceed, resource)); 550d0321e0SJeremy L Thompson 56*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "GetPreferredMemType", CeedGetPreferredMemType_Cuda)); 57*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "VectorCreate", CeedVectorCreate_Cuda)); 58*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1", CeedBasisCreateTensorH1_Cuda)); 59*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateH1", CeedBasisCreateH1_Cuda)); 60*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreate", CeedElemRestrictionCreate_Cuda)); 61*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreateBlocked", CeedElemRestrictionCreateBlocked_Cuda)); 62*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionCreate", CeedQFunctionCreate_Cuda)); 63*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionContextCreate", CeedQFunctionContextCreate_Cuda)); 64*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "OperatorCreate", CeedOperatorCreate_Cuda)); 65*2b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "Destroy", CeedDestroy_Cuda)); 660d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 670d0321e0SJeremy L Thompson } 680d0321e0SJeremy L Thompson 690d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 700d0321e0SJeremy L Thompson // Backend Register 710d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 72*2b730f8bSJeremy L Thompson CEED_INTERN int CeedRegister_Cuda(void) { return CeedRegister("/gpu/cuda/ref", CeedInit_Cuda, 40); } 730d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 74