xref: /libCEED/rust/libceed-sys/c-src/backends/cuda-ref/ceed-cuda-ref.c (revision 437930d19388999b5cc2d76e2fe0d14f58fb41f3) !
10d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
20d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
30d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details.
40d0321e0SJeremy L Thompson //
50d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software
60d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral
70d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and
80d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed.
90d0321e0SJeremy L Thompson //
100d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
110d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office
120d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for
130d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including
140d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early
150d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative.
160d0321e0SJeremy L Thompson 
170d0321e0SJeremy L Thompson #include <ceed/ceed.h>
180d0321e0SJeremy L Thompson #include <ceed/backend.h>
190d0321e0SJeremy L Thompson #include <cublas_v2.h>
200d0321e0SJeremy L Thompson #include <cuda.h>
210d0321e0SJeremy L Thompson #include <cuda_runtime.h>
220d0321e0SJeremy L Thompson #include <string.h>
230d0321e0SJeremy L Thompson #include "ceed-cuda-ref.h"
240d0321e0SJeremy L Thompson 
250d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
260d0321e0SJeremy L Thompson // CUDA preferred MemType
270d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
28*437930d1SJeremy L Thompson static int CeedGetPreferredMemType_Cuda(CeedMemType *mem_type) {
29*437930d1SJeremy L Thompson   *mem_type = CEED_MEM_DEVICE;
300d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
310d0321e0SJeremy L Thompson }
320d0321e0SJeremy L Thompson 
330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
340d0321e0SJeremy L Thompson // Get CUBLAS handle
350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
360d0321e0SJeremy L Thompson int CeedCudaGetCublasHandle(Ceed ceed, cublasHandle_t *handle) {
370d0321e0SJeremy L Thompson   int ierr;
380d0321e0SJeremy L Thompson   Ceed_Cuda *data;
390d0321e0SJeremy L Thompson   ierr = CeedGetData(ceed, &data); CeedChkBackend(ierr);
400d0321e0SJeremy L Thompson 
410d0321e0SJeremy L Thompson   if (!data->cublas_handle) {
420d0321e0SJeremy L Thompson     ierr = cublasCreate(&data->cublas_handle); CeedChk_Cublas(ceed, ierr);
430d0321e0SJeremy L Thompson   }
440d0321e0SJeremy L Thompson   *handle = data->cublas_handle;
450d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
460d0321e0SJeremy L Thompson }
470d0321e0SJeremy L Thompson 
480d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
490d0321e0SJeremy L Thompson // Backend Init
500d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
510d0321e0SJeremy L Thompson static int CeedInit_Cuda(const char *resource, Ceed ceed) {
520d0321e0SJeremy L Thompson   int ierr;
530d0321e0SJeremy L Thompson 
540d0321e0SJeremy L Thompson   if (strcmp(resource, "/gpu/cuda/ref"))
550d0321e0SJeremy L Thompson     // LCOV_EXCL_START
560d0321e0SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
570d0321e0SJeremy L Thompson                      "Cuda backend cannot use resource: %s", resource);
580d0321e0SJeremy L Thompson   // LCOV_EXCL_STOP
590d0321e0SJeremy L Thompson   ierr = CeedSetDeterministic(ceed, true); CeedChk(ierr);
600d0321e0SJeremy L Thompson 
610d0321e0SJeremy L Thompson   Ceed_Cuda *data;
620d0321e0SJeremy L Thompson   ierr = CeedCalloc(1, &data); CeedChkBackend(ierr);
630d0321e0SJeremy L Thompson   ierr = CeedSetData(ceed, data); CeedChkBackend(ierr);
640d0321e0SJeremy L Thompson   ierr = CeedCudaInit(ceed, resource); CeedChkBackend(ierr);
650d0321e0SJeremy L Thompson 
660d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "GetPreferredMemType",
670d0321e0SJeremy L Thompson                                 CeedGetPreferredMemType_Cuda); CeedChkBackend(ierr);
680d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "VectorCreate",
690d0321e0SJeremy L Thompson                                 CeedVectorCreate_Cuda); CeedChkBackend(ierr);
700d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateTensorH1",
710d0321e0SJeremy L Thompson                                 CeedBasisCreateTensorH1_Cuda); CeedChkBackend(ierr);
720d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "BasisCreateH1",
730d0321e0SJeremy L Thompson                                 CeedBasisCreateH1_Cuda); CeedChkBackend(ierr);
740d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "ElemRestrictionCreate",
750d0321e0SJeremy L Thompson                                 CeedElemRestrictionCreate_Cuda); CeedChkBackend(ierr);
760d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed,
770d0321e0SJeremy L Thompson                                 "ElemRestrictionCreateBlocked",
780d0321e0SJeremy L Thompson                                 CeedElemRestrictionCreateBlocked_Cuda);
790d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
800d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionCreate",
810d0321e0SJeremy L Thompson                                 CeedQFunctionCreate_Cuda); CeedChkBackend(ierr);
820d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "QFunctionContextCreate",
830d0321e0SJeremy L Thompson                                 CeedQFunctionContextCreate_Cuda); CeedChkBackend(ierr);
840d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "OperatorCreate",
850d0321e0SJeremy L Thompson                                 CeedOperatorCreate_Cuda); CeedChkBackend(ierr);
860d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "CompositeOperatorCreate",
870d0321e0SJeremy L Thompson                                 CeedCompositeOperatorCreate_Cuda); CeedChkBackend(ierr);
880d0321e0SJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "Ceed", ceed, "Destroy",
890d0321e0SJeremy L Thompson                                 CeedDestroy_Cuda); CeedChkBackend(ierr);
900d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
910d0321e0SJeremy L Thompson }
920d0321e0SJeremy L Thompson 
930d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
940d0321e0SJeremy L Thompson // Backend Register
950d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
960d0321e0SJeremy L Thompson CEED_INTERN int CeedRegister_Cuda(void) {
970d0321e0SJeremy L Thompson   return CeedRegister("/gpu/cuda/ref", CeedInit_Cuda, 40);
980d0321e0SJeremy L Thompson }
990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
100