1*d275d636SJeremy L Thompson // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 37fcac036SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 57fcac036SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 7509d4af6SJeremy L Thompson #pragma once 87fcac036SJeremy L Thompson 949aac155SJeremy L Thompson #include <ceed.h> 107fcac036SJeremy L Thompson #include <ceed/backend.h> 117fcac036SJeremy L Thompson #include <cublas_v2.h> 127fcac036SJeremy L Thompson #include <cuda.h> 137fcac036SJeremy L Thompson 147fcac036SJeremy L Thompson #define QUOTE(...) #__VA_ARGS__ 157fcac036SJeremy L Thompson 167fcac036SJeremy L Thompson #define CeedChk_Cu(ceed, x) \ 177fcac036SJeremy L Thompson do { \ 18c9c2c079SJeremy L Thompson CUresult cuda_result = (CUresult)x; \ 197fcac036SJeremy L Thompson if (cuda_result != CUDA_SUCCESS) { \ 207fcac036SJeremy L Thompson const char *msg; \ 217fcac036SJeremy L Thompson cuGetErrorName(cuda_result, &msg); \ 227fcac036SJeremy L Thompson return CeedError((ceed), CEED_ERROR_BACKEND, msg); \ 237fcac036SJeremy L Thompson } \ 247fcac036SJeremy L Thompson } while (0) 257fcac036SJeremy L Thompson 267fcac036SJeremy L Thompson #define CeedChk_Cublas(ceed, x) \ 277fcac036SJeremy L Thompson do { \ 287fcac036SJeremy L Thompson cublasStatus_t cublas_result = x; \ 297fcac036SJeremy L Thompson if (cublas_result != CUBLAS_STATUS_SUCCESS) { \ 307fcac036SJeremy L Thompson const char *msg = cublasGetErrorName(cublas_result); \ 317fcac036SJeremy L Thompson return CeedError((ceed), CEED_ERROR_BACKEND, msg); \ 327fcac036SJeremy L Thompson } \ 337fcac036SJeremy L Thompson } while (0) 347fcac036SJeremy L Thompson 352b730f8bSJeremy L Thompson #define CeedCallCuda(ceed, ...) \ 362b730f8bSJeremy L Thompson do { \ 372b730f8bSJeremy L Thompson int ierr_q_ = __VA_ARGS__; \ 382b730f8bSJeremy L Thompson CeedChk_Cu(ceed, ierr_q_); \ 396574a04fSJeremy L Thompson } while (0) 402b730f8bSJeremy L Thompson 412b730f8bSJeremy L Thompson #define CeedCallCublas(ceed, ...) \ 422b730f8bSJeremy L Thompson do { \ 432b730f8bSJeremy L Thompson int ierr_q_ = __VA_ARGS__; \ 442b730f8bSJeremy L Thompson CeedChk_Cublas(ceed, ierr_q_); \ 456574a04fSJeremy L Thompson } while (0) 462b730f8bSJeremy L Thompson 472b730f8bSJeremy L Thompson #define CASE(name) \ 482b730f8bSJeremy L Thompson case name: \ 492b730f8bSJeremy L Thompson return #name 507fcac036SJeremy L Thompson // LCOV_EXCL_START 517fcac036SJeremy L Thompson static const char *cublasGetErrorName(cublasStatus_t error) { 527fcac036SJeremy L Thompson switch (error) { 537fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_SUCCESS); 547fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_NOT_INITIALIZED); 557fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_ALLOC_FAILED); 567fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_INVALID_VALUE); 577fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_ARCH_MISMATCH); 587fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_MAPPING_ERROR); 597fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_EXECUTION_FAILED); 607fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_INTERNAL_ERROR); 612b730f8bSJeremy L Thompson default: 622b730f8bSJeremy L Thompson return "CUBLAS_STATUS_UNKNOWN_ERROR"; 637fcac036SJeremy L Thompson } 647fcac036SJeremy L Thompson } 657fcac036SJeremy L Thompson // LCOV_EXCL_STOP 667fcac036SJeremy L Thompson 677fcac036SJeremy L Thompson typedef struct { 680d0321e0SJeremy L Thompson int device_id; 690d0321e0SJeremy L Thompson cublasHandle_t cublas_handle; 700d0321e0SJeremy L Thompson struct cudaDeviceProp device_prop; 717fcac036SJeremy L Thompson } Ceed_Cuda; 727fcac036SJeremy L Thompson 73eb7e6cafSJeremy L Thompson CEED_INTERN int CeedInit_Cuda(Ceed ceed, const char *resource); 747fcac036SJeremy L Thompson 757fcac036SJeremy L Thompson CEED_INTERN int CeedDestroy_Cuda(Ceed ceed); 767fcac036SJeremy L Thompson 77f5d1e504SJeremy L Thompson CEED_INTERN int CeedSetDeviceBoolArray_Cuda(Ceed ceed, const bool *source_array, CeedCopyMode copy_mode, CeedSize num_values, 78f5d1e504SJeremy L Thompson const bool **target_array_owned, const bool **target_array_borrowed, const bool **target_array); 79f5d1e504SJeremy L Thompson CEED_INTERN int CeedSetDeviceCeedInt8Array_Cuda(Ceed ceed, const CeedInt8 *source_array, CeedCopyMode copy_mode, CeedSize num_values, 80f5d1e504SJeremy L Thompson const CeedInt8 **target_array_owned, const CeedInt8 **target_array_borrowed, 81f5d1e504SJeremy L Thompson const CeedInt8 **target_array); 82f5d1e504SJeremy L Thompson CEED_INTERN int CeedSetDeviceCeedIntArray_Cuda(Ceed ceed, const CeedInt *source_array, CeedCopyMode copy_mode, CeedSize num_values, 83f5d1e504SJeremy L Thompson const CeedInt **target_array_owned, const CeedInt **target_array_borrowed, 84f5d1e504SJeremy L Thompson const CeedInt **target_array); 85f5d1e504SJeremy L Thompson CEED_INTERN int CeedSetDeviceCeedScalarArray_Cuda(Ceed ceed, const CeedScalar *source_array, CeedCopyMode copy_mode, CeedSize num_values, 86f5d1e504SJeremy L Thompson const CeedScalar **target_array_owned, const CeedScalar **target_array_borrowed, 87f5d1e504SJeremy L Thompson const CeedScalar **target_array); 88