// Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed
7509d4af6SJeremy L Thompson #pragma once
87fcac036SJeremy L Thompson
949aac155SJeremy L Thompson #include <ceed.h>
107fcac036SJeremy L Thompson #include <ceed/backend.h>
117fcac036SJeremy L Thompson #include <cublas_v2.h>
127fcac036SJeremy L Thompson #include <cuda.h>
137fcac036SJeremy L Thompson
// Turn the macro arguments, commas included, into a single string literal.
#define QUOTE(...) #__VA_ARGS__
157fcac036SJeremy L Thompson
// Check a CUDA result code; on failure, return from the ENCLOSING function with a CEED_ERROR_BACKEND error.
//   ceed: Ceed context handed to CeedError
//   x:    expression yielding the result code; it is evaluated once and cast to CUresult
// The error text is the name reported by cuGetErrorName. That call sets the string to NULL for a code it
// does not recognize, so a fixed fallback text is substituted in that case. The name is passed through
// "%s" so it is never interpreted as a printf-style format string.
#define CeedChk_Cu(ceed, x)                                                                                \
  do {                                                                                                     \
    CUresult cuda_result = (CUresult)(x);                                                                  \
    if (cuda_result != CUDA_SUCCESS) {                                                                     \
      const char *msg;                                                                                     \
      if (cuGetErrorName(cuda_result, &msg) != CUDA_SUCCESS || !msg) msg = "unrecognized CUDA error code"; \
      return CeedError((ceed), CEED_ERROR_BACKEND, "%s", msg);                                             \
    }                                                                                                      \
  } while (0)
257fcac036SJeremy L Thompson
// Check a cuBLAS status code; on failure, return from the ENCLOSING function with a CEED_ERROR_BACKEND error.
//   ceed: Ceed context handed to CeedError
//   x:    expression yielding the status; it is evaluated once and cast to cublasStatus_t
// The explicit cast mirrors CeedChk_Cu and keeps the macro valid when the status arrives as an int
// (as it does from CeedCallCublas) in a C++ translation unit, where int -> enum is not implicit.
// The name is passed through "%s" so it is never interpreted as a printf-style format string.
#define CeedChk_Cublas(ceed, x)                                \
  do {                                                         \
    cublasStatus_t cublas_result = (cublasStatus_t)(x);        \
    if (cublas_result != CUBLAS_STATUS_SUCCESS) {              \
      const char *msg = cublasGetErrorName(cublas_result);     \
      return CeedError((ceed), CEED_ERROR_BACKEND, "%s", msg); \
    }                                                          \
  } while (0)
347fcac036SJeremy L Thompson
// Evaluate a CUDA call once, store its result in an int, and hand that to CeedChk_Cu,
// which returns from the enclosing function if the result is not CUDA_SUCCESS.
#define CeedCallCuda(ceed, ...)              \
  do {                                       \
    int cuda_call_status_ = __VA_ARGS__;     \
    CeedChk_Cu(ceed, cuda_call_status_);     \
  } while (0)
402b730f8bSJeremy L Thompson
// Evaluate a cuBLAS call once, store its status in an int, and hand that to CeedChk_Cublas,
// which returns from the enclosing function if the status is not CUBLAS_STATUS_SUCCESS.
#define CeedCallCublas(ceed, ...)              \
  do {                                         \
    int cublas_call_status_ = __VA_ARGS__;     \
    CeedChk_Cublas(ceed, cublas_call_status_); \
  } while (0)
462b730f8bSJeremy L Thompson
// Switch-label helper: expands to a case for `name` that returns the enumerator's spelling as a string.
// The caller supplies the trailing semicolon.
#define CASE(name) \
  case name:       \
    return #name
507fcac036SJeremy L Thompson // LCOV_EXCL_START
cublasGetErrorName(cublasStatus_t error)517fcac036SJeremy L Thompson static const char *cublasGetErrorName(cublasStatus_t error) {
527fcac036SJeremy L Thompson switch (error) {
537fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_SUCCESS);
547fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_NOT_INITIALIZED);
557fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_ALLOC_FAILED);
567fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_INVALID_VALUE);
577fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_ARCH_MISMATCH);
587fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_MAPPING_ERROR);
597fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_EXECUTION_FAILED);
607fcac036SJeremy L Thompson CASE(CUBLAS_STATUS_INTERNAL_ERROR);
612b730f8bSJeremy L Thompson default:
622b730f8bSJeremy L Thompson return "CUBLAS_STATUS_UNKNOWN_ERROR";
637fcac036SJeremy L Thompson }
647fcac036SJeremy L Thompson }
657fcac036SJeremy L Thompson // LCOV_EXCL_STOP
667fcac036SJeremy L Thompson
677fcac036SJeremy L Thompson typedef struct {
680d0321e0SJeremy L Thompson int device_id;
69fa619eccSJeremy L Thompson bool use_llvm_version;
70fa619eccSJeremy L Thompson int llvm_version;
710d0321e0SJeremy L Thompson cublasHandle_t cublas_handle;
720d0321e0SJeremy L Thompson struct cudaDeviceProp device_prop;
737fcac036SJeremy L Thompson } Ceed_Cuda;
747fcac036SJeremy L Thompson
75eb7e6cafSJeremy L Thompson CEED_INTERN int CeedInit_Cuda(Ceed ceed, const char *resource);
767fcac036SJeremy L Thompson
777fcac036SJeremy L Thompson CEED_INTERN int CeedDestroy_Cuda(Ceed ceed);
787fcac036SJeremy L Thompson
79f5d1e504SJeremy L Thompson CEED_INTERN int CeedSetDeviceBoolArray_Cuda(Ceed ceed, const bool *source_array, CeedCopyMode copy_mode, CeedSize num_values,
80f5d1e504SJeremy L Thompson const bool **target_array_owned, const bool **target_array_borrowed, const bool **target_array);
81f5d1e504SJeremy L Thompson CEED_INTERN int CeedSetDeviceCeedInt8Array_Cuda(Ceed ceed, const CeedInt8 *source_array, CeedCopyMode copy_mode, CeedSize num_values,
82f5d1e504SJeremy L Thompson const CeedInt8 **target_array_owned, const CeedInt8 **target_array_borrowed,
83f5d1e504SJeremy L Thompson const CeedInt8 **target_array);
84f5d1e504SJeremy L Thompson CEED_INTERN int CeedSetDeviceCeedIntArray_Cuda(Ceed ceed, const CeedInt *source_array, CeedCopyMode copy_mode, CeedSize num_values,
85f5d1e504SJeremy L Thompson const CeedInt **target_array_owned, const CeedInt **target_array_borrowed,
86f5d1e504SJeremy L Thompson const CeedInt **target_array);
87f5d1e504SJeremy L Thompson CEED_INTERN int CeedSetDeviceCeedScalarArray_Cuda(Ceed ceed, const CeedScalar *source_array, CeedCopyMode copy_mode, CeedSize num_values,
88f5d1e504SJeremy L Thompson const CeedScalar **target_array_owned, const CeedScalar **target_array_borrowed,
89f5d1e504SJeremy L Thompson const CeedScalar **target_array);
90