// Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
//
// SPDX-License-Identifier: BSD-2-Clause
//
// This file is part of CEED: http://github.com/ceed
77fcac036SJeremy L Thompson
87fcac036SJeremy L Thompson #include "ceed-cuda-common.h"
97fcac036SJeremy L Thompson
1049aac155SJeremy L Thompson #include <ceed.h>
1149aac155SJeremy L Thompson #include <ceed/backend.h>
1249aac155SJeremy L Thompson #include <cuda_runtime.h>
1349aac155SJeremy L Thompson #include <stdlib.h>
142b730f8bSJeremy L Thompson #include <string.h>
152b730f8bSJeremy L Thompson
//------------------------------------------------------------------------------
// Device information backend init
//------------------------------------------------------------------------------
// Select the CUDA device for `ceed` and record its properties in the backend data.
//
// `resource` is searched for the substring ":device_id="; when present, the
// integer that follows it (parsed with atoi) is the requested device. Without
// that substring the device that is already current is kept.
//
// The backend data is fetched with CeedGetData, so it must already be attached
// to `ceed` when this runs.
// NOTE(review): presumably the calling backend allocates and sets that data
// before calling this function - confirm against the callers.
//
// Returns CEED_ERROR_SUCCESS, or leaves early through the CeedCall* macros when
// a CUDA or backend call reports an error.
int CeedInit_Cuda(Ceed ceed, const char *resource) {
  Ceed_Cuda  *data;
  const char *device_spec = strstr(resource, ":device_id=");
  // 11 is the length of ":device_id="; -1 marks "no device requested"
  const int device_id = (device_spec) ? atoi(device_spec + 11) : -1;
  int       current_device_id;

  CeedCallCuda(ceed, cudaGetDevice(&current_device_id));
  // Only switch devices when a valid id was requested and it is not already active
  if (device_id >= 0 && current_device_id != device_id) {
    CeedCallCuda(ceed, cudaSetDevice(device_id));
    current_device_id = device_id;
  }

  CeedCallBackend(CeedGetData(ceed, &data));
  data->device_id = current_device_id;
  CeedCallCuda(ceed, cudaGetDeviceProperties(&data->device_prop, current_device_id));
  return CEED_ERROR_SUCCESS;
}
367fcac036SJeremy L Thompson
//------------------------------------------------------------------------------
// Backend destroy
//------------------------------------------------------------------------------
// Release the backend data attached to `ceed`.
//
// The cuBLAS handle is destroyed only when one is stored in the backend data;
// the data struct itself is then released with CeedFree.
//
// Returns CEED_ERROR_SUCCESS, or leaves early through the CeedCall* macros on error.
int CeedDestroy_Cuda(Ceed ceed) {
  Ceed_Cuda *backend_data;

  CeedCallBackend(CeedGetData(ceed, &backend_data));
  if (backend_data->cublas_handle) {
    // A non-null handle is stored, so it has to be destroyed before the struct goes away
    CeedCallCublas(ceed, cublasDestroy(backend_data->cublas_handle));
  }
  CeedCallBackend(CeedFree(&backend_data));
  return CEED_ERROR_SUCCESS;
}
487fcac036SJeremy L Thompson
//------------------------------------------------------------------------------
// Memory transfer utilities
//------------------------------------------------------------------------------
// Type-agnostic helper that updates an (owned, borrowed, active) triple of array
// pointers according to `copy_mode`.
//
// `target_array_owned`, `target_array_borrowed` and `target_array` are passed as
// `void *` but are dereferenced as `void **`, i.e. each is the address of a pointer.
//
//   CEED_COPY_VALUES: if the active pointer is unset, it is pointed at the borrowed
//                     array when one is set, otherwise at the owned array (which is
//                     allocated with cudaMalloc if it does not exist yet). When
//                     `source_array` is non-NULL, `size_unit * num_values` bytes are
//                     then copied into the active array with cudaMemcpyDeviceToDevice.
//   CEED_OWN_POINTER: the previously owned array is freed; `source_array` becomes the
//                     owned and active array and the borrowed pointer is cleared.
//   CEED_USE_POINTER: the previously owned array is freed and cleared; `source_array`
//                     becomes the borrowed and active array.
//
// Returns CEED_ERROR_SUCCESS, or leaves early through CeedCallCuda on a CUDA error.
static inline int CeedSetDeviceGenericArray_Cuda(Ceed ceed, const void *source_array, CeedCopyMode copy_mode, size_t size_unit, CeedSize num_values,
                                                 void *target_array_owned, void *target_array_borrowed, void *target_array) {
  // View the opaque arguments as the pointer-to-pointer slots they really are
  void **owned    = (void **)target_array_owned;
  void **borrowed = (void **)target_array_borrowed;
  void **active   = (void **)target_array;

  switch (copy_mode) {
    case CEED_COPY_VALUES: {
      const size_t num_bytes = size_unit * num_values;

      if (*active == NULL) {
        if (*borrowed != NULL) {
          *active = *borrowed;
        } else {
          // Allocate owned storage only when none exists yet
          if (*owned == NULL) {
            CeedCallCuda(ceed, cudaMalloc(owned, num_bytes));
          }
          *active = *owned;
        }
      }
      if (source_array != NULL) {
        CeedCallCuda(ceed, cudaMemcpy(*active, source_array, num_bytes, cudaMemcpyDeviceToDevice));
      }
      break;
    }
    case CEED_OWN_POINTER:
      CeedCallCuda(ceed, cudaFree(*owned));
      *owned    = (void *)source_array;
      *borrowed = NULL;
      *active   = *owned;
      break;
    case CEED_USE_POINTER:
      CeedCallCuda(ceed, cudaFree(*owned));
      *owned    = NULL;
      *borrowed = (void *)source_array;
      *active   = *borrowed;
      break;
  }
  return CEED_ERROR_SUCCESS;
}
80f5d1e504SJeremy L Thompson
// `bool` front end for CeedSetDeviceGenericArray_Cuda.
//
// Forwards every argument unchanged and supplies sizeof(bool) as the element size.
// Returns CEED_ERROR_SUCCESS, or leaves early through CeedCallBackend on error.
int CeedSetDeviceBoolArray_Cuda(Ceed ceed, const bool *source_array, CeedCopyMode copy_mode, CeedSize num_values, const bool **target_array_owned,
                                const bool **target_array_borrowed, const bool **target_array) {
  const size_t size_unit = sizeof(bool);

  CeedCallBackend(
      CeedSetDeviceGenericArray_Cuda(ceed, source_array, copy_mode, size_unit, num_values, target_array_owned, target_array_borrowed, target_array));
  return CEED_ERROR_SUCCESS;
}
87f5d1e504SJeremy L Thompson
// `CeedInt8` front end for CeedSetDeviceGenericArray_Cuda.
//
// Forwards every argument unchanged and supplies sizeof(CeedInt8) as the element size.
// Returns CEED_ERROR_SUCCESS, or leaves early through CeedCallBackend on error.
int CeedSetDeviceCeedInt8Array_Cuda(Ceed ceed, const CeedInt8 *source_array, CeedCopyMode copy_mode, CeedSize num_values,
                                    const CeedInt8 **target_array_owned, const CeedInt8 **target_array_borrowed, const CeedInt8 **target_array) {
  const size_t size_unit = sizeof(CeedInt8);

  CeedCallBackend(
      CeedSetDeviceGenericArray_Cuda(ceed, source_array, copy_mode, size_unit, num_values, target_array_owned, target_array_borrowed, target_array));
  return CEED_ERROR_SUCCESS;
}
94f5d1e504SJeremy L Thompson
// `CeedInt` front end for CeedSetDeviceGenericArray_Cuda.
//
// Forwards every argument unchanged and supplies sizeof(CeedInt) as the element size.
// Returns CEED_ERROR_SUCCESS, or leaves early through CeedCallBackend on error.
int CeedSetDeviceCeedIntArray_Cuda(Ceed ceed, const CeedInt *source_array, CeedCopyMode copy_mode, CeedSize num_values,
                                   const CeedInt **target_array_owned, const CeedInt **target_array_borrowed, const CeedInt **target_array) {
  const size_t size_unit = sizeof(CeedInt);

  CeedCallBackend(
      CeedSetDeviceGenericArray_Cuda(ceed, source_array, copy_mode, size_unit, num_values, target_array_owned, target_array_borrowed, target_array));
  return CEED_ERROR_SUCCESS;
}
101f5d1e504SJeremy L Thompson
// `CeedScalar` front end for CeedSetDeviceGenericArray_Cuda.
//
// Forwards every argument unchanged and supplies sizeof(CeedScalar) as the element size.
// Returns CEED_ERROR_SUCCESS, or leaves early through CeedCallBackend on error.
int CeedSetDeviceCeedScalarArray_Cuda(Ceed ceed, const CeedScalar *source_array, CeedCopyMode copy_mode, CeedSize num_values,
                                      const CeedScalar **target_array_owned, const CeedScalar **target_array_borrowed,
                                      const CeedScalar **target_array) {
  const size_t size_unit = sizeof(CeedScalar);

  CeedCallBackend(
      CeedSetDeviceGenericArray_Cuda(ceed, source_array, copy_mode, size_unit, num_values, target_array_owned, target_array_borrowed, target_array));
  return CEED_ERROR_SUCCESS;
}
109f5d1e504SJeremy L Thompson
//------------------------------------------------------------------------------
111