xref: /libCEED/backends/cuda-ref/ceed-cuda-ref-vector.c (revision e84c3ebc97a9b078c9a14c8bdeb2c2ebf4a01814)
15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors.
2ff1e7120SSebastian Grimberg // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3ff1e7120SSebastian Grimberg //
4ff1e7120SSebastian Grimberg // SPDX-License-Identifier: BSD-2-Clause
5ff1e7120SSebastian Grimberg //
6ff1e7120SSebastian Grimberg // This file is part of CEED:  http://github.com/ceed
7ff1e7120SSebastian Grimberg 
8ff1e7120SSebastian Grimberg #include <ceed.h>
9ff1e7120SSebastian Grimberg #include <ceed/backend.h>
10ff1e7120SSebastian Grimberg #include <cuda_runtime.h>
11ff1e7120SSebastian Grimberg #include <math.h>
12ff1e7120SSebastian Grimberg #include <stdbool.h>
13ff1e7120SSebastian Grimberg #include <string.h>
14ff1e7120SSebastian Grimberg 
15ff1e7120SSebastian Grimberg #include "../cuda/ceed-cuda-common.h"
16ff1e7120SSebastian Grimberg #include "ceed-cuda-ref.h"
17ff1e7120SSebastian Grimberg 
18ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
19ff1e7120SSebastian Grimberg // Check if host/device sync is needed
20ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
21ff1e7120SSebastian Grimberg static inline int CeedVectorNeedSync_Cuda(const CeedVector vec, CeedMemType mem_type, bool *need_sync) {
22ff1e7120SSebastian Grimberg   bool             has_valid_array = false;
23ca735530SJeremy L Thompson   CeedVector_Cuda *impl;
24ca735530SJeremy L Thompson 
25ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
26ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array));
27ff1e7120SSebastian Grimberg   switch (mem_type) {
28ff1e7120SSebastian Grimberg     case CEED_MEM_HOST:
29ff1e7120SSebastian Grimberg       *need_sync = has_valid_array && !impl->h_array;
30ff1e7120SSebastian Grimberg       break;
31ff1e7120SSebastian Grimberg     case CEED_MEM_DEVICE:
32ff1e7120SSebastian Grimberg       *need_sync = has_valid_array && !impl->d_array;
33ff1e7120SSebastian Grimberg       break;
34ff1e7120SSebastian Grimberg   }
35ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
36ff1e7120SSebastian Grimberg }
37ff1e7120SSebastian Grimberg 
38ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
39ff1e7120SSebastian Grimberg // Sync host to device
40ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
41ff1e7120SSebastian Grimberg static inline int CeedVectorSyncH2D_Cuda(const CeedVector vec) {
42ca735530SJeremy L Thompson   CeedSize         length;
43672b0f2aSSebastian Grimberg   size_t           bytes;
44ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
45ca735530SJeremy L Thompson 
46ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(vec, &impl));
47ff1e7120SSebastian Grimberg 
486e536b99SJeremy L Thompson   CeedCheck(impl->h_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid host data to sync to device");
49ff1e7120SSebastian Grimberg 
50ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetLength(vec, &length));
51672b0f2aSSebastian Grimberg   bytes = length * sizeof(CeedScalar);
52ff1e7120SSebastian Grimberg   if (impl->d_array_borrowed) {
53ff1e7120SSebastian Grimberg     impl->d_array = impl->d_array_borrowed;
54ff1e7120SSebastian Grimberg   } else if (impl->d_array_owned) {
55ff1e7120SSebastian Grimberg     impl->d_array = impl->d_array_owned;
56ff1e7120SSebastian Grimberg   } else {
579bc66399SJeremy L Thompson     CeedCallCuda(CeedVectorReturnCeed(vec), cudaMalloc((void **)&impl->d_array_owned, bytes));
58ff1e7120SSebastian Grimberg     impl->d_array = impl->d_array_owned;
59ff1e7120SSebastian Grimberg   }
609bc66399SJeremy L Thompson   CeedCallCuda(CeedVectorReturnCeed(vec), cudaMemcpy(impl->d_array, impl->h_array, bytes, cudaMemcpyHostToDevice));
61ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
62ff1e7120SSebastian Grimberg }
63ff1e7120SSebastian Grimberg 
64ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
65ff1e7120SSebastian Grimberg // Sync device to host
66ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
67ff1e7120SSebastian Grimberg static inline int CeedVectorSyncD2H_Cuda(const CeedVector vec) {
68ca735530SJeremy L Thompson   CeedSize         length;
69ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
70ca735530SJeremy L Thompson 
71ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(vec, &impl));
72ff1e7120SSebastian Grimberg 
739bc66399SJeremy L Thompson   CeedCheck(impl->d_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid device data to sync to host");
74ff1e7120SSebastian Grimberg 
75ff1e7120SSebastian Grimberg   if (impl->h_array_borrowed) {
76ff1e7120SSebastian Grimberg     impl->h_array = impl->h_array_borrowed;
77ff1e7120SSebastian Grimberg   } else if (impl->h_array_owned) {
78ff1e7120SSebastian Grimberg     impl->h_array = impl->h_array_owned;
79ff1e7120SSebastian Grimberg   } else {
80ff1e7120SSebastian Grimberg     CeedSize length;
81ca735530SJeremy L Thompson 
82ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorGetLength(vec, &length));
83ff1e7120SSebastian Grimberg     CeedCallBackend(CeedCalloc(length, &impl->h_array_owned));
84ff1e7120SSebastian Grimberg     impl->h_array = impl->h_array_owned;
85ff1e7120SSebastian Grimberg   }
86ff1e7120SSebastian Grimberg 
87ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetLength(vec, &length));
88ff1e7120SSebastian Grimberg   size_t bytes = length * sizeof(CeedScalar);
89ff1e7120SSebastian Grimberg 
909bc66399SJeremy L Thompson   CeedCallCuda(CeedVectorReturnCeed(vec), cudaMemcpy(impl->h_array, impl->d_array, bytes, cudaMemcpyDeviceToHost));
91ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
92ff1e7120SSebastian Grimberg }
93ff1e7120SSebastian Grimberg 
94ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
95ff1e7120SSebastian Grimberg // Sync arrays
96ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
97ff1e7120SSebastian Grimberg static int CeedVectorSyncArray_Cuda(const CeedVector vec, CeedMemType mem_type) {
98ff1e7120SSebastian Grimberg   bool need_sync = false;
99ca735530SJeremy L Thompson 
100ca735530SJeremy L Thompson   // Check whether device/host sync is needed
101ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorNeedSync_Cuda(vec, mem_type, &need_sync));
102ff1e7120SSebastian Grimberg   if (!need_sync) return CEED_ERROR_SUCCESS;
103ff1e7120SSebastian Grimberg 
104ff1e7120SSebastian Grimberg   switch (mem_type) {
105ff1e7120SSebastian Grimberg     case CEED_MEM_HOST:
106ff1e7120SSebastian Grimberg       return CeedVectorSyncD2H_Cuda(vec);
107ff1e7120SSebastian Grimberg     case CEED_MEM_DEVICE:
108ff1e7120SSebastian Grimberg       return CeedVectorSyncH2D_Cuda(vec);
109ff1e7120SSebastian Grimberg   }
110ff1e7120SSebastian Grimberg   return CEED_ERROR_UNSUPPORTED;
111ff1e7120SSebastian Grimberg }
112ff1e7120SSebastian Grimberg 
113ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
114ff1e7120SSebastian Grimberg // Set all pointers as invalid
115ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
116ff1e7120SSebastian Grimberg static inline int CeedVectorSetAllInvalid_Cuda(const CeedVector vec) {
117ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
118ff1e7120SSebastian Grimberg 
119ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
120ff1e7120SSebastian Grimberg   impl->h_array = NULL;
121ff1e7120SSebastian Grimberg   impl->d_array = NULL;
122ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
123ff1e7120SSebastian Grimberg }
124ff1e7120SSebastian Grimberg 
125ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
126ff1e7120SSebastian Grimberg // Check if CeedVector has any valid pointer
127ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
128ff1e7120SSebastian Grimberg static inline int CeedVectorHasValidArray_Cuda(const CeedVector vec, bool *has_valid_array) {
129ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
130ca735530SJeremy L Thompson 
131ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(vec, &impl));
1321c66c397SJeremy L Thompson   *has_valid_array = impl->h_array || impl->d_array;
133ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
134ff1e7120SSebastian Grimberg }
135ff1e7120SSebastian Grimberg 
136ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
137ff1e7120SSebastian Grimberg // Check if has array of given type
138ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
139ff1e7120SSebastian Grimberg static inline int CeedVectorHasArrayOfType_Cuda(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) {
140ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
141ff1e7120SSebastian Grimberg 
142ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
143ff1e7120SSebastian Grimberg   switch (mem_type) {
144ff1e7120SSebastian Grimberg     case CEED_MEM_HOST:
1451c66c397SJeremy L Thompson       *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned;
146ff1e7120SSebastian Grimberg       break;
147ff1e7120SSebastian Grimberg     case CEED_MEM_DEVICE:
1481c66c397SJeremy L Thompson       *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned;
149ff1e7120SSebastian Grimberg       break;
150ff1e7120SSebastian Grimberg   }
151ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
152ff1e7120SSebastian Grimberg }
153ff1e7120SSebastian Grimberg 
154ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
155ff1e7120SSebastian Grimberg // Check if has borrowed array of given type
156ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
157ff1e7120SSebastian Grimberg static inline int CeedVectorHasBorrowedArrayOfType_Cuda(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) {
158ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
159ff1e7120SSebastian Grimberg 
160ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
161ff1e7120SSebastian Grimberg   switch (mem_type) {
162ff1e7120SSebastian Grimberg     case CEED_MEM_HOST:
1631c66c397SJeremy L Thompson       *has_borrowed_array_of_type = impl->h_array_borrowed;
164ff1e7120SSebastian Grimberg       break;
165ff1e7120SSebastian Grimberg     case CEED_MEM_DEVICE:
1661c66c397SJeremy L Thompson       *has_borrowed_array_of_type = impl->d_array_borrowed;
167ff1e7120SSebastian Grimberg       break;
168ff1e7120SSebastian Grimberg   }
169ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
170ff1e7120SSebastian Grimberg }
171ff1e7120SSebastian Grimberg 
172ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
173ff1e7120SSebastian Grimberg // Set array from host
174ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
175ff1e7120SSebastian Grimberg static int CeedVectorSetArrayHost_Cuda(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
176a267acd1SJeremy L Thompson   CeedSize         length;
177ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
178ff1e7120SSebastian Grimberg 
179ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
180ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetLength(vec, &length));
181a267acd1SJeremy L Thompson 
182f5d1e504SJeremy L Thompson   CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h_array_owned,
183f5d1e504SJeremy L Thompson                                              (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array));
184ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
185ff1e7120SSebastian Grimberg }
186ff1e7120SSebastian Grimberg 
187ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
188ff1e7120SSebastian Grimberg // Set array from device
189ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
190ff1e7120SSebastian Grimberg static int CeedVectorSetArrayDevice_Cuda(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
191a267acd1SJeremy L Thompson   CeedSize         length;
192ff1e7120SSebastian Grimberg   Ceed             ceed;
193ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
194ff1e7120SSebastian Grimberg 
195ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
196ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
197ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetLength(vec, &length));
198a267acd1SJeremy L Thompson 
199f5d1e504SJeremy L Thompson   CeedCallBackend(CeedSetDeviceCeedScalarArray_Cuda(ceed, array, copy_mode, length, (const CeedScalar **)&impl->d_array_owned,
200f5d1e504SJeremy L Thompson                                                     (const CeedScalar **)&impl->d_array_borrowed, (const CeedScalar **)&impl->d_array));
2019bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
202ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
203ff1e7120SSebastian Grimberg }
204ff1e7120SSebastian Grimberg 
205ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
206ff1e7120SSebastian Grimberg // Set the array used by a vector,
207ff1e7120SSebastian Grimberg //   freeing any previously allocated array if applicable
208ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
209ff1e7120SSebastian Grimberg static int CeedVectorSetArray_Cuda(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) {
210ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
211ff1e7120SSebastian Grimberg 
212ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
213ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorSetAllInvalid_Cuda(vec));
214ff1e7120SSebastian Grimberg   switch (mem_type) {
215ff1e7120SSebastian Grimberg     case CEED_MEM_HOST:
216ff1e7120SSebastian Grimberg       return CeedVectorSetArrayHost_Cuda(vec, copy_mode, array);
217ff1e7120SSebastian Grimberg     case CEED_MEM_DEVICE:
218ff1e7120SSebastian Grimberg       return CeedVectorSetArrayDevice_Cuda(vec, copy_mode, array);
219ff1e7120SSebastian Grimberg   }
220ff1e7120SSebastian Grimberg   return CEED_ERROR_UNSUPPORTED;
221ff1e7120SSebastian Grimberg }
222ff1e7120SSebastian Grimberg 
223ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
224f1c2287bSJeremy L Thompson // Copy host array to value strided
225f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
226f1c2287bSJeremy L Thompson static int CeedHostCopyStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *h_copy_array) {
227f1c2287bSJeremy L Thompson   for (CeedSize i = start; i < length; i += step) h_copy_array[i] = h_array[i];
228f1c2287bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
229f1c2287bSJeremy L Thompson }
230f1c2287bSJeremy L Thompson 
231f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
232f1c2287bSJeremy L Thompson // Copy device array to value strided (impl in .cu file)
233f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
234f1c2287bSJeremy L Thompson int CeedDeviceCopyStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar *d_copy_array);
235f1c2287bSJeremy L Thompson 
236f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
237f1c2287bSJeremy L Thompson // Copy a vector to a value strided
238f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
239f1c2287bSJeremy L Thompson static int CeedVectorCopyStrided_Cuda(CeedVector vec, CeedSize start, CeedSize step, CeedVector vec_copy) {
240f1c2287bSJeremy L Thompson   CeedSize         length;
241f1c2287bSJeremy L Thompson   CeedVector_Cuda *impl;
242f1c2287bSJeremy L Thompson 
243f1c2287bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
244a7efc114SJeremy L Thompson   {
245a7efc114SJeremy L Thompson     CeedSize length_vec, length_copy;
246a7efc114SJeremy L Thompson 
2475a5594ffSJeremy L Thompson     CeedCallBackend(CeedVectorGetLength(vec, &length_vec));
2485a5594ffSJeremy L Thompson     CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy));
249a7efc114SJeremy L Thompson     length = length_vec < length_copy ? length_vec : length_copy;
250a7efc114SJeremy L Thompson   }
251f1c2287bSJeremy L Thompson   // Set value for synced device/host array
252f1c2287bSJeremy L Thompson   if (impl->d_array) {
253f1c2287bSJeremy L Thompson     CeedScalar *copy_array;
254f1c2287bSJeremy L Thompson 
255f1c2287bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_DEVICE, &copy_array));
256*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION >= 12000)
257*e84c3ebcSJeremy L Thompson     cublasHandle_t handle;
258*e84c3ebcSJeremy L Thompson     Ceed           ceed;
259*e84c3ebcSJeremy L Thompson 
260*e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
261*e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedGetCublasHandle_Cuda(ceed, &handle));
262*e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
263*e84c3ebcSJeremy L Thompson     CeedCallCublas(ceed, cublasScopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
264*e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
265*e84c3ebcSJeremy L Thompson     CeedCallCublas(ceed, cublasDcopy_64(handle, (int64_t)length, impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
266*e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
267*e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedDestroy(&ceed));
268*e84c3ebcSJeremy L Thompson #else  /* CUDA_VERSION */
269f1c2287bSJeremy L Thompson     CeedCallBackend(CeedDeviceCopyStrided_Cuda(impl->d_array, start, step, length, copy_array));
270*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
271f1c2287bSJeremy L Thompson     CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
272*e84c3ebcSJeremy L Thompson     impl->h_array = NULL;
273f1c2287bSJeremy L Thompson   } else if (impl->h_array) {
274f1c2287bSJeremy L Thompson     CeedScalar *copy_array;
275f1c2287bSJeremy L Thompson 
276f1c2287bSJeremy L Thompson     CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, &copy_array));
277f1c2287bSJeremy L Thompson     CeedCallBackend(CeedHostCopyStrided_Cuda(impl->h_array, start, step, length, copy_array));
278f1c2287bSJeremy L Thompson     CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
279*e84c3ebcSJeremy L Thompson     impl->d_array = NULL;
280f1c2287bSJeremy L Thompson   } else {
281f1c2287bSJeremy L Thompson     return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");
282f1c2287bSJeremy L Thompson   }
283f1c2287bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
284f1c2287bSJeremy L Thompson }
285f1c2287bSJeremy L Thompson 
286f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
287ff1e7120SSebastian Grimberg // Set host array to value
288ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
289f7c1b517Snbeams static int CeedHostSetValue_Cuda(CeedScalar *h_array, CeedSize length, CeedScalar val) {
290f7c1b517Snbeams   for (CeedSize i = 0; i < length; i++) h_array[i] = val;
291ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
292ff1e7120SSebastian Grimberg }
293ff1e7120SSebastian Grimberg 
294ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
295ff1e7120SSebastian Grimberg // Set device array to value (impl in .cu file)
296ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
297f7c1b517Snbeams int CeedDeviceSetValue_Cuda(CeedScalar *d_array, CeedSize length, CeedScalar val);
298ff1e7120SSebastian Grimberg 
299ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
300f7c1b517Snbeams // Set a vector to a value
301ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
302ff1e7120SSebastian Grimberg static int CeedVectorSetValue_Cuda(CeedVector vec, CeedScalar val) {
303ff1e7120SSebastian Grimberg   CeedSize         length;
304ca735530SJeremy L Thompson   CeedVector_Cuda *impl;
305ff1e7120SSebastian Grimberg 
306ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
307ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
308ff1e7120SSebastian Grimberg   // Set value for synced device/host array
309ff1e7120SSebastian Grimberg   if (!impl->d_array && !impl->h_array) {
310ff1e7120SSebastian Grimberg     if (impl->d_array_borrowed) {
311ff1e7120SSebastian Grimberg       impl->d_array = impl->d_array_borrowed;
312ff1e7120SSebastian Grimberg     } else if (impl->h_array_borrowed) {
313ff1e7120SSebastian Grimberg       impl->h_array = impl->h_array_borrowed;
314ff1e7120SSebastian Grimberg     } else if (impl->d_array_owned) {
315ff1e7120SSebastian Grimberg       impl->d_array = impl->d_array_owned;
316ff1e7120SSebastian Grimberg     } else if (impl->h_array_owned) {
317ff1e7120SSebastian Grimberg       impl->h_array = impl->h_array_owned;
318ff1e7120SSebastian Grimberg     } else {
319ff1e7120SSebastian Grimberg       CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL));
320ff1e7120SSebastian Grimberg     }
321ff1e7120SSebastian Grimberg   }
322ff1e7120SSebastian Grimberg   if (impl->d_array) {
323124cc107SJeremy L Thompson     if (val == 0) {
324124cc107SJeremy L Thompson       CeedCallCuda(CeedVectorReturnCeed(vec), cudaMemset(impl->d_array, 0, length * sizeof(CeedScalar)));
325124cc107SJeremy L Thompson     } else {
326ff1e7120SSebastian Grimberg       CeedCallBackend(CeedDeviceSetValue_Cuda(impl->d_array, length, val));
327ff1e7120SSebastian Grimberg     }
328124cc107SJeremy L Thompson     impl->h_array = NULL;
329124cc107SJeremy L Thompson   } else if (impl->h_array) {
330ff1e7120SSebastian Grimberg     CeedCallBackend(CeedHostSetValue_Cuda(impl->h_array, length, val));
331ff1e7120SSebastian Grimberg     impl->d_array = NULL;
332ff1e7120SSebastian Grimberg   }
333ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
334ff1e7120SSebastian Grimberg }
335ff1e7120SSebastian Grimberg 
336ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
337f1c2287bSJeremy L Thompson // Set host array to value strided
338f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
339f1c2287bSJeremy L Thompson static int CeedHostSetValueStrided_Cuda(CeedScalar *h_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val) {
340f1c2287bSJeremy L Thompson   for (CeedSize i = start; i < length; i += step) h_array[i] = val;
341f1c2287bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
342f1c2287bSJeremy L Thompson }
343f1c2287bSJeremy L Thompson 
344f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
345f1c2287bSJeremy L Thompson // Set device array to value strided (impl in .cu file)
346f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
347f1c2287bSJeremy L Thompson int CeedDeviceSetValueStrided_Cuda(CeedScalar *d_array, CeedSize start, CeedSize step, CeedSize length, CeedScalar val);
348f1c2287bSJeremy L Thompson 
349f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
350f1c2287bSJeremy L Thompson // Set a vector to a value strided
351f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
352f1c2287bSJeremy L Thompson static int CeedVectorSetValueStrided_Cuda(CeedVector vec, CeedSize start, CeedSize step, CeedScalar val) {
353f1c2287bSJeremy L Thompson   CeedSize         length;
354f1c2287bSJeremy L Thompson   CeedVector_Cuda *impl;
355f1c2287bSJeremy L Thompson 
356f1c2287bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
357f1c2287bSJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
358f1c2287bSJeremy L Thompson   // Set value for synced device/host array
359f1c2287bSJeremy L Thompson   if (impl->d_array) {
360f1c2287bSJeremy L Thompson     CeedCallBackend(CeedDeviceSetValueStrided_Cuda(impl->d_array, start, step, length, val));
361f1c2287bSJeremy L Thompson     impl->h_array = NULL;
362f1c2287bSJeremy L Thompson   } else if (impl->h_array) {
363f1c2287bSJeremy L Thompson     CeedCallBackend(CeedHostSetValueStrided_Cuda(impl->h_array, start, step, length, val));
364f1c2287bSJeremy L Thompson     impl->d_array = NULL;
365f1c2287bSJeremy L Thompson   } else {
366f1c2287bSJeremy L Thompson     return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");
367f1c2287bSJeremy L Thompson   }
368f1c2287bSJeremy L Thompson   return CEED_ERROR_SUCCESS;
369f1c2287bSJeremy L Thompson }
370f1c2287bSJeremy L Thompson 
371f1c2287bSJeremy L Thompson //------------------------------------------------------------------------------
372ff1e7120SSebastian Grimberg // Vector Take Array
373ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
374ff1e7120SSebastian Grimberg static int CeedVectorTakeArray_Cuda(CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
375ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
376ff1e7120SSebastian Grimberg 
377ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
378ff1e7120SSebastian Grimberg   // Sync array to requested mem_type
379ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
380ff1e7120SSebastian Grimberg   // Update pointer
381ff1e7120SSebastian Grimberg   switch (mem_type) {
382ff1e7120SSebastian Grimberg     case CEED_MEM_HOST:
383ff1e7120SSebastian Grimberg       (*array)               = impl->h_array_borrowed;
384ff1e7120SSebastian Grimberg       impl->h_array_borrowed = NULL;
385ff1e7120SSebastian Grimberg       impl->h_array          = NULL;
386ff1e7120SSebastian Grimberg       break;
387ff1e7120SSebastian Grimberg     case CEED_MEM_DEVICE:
388ff1e7120SSebastian Grimberg       (*array)               = impl->d_array_borrowed;
389ff1e7120SSebastian Grimberg       impl->d_array_borrowed = NULL;
390ff1e7120SSebastian Grimberg       impl->d_array          = NULL;
391ff1e7120SSebastian Grimberg       break;
392ff1e7120SSebastian Grimberg   }
393ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
394ff1e7120SSebastian Grimberg }
395ff1e7120SSebastian Grimberg 
396ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
397ff1e7120SSebastian Grimberg // Core logic for array syncronization for GetArray.
398ff1e7120SSebastian Grimberg //   If a different memory type is most up to date, this will perform a copy
399ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
400ff1e7120SSebastian Grimberg static int CeedVectorGetArrayCore_Cuda(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
401ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
402ff1e7120SSebastian Grimberg 
403ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
404ff1e7120SSebastian Grimberg   // Sync array to requested mem_type
405ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
406ff1e7120SSebastian Grimberg   // Update pointer
407ff1e7120SSebastian Grimberg   switch (mem_type) {
408ff1e7120SSebastian Grimberg     case CEED_MEM_HOST:
409ff1e7120SSebastian Grimberg       *array = impl->h_array;
410ff1e7120SSebastian Grimberg       break;
411ff1e7120SSebastian Grimberg     case CEED_MEM_DEVICE:
412ff1e7120SSebastian Grimberg       *array = impl->d_array;
413ff1e7120SSebastian Grimberg       break;
414ff1e7120SSebastian Grimberg   }
415ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
416ff1e7120SSebastian Grimberg }
417ff1e7120SSebastian Grimberg 
418ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
419ff1e7120SSebastian Grimberg // Get read-only access to a vector via the specified mem_type
420ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
421ff1e7120SSebastian Grimberg static int CeedVectorGetArrayRead_Cuda(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) {
422ff1e7120SSebastian Grimberg   return CeedVectorGetArrayCore_Cuda(vec, mem_type, (CeedScalar **)array);
423ff1e7120SSebastian Grimberg }
424ff1e7120SSebastian Grimberg 
425ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
426ff1e7120SSebastian Grimberg // Get read/write access to a vector via the specified mem_type
427ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
428ff1e7120SSebastian Grimberg static int CeedVectorGetArray_Cuda(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
429ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
430ca735530SJeremy L Thompson 
431ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(vec, &impl));
432ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayCore_Cuda(vec, mem_type, array));
433ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorSetAllInvalid_Cuda(vec));
434ff1e7120SSebastian Grimberg   switch (mem_type) {
435ff1e7120SSebastian Grimberg     case CEED_MEM_HOST:
436ff1e7120SSebastian Grimberg       impl->h_array = *array;
437ff1e7120SSebastian Grimberg       break;
438ff1e7120SSebastian Grimberg     case CEED_MEM_DEVICE:
439ff1e7120SSebastian Grimberg       impl->d_array = *array;
440ff1e7120SSebastian Grimberg       break;
441ff1e7120SSebastian Grimberg   }
442ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
443ff1e7120SSebastian Grimberg }
444ff1e7120SSebastian Grimberg 
445ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
446ff1e7120SSebastian Grimberg // Get write access to a vector via the specified mem_type
447ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
448ff1e7120SSebastian Grimberg static int CeedVectorGetArrayWrite_Cuda(const CeedVector vec, const CeedMemType mem_type, CeedScalar **array) {
449ff1e7120SSebastian Grimberg   bool             has_array_of_type = true;
450ca735530SJeremy L Thompson   CeedVector_Cuda *impl;
451ca735530SJeremy L Thompson 
452ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
453ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorHasArrayOfType_Cuda(vec, mem_type, &has_array_of_type));
454ff1e7120SSebastian Grimberg   if (!has_array_of_type) {
455ff1e7120SSebastian Grimberg     // Allocate if array is not yet allocated
456ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL));
457ff1e7120SSebastian Grimberg   } else {
458ff1e7120SSebastian Grimberg     // Select dirty array
459ff1e7120SSebastian Grimberg     switch (mem_type) {
460ff1e7120SSebastian Grimberg       case CEED_MEM_HOST:
461ff1e7120SSebastian Grimberg         if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed;
462ff1e7120SSebastian Grimberg         else impl->h_array = impl->h_array_owned;
463ff1e7120SSebastian Grimberg         break;
464ff1e7120SSebastian Grimberg       case CEED_MEM_DEVICE:
465ff1e7120SSebastian Grimberg         if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed;
466ff1e7120SSebastian Grimberg         else impl->d_array = impl->d_array_owned;
467ff1e7120SSebastian Grimberg     }
468ff1e7120SSebastian Grimberg   }
469ff1e7120SSebastian Grimberg   return CeedVectorGetArray_Cuda(vec, mem_type, array);
470ff1e7120SSebastian Grimberg }
471ff1e7120SSebastian Grimberg 
472ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
473ff1e7120SSebastian Grimberg // Get the norm of a CeedVector
474ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
475ff1e7120SSebastian Grimberg static int CeedVectorNorm_Cuda(CeedVector vec, CeedNormType type, CeedScalar *norm) {
476ff1e7120SSebastian Grimberg   Ceed     ceed;
477ca735530SJeremy L Thompson   CeedSize length;
478*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION < 12000)
479672b0f2aSSebastian Grimberg   CeedSize num_calls;
480*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
481ca735530SJeremy L Thompson   const CeedScalar *d_array;
482ca735530SJeremy L Thompson   CeedVector_Cuda  *impl;
483672b0f2aSSebastian Grimberg   cublasHandle_t    handle;
484ca735530SJeremy L Thompson 
485ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
486ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
487ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
488ff1e7120SSebastian Grimberg   CeedCallBackend(CeedGetCublasHandle_Cuda(ceed, &handle));
489ff1e7120SSebastian Grimberg 
490*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION < 12000)
491f7c1b517Snbeams   // With CUDA 12, we can use the 64-bit integer interface. Prior to that,
492f7c1b517Snbeams   // we need to check if the vector is too long to handle with int32,
493b2165e7aSSebastian Grimberg   // and if so, divide it into subsections for repeated cuBLAS calls.
494672b0f2aSSebastian Grimberg   num_calls = length / INT_MAX;
495f7c1b517Snbeams   if (length % INT_MAX > 0) num_calls += 1;
496*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
497f7c1b517Snbeams 
498ff1e7120SSebastian Grimberg   // Compute norm
499ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array));
500ff1e7120SSebastian Grimberg   switch (type) {
501ff1e7120SSebastian Grimberg     case CEED_NORM_1: {
502f6f49adbSnbeams       *norm = 0.0;
503*e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
504*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION >= 12000)  // We have CUDA 12, and can use 64-bit integers
505f7c1b517Snbeams       CeedCallCublas(ceed, cublasSasum_64(handle, (int64_t)length, (float *)d_array, 1, (float *)norm));
506*e84c3ebcSJeremy L Thompson #else  /* CUDA_VERSION */
507f7c1b517Snbeams       float  sub_norm = 0.0;
508f7c1b517Snbeams       float *d_array_start;
509ca735530SJeremy L Thompson 
510f7c1b517Snbeams       for (CeedInt i = 0; i < num_calls; i++) {
511f7c1b517Snbeams         d_array_start             = (float *)d_array + (CeedSize)(i)*INT_MAX;
512f7c1b517Snbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
513f7c1b517Snbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
514ca735530SJeremy L Thompson 
515f7c1b517Snbeams         CeedCallCublas(ceed, cublasSasum(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm));
516f7c1b517Snbeams         *norm += sub_norm;
517f7c1b517Snbeams       }
518*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
519*e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
520*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION >= 12000)
521f7c1b517Snbeams       CeedCallCublas(ceed, cublasDasum_64(handle, (int64_t)length, (double *)d_array, 1, (double *)norm));
522*e84c3ebcSJeremy L Thompson #else  /* CUDA_VERSION */
523f7c1b517Snbeams       double  sub_norm = 0.0;
524f7c1b517Snbeams       double *d_array_start;
525ca735530SJeremy L Thompson 
526f7c1b517Snbeams       for (CeedInt i = 0; i < num_calls; i++) {
527f7c1b517Snbeams         d_array_start             = (double *)d_array + (CeedSize)(i)*INT_MAX;
528f7c1b517Snbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
529f7c1b517Snbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
530ca735530SJeremy L Thompson 
531f7c1b517Snbeams         CeedCallCublas(ceed, cublasDasum(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm));
532f7c1b517Snbeams         *norm += sub_norm;
533f7c1b517Snbeams       }
534*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
535*e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
536ff1e7120SSebastian Grimberg       break;
537ff1e7120SSebastian Grimberg     }
538ff1e7120SSebastian Grimberg     case CEED_NORM_2: {
539*e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
540*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION >= 12000)
541f7c1b517Snbeams       CeedCallCublas(ceed, cublasSnrm2_64(handle, (int64_t)length, (float *)d_array, 1, (float *)norm));
542*e84c3ebcSJeremy L Thompson #else  /* CUDA_VERSION */
543f7c1b517Snbeams       float  sub_norm = 0.0, norm_sum = 0.0;
544f7c1b517Snbeams       float *d_array_start;
545ca735530SJeremy L Thompson 
546f7c1b517Snbeams       for (CeedInt i = 0; i < num_calls; i++) {
547f7c1b517Snbeams         d_array_start             = (float *)d_array + (CeedSize)(i)*INT_MAX;
548f7c1b517Snbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
549f7c1b517Snbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
550ca735530SJeremy L Thompson 
551f7c1b517Snbeams         CeedCallCublas(ceed, cublasSnrm2(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm));
552f7c1b517Snbeams         norm_sum += sub_norm * sub_norm;
553f7c1b517Snbeams       }
554f7c1b517Snbeams       *norm = sqrt(norm_sum);
555*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
556*e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
557*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION >= 12000)
558f7c1b517Snbeams       CeedCallCublas(ceed, cublasDnrm2_64(handle, (int64_t)length, (double *)d_array, 1, (double *)norm));
559*e84c3ebcSJeremy L Thompson #else  /* CUDA_VERSION */
560f7c1b517Snbeams       double  sub_norm = 0.0, norm_sum = 0.0;
561f7c1b517Snbeams       double *d_array_start;
562ca735530SJeremy L Thompson 
563f7c1b517Snbeams       for (CeedInt i = 0; i < num_calls; i++) {
564f7c1b517Snbeams         d_array_start             = (double *)d_array + (CeedSize)(i)*INT_MAX;
565f7c1b517Snbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
566f7c1b517Snbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
567ca735530SJeremy L Thompson 
568f7c1b517Snbeams         CeedCallCublas(ceed, cublasDnrm2(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm));
569f7c1b517Snbeams         norm_sum += sub_norm * sub_norm;
570f7c1b517Snbeams       }
571f7c1b517Snbeams       *norm = sqrt(norm_sum);
572*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
573*e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
574ff1e7120SSebastian Grimberg       break;
575ff1e7120SSebastian Grimberg     }
576ff1e7120SSebastian Grimberg     case CEED_NORM_MAX: {
577*e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
578*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION >= 12000)
579ca735530SJeremy L Thompson       int64_t    index;
580ca735530SJeremy L Thompson       CeedScalar norm_no_abs;
581ca735530SJeremy L Thompson 
582ca735530SJeremy L Thompson       CeedCallCublas(ceed, cublasIsamax_64(handle, (int64_t)length, (float *)d_array, 1, &index));
583ca735530SJeremy L Thompson       CeedCallCuda(ceed, cudaMemcpy(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), cudaMemcpyDeviceToHost));
584ca735530SJeremy L Thompson       *norm = fabs(norm_no_abs);
585*e84c3ebcSJeremy L Thompson #else  /* CUDA_VERSION */
586ca735530SJeremy L Thompson       CeedInt index;
587f7c1b517Snbeams       float   sub_max = 0.0, current_max = 0.0;
588f7c1b517Snbeams       float  *d_array_start;
589ca735530SJeremy L Thompson 
590f7c1b517Snbeams       for (CeedInt i = 0; i < num_calls; i++) {
591f7c1b517Snbeams         d_array_start             = (float *)d_array + (CeedSize)(i)*INT_MAX;
592f7c1b517Snbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
593f7c1b517Snbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
594ca735530SJeremy L Thompson 
595ca735530SJeremy L Thompson         CeedCallCublas(ceed, cublasIsamax(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &index));
596ca735530SJeremy L Thompson         CeedCallCuda(ceed, cudaMemcpy(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), cudaMemcpyDeviceToHost));
597f7c1b517Snbeams         if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
598f7c1b517Snbeams       }
599f7c1b517Snbeams       *norm = current_max;
600*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
601*e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
602*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION >= 12000)
603ca735530SJeremy L Thompson       int64_t    index;
604ca735530SJeremy L Thompson       CeedScalar norm_no_abs;
605ca735530SJeremy L Thompson 
606ca735530SJeremy L Thompson       CeedCallCublas(ceed, cublasIdamax_64(handle, (int64_t)length, (double *)d_array, 1, &index));
607ca735530SJeremy L Thompson       CeedCallCuda(ceed, cudaMemcpy(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), cudaMemcpyDeviceToHost));
608ca735530SJeremy L Thompson       *norm = fabs(norm_no_abs);
609*e84c3ebcSJeremy L Thompson #else  /* CUDA_VERSION */
610ca735530SJeremy L Thompson       CeedInt index;
611f7c1b517Snbeams       double  sub_max = 0.0, current_max = 0.0;
612f7c1b517Snbeams       double *d_array_start;
613ca735530SJeremy L Thompson 
614f7c1b517Snbeams       for (CeedInt i = 0; i < num_calls; i++) {
615f7c1b517Snbeams         d_array_start             = (double *)d_array + (CeedSize)(i)*INT_MAX;
616f7c1b517Snbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
617f7c1b517Snbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
618ca735530SJeremy L Thompson 
619ca735530SJeremy L Thompson         CeedCallCublas(ceed, cublasIdamax(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &index));
620ca735530SJeremy L Thompson         CeedCallCuda(ceed, cudaMemcpy(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), cudaMemcpyDeviceToHost));
621f7c1b517Snbeams         if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
622f7c1b517Snbeams       }
623f7c1b517Snbeams       *norm = current_max;
624*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
625*e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
626ff1e7120SSebastian Grimberg       break;
627ff1e7120SSebastian Grimberg     }
628ff1e7120SSebastian Grimberg   }
629ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array));
6309bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
631ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
632ff1e7120SSebastian Grimberg }
633ff1e7120SSebastian Grimberg 
634ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
635ff1e7120SSebastian Grimberg // Take reciprocal of a vector on host
636ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
637f7c1b517Snbeams static int CeedHostReciprocal_Cuda(CeedScalar *h_array, CeedSize length) {
638f7c1b517Snbeams   for (CeedSize i = 0; i < length; i++) {
639ff1e7120SSebastian Grimberg     if (fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i];
640ff1e7120SSebastian Grimberg   }
641ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
642ff1e7120SSebastian Grimberg }
643ff1e7120SSebastian Grimberg 
644ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
645ff1e7120SSebastian Grimberg // Take reciprocal of a vector on device (impl in .cu file)
646ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
647f7c1b517Snbeams int CeedDeviceReciprocal_Cuda(CeedScalar *d_array, CeedSize length);
648ff1e7120SSebastian Grimberg 
649ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
650ff1e7120SSebastian Grimberg // Take reciprocal of a vector
651ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
652ff1e7120SSebastian Grimberg static int CeedVectorReciprocal_Cuda(CeedVector vec) {
653ff1e7120SSebastian Grimberg   CeedSize         length;
654ca735530SJeremy L Thompson   CeedVector_Cuda *impl;
655ff1e7120SSebastian Grimberg 
656ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
657ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
658ff1e7120SSebastian Grimberg   // Set value for synced device/host array
659ff1e7120SSebastian Grimberg   if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Cuda(impl->d_array, length));
660ff1e7120SSebastian Grimberg   if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Cuda(impl->h_array, length));
661ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
662ff1e7120SSebastian Grimberg }
663ff1e7120SSebastian Grimberg 
664ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
665ff1e7120SSebastian Grimberg // Compute x = alpha x on the host
666ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
667f7c1b517Snbeams static int CeedHostScale_Cuda(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
668f7c1b517Snbeams   for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha;
669ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
670ff1e7120SSebastian Grimberg }
671ff1e7120SSebastian Grimberg 
672ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
673ff1e7120SSebastian Grimberg // Compute x = alpha x on device (impl in .cu file)
674ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
675f7c1b517Snbeams int CeedDeviceScale_Cuda(CeedScalar *x_array, CeedScalar alpha, CeedSize length);
676ff1e7120SSebastian Grimberg 
677ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
678ff1e7120SSebastian Grimberg // Compute x = alpha x
679ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
680ff1e7120SSebastian Grimberg static int CeedVectorScale_Cuda(CeedVector x, CeedScalar alpha) {
681ff1e7120SSebastian Grimberg   CeedSize         length;
682*e84c3ebcSJeremy L Thompson   CeedVector_Cuda *impl;
683ff1e7120SSebastian Grimberg 
684*e84c3ebcSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(x, &impl));
685ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(x, &length));
686ff1e7120SSebastian Grimberg   // Set value for synced device/host array
687*e84c3ebcSJeremy L Thompson   if (impl->d_array) {
688*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION >= 12000)
689*e84c3ebcSJeremy L Thompson     cublasHandle_t handle;
690*e84c3ebcSJeremy L Thompson 
691*e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedGetCublasHandle_Cuda(CeedVectorReturnCeed(x), &handle));
692*e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
693*e84c3ebcSJeremy L Thompson     CeedCallCublas(CeedVectorReturnCeed(x), cublasSscal_64(handle, (int64_t)length, &alpha, impl->d_array, 1));
694*e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
695*e84c3ebcSJeremy L Thompson     CeedCallCublas(CeedVectorReturnCeed(x), cublasDscal_64(handle, (int64_t)length, &alpha, impl->d_array, 1));
696*e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
697*e84c3ebcSJeremy L Thompson #else  /* CUDA_VERSION */
698*e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedDeviceScale_Cuda(impl->d_array, alpha, length));
699*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
700*e84c3ebcSJeremy L Thompson     impl->h_array = NULL;
701*e84c3ebcSJeremy L Thompson   } else if (impl->h_array) {
702*e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedHostScale_Cuda(impl->h_array, alpha, length));
703*e84c3ebcSJeremy L Thompson     impl->d_array = NULL;
704*e84c3ebcSJeremy L Thompson   }
705ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
706ff1e7120SSebastian Grimberg }
707ff1e7120SSebastian Grimberg 
708ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
709ff1e7120SSebastian Grimberg // Compute y = alpha x + y on the host
710ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
711f7c1b517Snbeams static int CeedHostAXPY_Cuda(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
712f7c1b517Snbeams   for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i];
713ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
714ff1e7120SSebastian Grimberg }
715ff1e7120SSebastian Grimberg 
716ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
717ff1e7120SSebastian Grimberg // Compute y = alpha x + y on device (impl in .cu file)
718ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
719f7c1b517Snbeams int CeedDeviceAXPY_Cuda(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length);
720ff1e7120SSebastian Grimberg 
721ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
722ff1e7120SSebastian Grimberg // Compute y = alpha x + y
723ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
724ff1e7120SSebastian Grimberg static int CeedVectorAXPY_Cuda(CeedVector y, CeedScalar alpha, CeedVector x) {
725ca735530SJeremy L Thompson   CeedSize         length;
726ff1e7120SSebastian Grimberg   CeedVector_Cuda *y_impl, *x_impl;
727ca735530SJeremy L Thompson 
728ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(y, &y_impl));
729ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(x, &x_impl));
730ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetLength(y, &length));
731ff1e7120SSebastian Grimberg   // Set value for synced device/host array
732ff1e7120SSebastian Grimberg   if (y_impl->d_array) {
733ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
734*e84c3ebcSJeremy L Thompson #if (CUDA_VERSION >= 12000)
735*e84c3ebcSJeremy L Thompson     cublasHandle_t handle;
736*e84c3ebcSJeremy L Thompson 
737*e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedGetCublasHandle_Cuda(CeedVectorReturnCeed(y), &handle));
738*e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
739*e84c3ebcSJeremy L Thompson     CeedCallCublas(CeedVectorReturnCeed(y), cublasSaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1));
740*e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
741*e84c3ebcSJeremy L Thompson     CeedCallCublas(CeedVectorReturnCeed(y), cublasDaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1));
742*e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
743*e84c3ebcSJeremy L Thompson #else  /* CUDA_VERSION */
744ff1e7120SSebastian Grimberg     CeedCallBackend(CeedDeviceAXPY_Cuda(y_impl->d_array, alpha, x_impl->d_array, length));
745*e84c3ebcSJeremy L Thompson #endif /* CUDA_VERSION */
746*e84c3ebcSJeremy L Thompson     y_impl->h_array = NULL;
747*e84c3ebcSJeremy L Thompson   } else if (y_impl->h_array) {
748ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
749ff1e7120SSebastian Grimberg     CeedCallBackend(CeedHostAXPY_Cuda(y_impl->h_array, alpha, x_impl->h_array, length));
750*e84c3ebcSJeremy L Thompson     y_impl->d_array = NULL;
751ff1e7120SSebastian Grimberg   }
752ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
753ff1e7120SSebastian Grimberg }
754ff1e7120SSebastian Grimberg 
755ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
756ff1e7120SSebastian Grimberg // Compute y = alpha x + beta y on the host
757ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
758f7c1b517Snbeams static int CeedHostAXPBY_Cuda(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) {
759aa67b842SZach Atkins   for (CeedSize i = 0; i < length; i++) y_array[i] = alpha * x_array[i] + beta * y_array[i];
760ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
761ff1e7120SSebastian Grimberg }
762ff1e7120SSebastian Grimberg 
763ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
764ff1e7120SSebastian Grimberg // Compute y = alpha x + beta y on device (impl in .cu file)
765ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
766f7c1b517Snbeams int CeedDeviceAXPBY_Cuda(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length);
767ff1e7120SSebastian Grimberg 
768ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
769ff1e7120SSebastian Grimberg // Compute y = alpha x + beta y
770ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
771ff1e7120SSebastian Grimberg static int CeedVectorAXPBY_Cuda(CeedVector y, CeedScalar alpha, CeedScalar beta, CeedVector x) {
772ca735530SJeremy L Thompson   CeedSize         length;
773ff1e7120SSebastian Grimberg   CeedVector_Cuda *y_impl, *x_impl;
774ca735530SJeremy L Thompson 
775ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(y, &y_impl));
776ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(x, &x_impl));
777ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetLength(y, &length));
778ff1e7120SSebastian Grimberg   // Set value for synced device/host array
779ff1e7120SSebastian Grimberg   if (y_impl->d_array) {
780ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
781ff1e7120SSebastian Grimberg     CeedCallBackend(CeedDeviceAXPBY_Cuda(y_impl->d_array, alpha, beta, x_impl->d_array, length));
782ff1e7120SSebastian Grimberg   }
783ff1e7120SSebastian Grimberg   if (y_impl->h_array) {
784ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
785ff1e7120SSebastian Grimberg     CeedCallBackend(CeedHostAXPBY_Cuda(y_impl->h_array, alpha, beta, x_impl->h_array, length));
786ff1e7120SSebastian Grimberg   }
787ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
788ff1e7120SSebastian Grimberg }
789ff1e7120SSebastian Grimberg 
790ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
791ff1e7120SSebastian Grimberg // Compute the pointwise multiplication w = x .* y on the host
792ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
793f7c1b517Snbeams static int CeedHostPointwiseMult_Cuda(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
794f7c1b517Snbeams   for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i];
795ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
796ff1e7120SSebastian Grimberg }
797ff1e7120SSebastian Grimberg 
798ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
799ff1e7120SSebastian Grimberg // Compute the pointwise multiplication w = x .* y on device (impl in .cu file)
800ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
801f7c1b517Snbeams int CeedDevicePointwiseMult_Cuda(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length);
802ff1e7120SSebastian Grimberg 
803ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
804ff1e7120SSebastian Grimberg // Compute the pointwise multiplication w = x .* y
805ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
806ff1e7120SSebastian Grimberg static int CeedVectorPointwiseMult_Cuda(CeedVector w, CeedVector x, CeedVector y) {
807ca735530SJeremy L Thompson   CeedSize         length;
808ff1e7120SSebastian Grimberg   CeedVector_Cuda *w_impl, *x_impl, *y_impl;
809ca735530SJeremy L Thompson 
810ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(w, &w_impl));
811ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(x, &x_impl));
812ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetData(y, &y_impl));
813ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorGetLength(w, &length));
814ff1e7120SSebastian Grimberg   // Set value for synced device/host array
815ff1e7120SSebastian Grimberg   if (!w_impl->d_array && !w_impl->h_array) {
816ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSetValue(w, 0.0));
817ff1e7120SSebastian Grimberg   }
818ff1e7120SSebastian Grimberg   if (w_impl->d_array) {
819ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
820ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE));
821ff1e7120SSebastian Grimberg     CeedCallBackend(CeedDevicePointwiseMult_Cuda(w_impl->d_array, x_impl->d_array, y_impl->d_array, length));
822ff1e7120SSebastian Grimberg   }
823ff1e7120SSebastian Grimberg   if (w_impl->h_array) {
824ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
825ff1e7120SSebastian Grimberg     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST));
826ff1e7120SSebastian Grimberg     CeedCallBackend(CeedHostPointwiseMult_Cuda(w_impl->h_array, x_impl->h_array, y_impl->h_array, length));
827ff1e7120SSebastian Grimberg   }
828ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
829ff1e7120SSebastian Grimberg }
830ff1e7120SSebastian Grimberg 
831ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
832ff1e7120SSebastian Grimberg // Destroy the vector
833ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
834ff1e7120SSebastian Grimberg static int CeedVectorDestroy_Cuda(const CeedVector vec) {
835ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
836ff1e7120SSebastian Grimberg 
837ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
8386e536b99SJeremy L Thompson   CeedCallCuda(CeedVectorReturnCeed(vec), cudaFree(impl->d_array_owned));
839ff1e7120SSebastian Grimberg   CeedCallBackend(CeedFree(&impl->h_array_owned));
840ff1e7120SSebastian Grimberg   CeedCallBackend(CeedFree(&impl));
841ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
842ff1e7120SSebastian Grimberg }
843ff1e7120SSebastian Grimberg 
844ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
845ff1e7120SSebastian Grimberg // Create a vector of the specified length (does not allocate memory)
846ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
847ff1e7120SSebastian Grimberg int CeedVectorCreate_Cuda(CeedSize n, CeedVector vec) {
848ff1e7120SSebastian Grimberg   CeedVector_Cuda *impl;
849ff1e7120SSebastian Grimberg   Ceed             ceed;
850ff1e7120SSebastian Grimberg 
851ca735530SJeremy L Thompson   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
852ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Cuda));
853ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Cuda));
854ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Cuda));
855ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Cuda));
8563e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "CopyStrided", CeedVectorCopyStrided_Cuda));
8573e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Cuda));
8583e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetValueStrided", CeedVectorSetValueStrided_Cuda));
859ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Cuda));
860ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Cuda));
861ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Cuda));
862ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Cuda));
863ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Norm", CeedVectorNorm_Cuda));
864ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Cuda));
8653e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Scale", CeedVectorScale_Cuda));
8663e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Cuda));
8673e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "AXPBY", CeedVectorAXPBY_Cuda));
868ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Cuda));
869ff1e7120SSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Cuda));
8709bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
871ff1e7120SSebastian Grimberg   CeedCallBackend(CeedCalloc(1, &impl));
872ff1e7120SSebastian Grimberg   CeedCallBackend(CeedVectorSetData(vec, impl));
873ff1e7120SSebastian Grimberg   return CEED_ERROR_SUCCESS;
874ff1e7120SSebastian Grimberg }
875ff1e7120SSebastian Grimberg 
876ff1e7120SSebastian Grimberg //------------------------------------------------------------------------------
877