xref: /libCEED/backends/hip-ref/ceed-hip-ref-vector.c (revision 0002d81d205a4e0fbfcfe4897732c8c00278f8e6)
1 // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 #include <ceed.h>
9 #include <ceed/backend.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <string.h>
13 #include <hip/hip_runtime.h>
14 
15 #include "../hip/ceed-hip-common.h"
16 #include "ceed-hip-ref.h"
17 
18 //------------------------------------------------------------------------------
19 // Check if host/device sync is needed
20 //------------------------------------------------------------------------------
21 static inline int CeedVectorNeedSync_Hip(const CeedVector vec, CeedMemType mem_type, bool *need_sync) {
22   CeedVector_Hip *impl;
23   bool            has_valid_array = false;
24 
25   CeedCallBackend(CeedVectorGetData(vec, &impl));
26   CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array));
27   switch (mem_type) {
28     case CEED_MEM_HOST:
29       *need_sync = has_valid_array && !impl->h_array;
30       break;
31     case CEED_MEM_DEVICE:
32       *need_sync = has_valid_array && !impl->d_array;
33       break;
34   }
35   return CEED_ERROR_SUCCESS;
36 }
37 
38 //------------------------------------------------------------------------------
39 // Sync host to device
40 //------------------------------------------------------------------------------
41 static inline int CeedVectorSyncH2D_Hip(const CeedVector vec) {
42   CeedSize        length;
43   size_t          bytes;
44   CeedVector_Hip *impl;
45 
46   CeedCallBackend(CeedVectorGetData(vec, &impl));
47 
48   CeedCheck(impl->h_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid host data to sync to device");
49 
50   CeedCallBackend(CeedVectorGetLength(vec, &length));
51   bytes = length * sizeof(CeedScalar);
52   if (impl->d_array_borrowed) {
53     impl->d_array = impl->d_array_borrowed;
54   } else if (impl->d_array_owned) {
55     impl->d_array = impl->d_array_owned;
56   } else {
57     CeedCallHip(CeedVectorReturnCeed(vec), hipMalloc((void **)&impl->d_array_owned, bytes));
58     impl->d_array = impl->d_array_owned;
59   }
60   CeedCallHip(CeedVectorReturnCeed(vec), hipMemcpy(impl->d_array, impl->h_array, bytes, hipMemcpyHostToDevice));
61   return CEED_ERROR_SUCCESS;
62 }
63 
64 //------------------------------------------------------------------------------
65 // Sync device to host
66 //------------------------------------------------------------------------------
67 static inline int CeedVectorSyncD2H_Hip(const CeedVector vec) {
68   CeedSize        length;
69   size_t          bytes;
70   CeedVector_Hip *impl;
71 
72   CeedCallBackend(CeedVectorGetData(vec, &impl));
73 
74   CeedCheck(impl->d_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid device data to sync to host");
75 
76   if (impl->h_array_borrowed) {
77     impl->h_array = impl->h_array_borrowed;
78   } else if (impl->h_array_owned) {
79     impl->h_array = impl->h_array_owned;
80   } else {
81     CeedSize length;
82 
83     CeedCallBackend(CeedVectorGetLength(vec, &length));
84     CeedCallBackend(CeedCalloc(length, &impl->h_array_owned));
85     impl->h_array = impl->h_array_owned;
86   }
87 
88   CeedCallBackend(CeedVectorGetLength(vec, &length));
89   bytes = length * sizeof(CeedScalar);
90   CeedCallHip(CeedVectorReturnCeed(vec), hipMemcpy(impl->h_array, impl->d_array, bytes, hipMemcpyDeviceToHost));
91   return CEED_ERROR_SUCCESS;
92 }
93 
94 //------------------------------------------------------------------------------
95 // Sync arrays
96 //------------------------------------------------------------------------------
97 static int CeedVectorSyncArray_Hip(const CeedVector vec, CeedMemType mem_type) {
98   bool            need_sync = false;
99   CeedVector_Hip *impl;
100 
101   // Sync for unified memory
102   CeedCallBackend(CeedVectorGetData(vec, &impl));
103   if (impl->has_unified_addressing && !impl->h_array_borrowed) {
104     CeedCallHip(CeedVectorReturnCeed(vec), hipDeviceSynchronize());
105     return CEED_ERROR_SUCCESS;
106   }
107 
108   // Check whether device/host sync is needed
109   CeedCallBackend(CeedVectorNeedSync_Hip(vec, mem_type, &need_sync));
110   if (!need_sync) return CEED_ERROR_SUCCESS;
111 
112   switch (mem_type) {
113     case CEED_MEM_HOST:
114       return CeedVectorSyncD2H_Hip(vec);
115     case CEED_MEM_DEVICE:
116       return CeedVectorSyncH2D_Hip(vec);
117   }
118   return CEED_ERROR_UNSUPPORTED;
119 }
120 
121 //------------------------------------------------------------------------------
122 // Set all pointers as invalid
123 //------------------------------------------------------------------------------
124 static inline int CeedVectorSetAllInvalid_Hip(const CeedVector vec) {
125   CeedVector_Hip *impl;
126 
127   CeedCallBackend(CeedVectorGetData(vec, &impl));
128   impl->h_array = NULL;
129   impl->d_array = NULL;
130   return CEED_ERROR_SUCCESS;
131 }
132 
133 //------------------------------------------------------------------------------
134 // Check if CeedVector has any valid pointer
135 //------------------------------------------------------------------------------
136 static inline int CeedVectorHasValidArray_Hip(const CeedVector vec, bool *has_valid_array) {
137   CeedVector_Hip *impl;
138 
139   CeedCallBackend(CeedVectorGetData(vec, &impl));
140   *has_valid_array = impl->h_array || impl->d_array;
141   return CEED_ERROR_SUCCESS;
142 }
143 
144 //------------------------------------------------------------------------------
145 // Check if has array of given type
146 //------------------------------------------------------------------------------
147 static inline int CeedVectorHasArrayOfType_Hip(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) {
148   CeedVector_Hip *impl;
149 
150   CeedCallBackend(CeedVectorGetData(vec, &impl));
151   switch (mem_type) {
152     case CEED_MEM_HOST:
153       *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned;
154       break;
155     case CEED_MEM_DEVICE:
156       *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned;
157       break;
158   }
159   return CEED_ERROR_SUCCESS;
160 }
161 
162 //------------------------------------------------------------------------------
163 // Check if has borrowed array of given type
164 //------------------------------------------------------------------------------
165 static inline int CeedVectorHasBorrowedArrayOfType_Hip(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) {
166   CeedVector_Hip *impl;
167 
168   CeedCallBackend(CeedVectorGetData(vec, &impl));
169 
170   // Use device memory for unified memory
171   mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type;
172 
173   switch (mem_type) {
174     case CEED_MEM_HOST:
175       *has_borrowed_array_of_type = impl->h_array_borrowed;
176       break;
177     case CEED_MEM_DEVICE:
178       *has_borrowed_array_of_type = impl->d_array_borrowed;
179       break;
180   }
181   return CEED_ERROR_SUCCESS;
182 }
183 
184 //------------------------------------------------------------------------------
185 // Set array from host
186 //------------------------------------------------------------------------------
187 static int CeedVectorSetArrayHost_Hip(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
188   CeedSize        length;
189   CeedVector_Hip *impl;
190 
191   CeedCallBackend(CeedVectorGetData(vec, &impl));
192   CeedCallBackend(CeedVectorGetLength(vec, &length));
193 
194   CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h_array_owned,
195                                              (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array));
196   return CEED_ERROR_SUCCESS;
197 }
198 
199 //------------------------------------------------------------------------------
200 // Set array from device
201 //------------------------------------------------------------------------------
202 static int CeedVectorSetArrayDevice_Hip(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
203   CeedSize        length;
204   Ceed            ceed;
205   CeedVector_Hip *impl;
206 
207   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
208   CeedCallBackend(CeedVectorGetData(vec, &impl));
209   CeedCallBackend(CeedVectorGetLength(vec, &length));
210 
211   CeedCallBackend(CeedSetDeviceCeedScalarArray_Hip(ceed, array, copy_mode, length, (const CeedScalar **)&impl->d_array_owned,
212                                                    (const CeedScalar **)&impl->d_array_borrowed, (const CeedScalar **)&impl->d_array));
213   CeedCallBackend(CeedDestroy(&ceed));
214   return CEED_ERROR_SUCCESS;
215 }
216 
217 //------------------------------------------------------------------------------
218 // Set array with unified memory
219 //------------------------------------------------------------------------------
220 static int CeedVectorSetArrayUnifiedHostToDevice_Hip(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
221   CeedSize        length;
222   Ceed            ceed;
223   CeedVector_Hip *impl;
224 
225   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
226   CeedCallBackend(CeedVectorGetData(vec, &impl));
227   CeedCallBackend(CeedVectorGetLength(vec, &length));
228 
229   switch (copy_mode) {
230     case CEED_COPY_VALUES:
231     case CEED_OWN_POINTER:
232       if (!impl->d_array) {
233         if (impl->d_array_borrowed) {
234           impl->d_array = impl->d_array_borrowed;
235         } else {
236           if (!impl->d_array_owned) CeedCallHip(ceed, hipMalloc((void **)&impl->d_array_owned, sizeof(CeedScalar) * length));
237           impl->d_array = impl->d_array_owned;
238         }
239       }
240       if (array) CeedCallHip(ceed, hipMemcpy(impl->d_array, array, sizeof(CeedScalar) * length, hipMemcpyHostToDevice));
241       if (copy_mode == CEED_OWN_POINTER) CeedCallBackend(CeedFree(&array));
242       break;
243     case CEED_USE_POINTER:
244       CeedCallHip(ceed, hipFree(impl->d_array_owned));
245       CeedCallBackend(CeedFree(&impl->h_array_owned));
246       impl->h_array_owned    = NULL;
247       impl->h_array_borrowed = array;
248       impl->d_array          = impl->h_array_borrowed;
249   }
250   CeedCallBackend(CeedDestroy(&ceed));
251   return CEED_ERROR_SUCCESS;
252 }
253 
254 //------------------------------------------------------------------------------
255 // Set the array used by a vector,
256 //   freeing any previously allocated array if applicable
257 //------------------------------------------------------------------------------
258 static int CeedVectorSetArray_Hip(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) {
259   CeedVector_Hip *impl;
260 
261   CeedCallBackend(CeedVectorGetData(vec, &impl));
262   CeedCallBackend(CeedVectorSetAllInvalid_Hip(vec));
263   switch (mem_type) {
264     case CEED_MEM_HOST:
265       if (impl->has_unified_addressing) {
266         return CeedVectorSetArrayUnifiedHostToDevice_Hip(vec, copy_mode, array);
267       } else {
268         return CeedVectorSetArrayHost_Hip(vec, copy_mode, array);
269       }
270     case CEED_MEM_DEVICE:
271       return CeedVectorSetArrayDevice_Hip(vec, copy_mode, array);
272   }
273   return CEED_ERROR_UNSUPPORTED;
274 }
275 
276 //------------------------------------------------------------------------------
277 // Copy host array to value strided
278 //------------------------------------------------------------------------------
279 static int CeedHostCopyStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *h_copy_array) {
280   for (CeedSize i = start; i < stop; i += step) h_copy_array[i] = h_array[i];
281   return CEED_ERROR_SUCCESS;
282 }
283 
284 //------------------------------------------------------------------------------
285 // Copy device array to value strided (impl in .hip.cpp file)
286 //------------------------------------------------------------------------------
287 int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array);
288 
289 //------------------------------------------------------------------------------
290 // Copy a vector to a value strided
291 //------------------------------------------------------------------------------
292 static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) {
293   CeedSize        length;
294   CeedVector_Hip *impl;
295 
296   CeedCallBackend(CeedVectorGetData(vec, &impl));
297   {
298     CeedSize length_vec, length_copy;
299 
300     CeedCallBackend(CeedVectorGetLength(vec, &length_vec));
301     CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy));
302     length = length_vec < length_copy ? length_vec : length_copy;
303   }
304   if (stop == -1) stop = length;
305   // Set value for synced device/host array
306   if (impl->d_array) {
307     CeedScalar *copy_array;
308 
309     CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_DEVICE, &copy_array));
310 #if (HIP_VERSION >= 60000000)
311     hipblasHandle_t handle;
312     hipStream_t     stream;
313     Ceed            ceed;
314 
315     CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
316     CeedCallBackend(CeedGetHipblasHandle_Hip(ceed, &handle));
317     CeedCallHipblas(ceed, hipblasGetStream(handle, &stream));
318 #if defined(CEED_SCALAR_IS_FP32)
319     CeedCallHipblas(ceed, hipblasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
320 #else  /* CEED_SCALAR */
321     CeedCallHipblas(ceed, hipblasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
322 #endif /* CEED_SCALAR */
323     CeedCallHip(ceed, hipStreamSynchronize(stream));
324 #else  /* HIP_VERSION */
325     CeedCallBackend(CeedDeviceCopyStrided_Hip(impl->d_array, start, stop, step, copy_array));
326 #endif /* HIP_VERSION */
327     CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
328     impl->h_array = NULL;
329     CeedCallBackend(CeedDestroy(&ceed));
330   } else if (impl->h_array) {
331     CeedScalar *copy_array;
332 
333     CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, &copy_array));
334     CeedCallBackend(CeedHostCopyStrided_Hip(impl->h_array, start, stop, step, copy_array));
335     CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
336     impl->d_array = NULL;
337   } else {
338     return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");
339   }
340   return CEED_ERROR_SUCCESS;
341 }
342 
343 //------------------------------------------------------------------------------
344 // Set host array to value
345 //------------------------------------------------------------------------------
346 static int CeedHostSetValue_Hip(CeedScalar *h_array, CeedSize length, CeedScalar val) {
347   for (CeedSize i = 0; i < length; i++) h_array[i] = val;
348   return CEED_ERROR_SUCCESS;
349 }
350 
351 //------------------------------------------------------------------------------
352 // Set device array to value (impl in .hip file)
353 //------------------------------------------------------------------------------
354 int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val);
355 
356 //------------------------------------------------------------------------------
357 // Set a vector to a value
358 //------------------------------------------------------------------------------
359 static int CeedVectorSetValue_Hip(CeedVector vec, CeedScalar val) {
360   CeedSize        length;
361   CeedVector_Hip *impl;
362   Ceed_Hip       *hip_data;
363 
364   CeedCallBackend(CeedVectorGetData(vec, &impl));
365   CeedCallBackend(CeedGetData(CeedVectorReturnCeed(vec), &hip_data));
366   CeedCallBackend(CeedVectorGetLength(vec, &length));
367   // Set value for synced device/host array
368   if (!impl->d_array && !impl->h_array) {
369     if (impl->d_array_borrowed) {
370       impl->d_array = impl->d_array_borrowed;
371     } else if (impl->h_array_borrowed) {
372       impl->h_array = impl->h_array_borrowed;
373     } else if (impl->d_array_owned) {
374       impl->d_array = impl->d_array_owned;
375     } else if (impl->h_array_owned) {
376       impl->h_array = impl->h_array_owned;
377     } else {
378       CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL));
379     }
380   }
381   if (impl->d_array) {
382     if (val == 0 && !impl->h_array_borrowed) {
383       CeedCallHip(CeedVectorReturnCeed(vec), hipMemset(impl->d_array, 0, length * sizeof(CeedScalar)));
384     } else {
385       CeedCallBackend(CeedDeviceSetValue_Hip(impl->d_array, length, val));
386     }
387     impl->h_array = NULL;
388   } else if (impl->h_array) {
389     CeedCallBackend(CeedHostSetValue_Hip(impl->h_array, length, val));
390     impl->d_array = NULL;
391   }
392   return CEED_ERROR_SUCCESS;
393 }
394 
395 //------------------------------------------------------------------------------
396 // Set host array to value strided
397 //------------------------------------------------------------------------------
398 static int CeedHostSetValueStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
399   for (CeedSize i = start; i < stop; i += step) h_array[i] = val;
400   return CEED_ERROR_SUCCESS;
401 }
402 
403 //------------------------------------------------------------------------------
404 // Set device array to value strided (impl in .hip.cpp file)
405 //------------------------------------------------------------------------------
406 int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val);
407 
408 //------------------------------------------------------------------------------
409 // Set a vector to a value strided
410 //------------------------------------------------------------------------------
411 static int CeedVectorSetValueStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
412   CeedSize        length;
413   CeedVector_Hip *impl;
414 
415   CeedCallBackend(CeedVectorGetData(vec, &impl));
416   CeedCallBackend(CeedVectorGetLength(vec, &length));
417   // Set value for synced device/host array
418   if (stop == -1) stop = length;
419   if (impl->d_array) {
420     CeedCallBackend(CeedDeviceSetValueStrided_Hip(impl->d_array, start, stop, step, val));
421     impl->h_array = NULL;
422   } else if (impl->h_array) {
423     CeedCallBackend(CeedHostSetValueStrided_Hip(impl->h_array, start, stop, step, val));
424     impl->d_array = NULL;
425   } else {
426     return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");
427   }
428   return CEED_ERROR_SUCCESS;
429 }
430 
431 //------------------------------------------------------------------------------
432 // Vector Take Array
433 //------------------------------------------------------------------------------
434 static int CeedVectorTakeArray_Hip(CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
435   CeedVector_Hip *impl;
436 
437   CeedCallBackend(CeedVectorGetData(vec, &impl));
438 
439   // Sync array to requested mem_type
440   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
441 
442   // Update pointer
443   switch (mem_type) {
444     case CEED_MEM_HOST:
445       (*array)               = impl->h_array_borrowed;
446       impl->h_array_borrowed = NULL;
447       impl->h_array          = NULL;
448       break;
449     case CEED_MEM_DEVICE:
450       (*array)               = impl->d_array_borrowed;
451       impl->d_array_borrowed = NULL;
452       impl->d_array          = NULL;
453       break;
454   }
455   return CEED_ERROR_SUCCESS;
456 }
457 
458 //------------------------------------------------------------------------------
459 // Core logic for array synchronization for GetArray.
460 //   If a different memory type is most up to date, this will perform a copy
461 //------------------------------------------------------------------------------
462 static int CeedVectorGetArrayCore_Hip(const CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
463   CeedVector_Hip *impl;
464 
465   CeedCallBackend(CeedVectorGetData(vec, &impl));
466 
467   // Use device memory for unified memory
468   mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type;
469 
470   // Sync array to requested mem_type
471   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
472 
473   // Update pointer
474   switch (mem_type) {
475     case CEED_MEM_HOST:
476       *array = impl->h_array;
477       break;
478     case CEED_MEM_DEVICE:
479       *array = impl->d_array;
480       break;
481   }
482   return CEED_ERROR_SUCCESS;
483 }
484 
485 //------------------------------------------------------------------------------
486 // Get read-only access to a vector via the specified mem_type
487 //------------------------------------------------------------------------------
488 static int CeedVectorGetArrayRead_Hip(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) {
489   return CeedVectorGetArrayCore_Hip(vec, mem_type, (CeedScalar **)array);
490 }
491 
492 //------------------------------------------------------------------------------
493 // Get read/write access to a vector via the specified mem_type
494 //------------------------------------------------------------------------------
495 static int CeedVectorGetArray_Hip(const CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
496   CeedVector_Hip *impl;
497 
498   CeedCallBackend(CeedVectorGetData(vec, &impl));
499 
500   // Use device memory for unified memory
501   mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type;
502 
503   // 'Get' array and set only 'get'ed array as valid
504   CeedCallBackend(CeedVectorGetArrayCore_Hip(vec, mem_type, array));
505   CeedCallBackend(CeedVectorSetAllInvalid_Hip(vec));
506   switch (mem_type) {
507     case CEED_MEM_HOST:
508       impl->h_array = *array;
509       if (impl->has_unified_addressing) impl->d_array = *array;
510       break;
511     case CEED_MEM_DEVICE:
512       impl->d_array = *array;
513       break;
514   }
515   return CEED_ERROR_SUCCESS;
516 }
517 
518 //------------------------------------------------------------------------------
519 // Get write access to a vector via the specified mem_type
520 //------------------------------------------------------------------------------
521 static int CeedVectorGetArrayWrite_Hip(const CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
522   bool            has_array_of_type = true;
523   CeedVector_Hip *impl;
524   Ceed_Hip       *hip_data;
525 
526   CeedCallBackend(CeedVectorGetData(vec, &impl));
527   CeedCallBackend(CeedGetData(CeedVectorReturnCeed(vec), &hip_data));
528 
529   // Use device memory for unified memory
530   mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type;
531 
532   CeedCallBackend(CeedVectorHasArrayOfType_Hip(vec, mem_type, &has_array_of_type));
533   if (!has_array_of_type) {
534     // Allocate if array is not yet allocated
535     CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL));
536   } else {
537     // Select dirty array
538     switch (mem_type) {
539       case CEED_MEM_HOST:
540         if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed;
541         else impl->h_array = impl->h_array_owned;
542         break;
543       case CEED_MEM_DEVICE:
544         if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed;
545         else impl->d_array = impl->d_array_owned;
546     }
547   }
548   return CeedVectorGetArray_Hip(vec, mem_type, array);
549 }
550 
551 //------------------------------------------------------------------------------
552 // Get the norm of a CeedVector
553 //------------------------------------------------------------------------------
554 static int CeedVectorNorm_Hip(CeedVector vec, CeedNormType type, CeedScalar *norm) {
555   Ceed     ceed;
556   CeedSize length;
557 #if (HIP_VERSION < 60000000)
558   CeedSize num_calls;
559 #endif /* HIP_VERSION */
560   const CeedScalar *d_array;
561   CeedVector_Hip   *impl;
562   hipblasHandle_t   handle;
563   hipStream_t       stream;
564   Ceed_Hip         *hip_data;
565 
566   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
567   CeedCallBackend(CeedGetData(ceed, &hip_data));
568   CeedCallBackend(CeedVectorGetData(vec, &impl));
569   CeedCallBackend(CeedVectorGetLength(vec, &length));
570   CeedCallBackend(CeedGetHipblasHandle_Hip(ceed, &handle));
571   CeedCallHipblas(ceed, hipblasGetStream(handle, &stream));
572 #if (HIP_VERSION < 60000000)
573   // With ROCm 6, we can use the 64-bit integer interface. Prior to that,
574   // we need to check if the vector is too long to handle with int32,
575   // and if so, divide it into subsections for repeated hipBLAS calls.
576   num_calls = length / INT_MAX;
577   if (length % INT_MAX > 0) num_calls += 1;
578 #endif /* HIP_VERSION */
579 
580   // Compute norm
581   CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array));
582   switch (type) {
583     case CEED_NORM_1: {
584       *norm = 0.0;
585 #if defined(CEED_SCALAR_IS_FP32)
586 #if (HIP_VERSION >= 60000000)  // We have ROCm 6, and can use 64-bit integers
587       CeedCallHipblas(ceed, hipblasSasum_64(handle, (int64_t)length, (float *)d_array, 1, (float *)norm));
588       CeedCallHip(ceed, hipStreamSynchronize(stream));
589 #else  /* HIP_VERSION */
590       float  sub_norm = 0.0;
591       float *d_array_start;
592 
593       for (CeedInt i = 0; i < num_calls; i++) {
594         d_array_start             = (float *)d_array + (CeedSize)(i)*INT_MAX;
595         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
596         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
597 
598         CeedCallHipblas(ceed, hipblasSasum(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm));
599         CeedCallHip(ceed, hipStreamSynchronize(stream));
600         *norm += sub_norm;
601       }
602 #endif /* HIP_VERSION */
603 #else  /* CEED_SCALAR */
604 #if (HIP_VERSION >= 60000000)
605       CeedCallHipblas(ceed, hipblasDasum_64(handle, (int64_t)length, (double *)d_array, 1, (double *)norm));
606       CeedCallHip(ceed, hipStreamSynchronize(stream));
607 #else  /* HIP_VERSION */
608       double  sub_norm = 0.0;
609       double *d_array_start;
610 
611       for (CeedInt i = 0; i < num_calls; i++) {
612         d_array_start             = (double *)d_array + (CeedSize)(i)*INT_MAX;
613         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
614         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
615 
616         CeedCallHipblas(ceed, hipblasDasum(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm));
617         CeedCallHip(ceed, hipStreamSynchronize(stream));
618         *norm += sub_norm;
619       }
620 #endif /* HIP_VERSION */
621 #endif /* CEED_SCALAR */
622       break;
623     }
624     case CEED_NORM_2: {
625 #if defined(CEED_SCALAR_IS_FP32)
626 #if (HIP_VERSION >= 60000000)
627       CeedCallHipblas(ceed, hipblasSnrm2_64(handle, (int64_t)length, (float *)d_array, 1, (float *)norm));
628       CeedCallHip(ceed, hipStreamSynchronize(stream));
629 #else  /* HIP_VERSION */
630       float  sub_norm = 0.0, norm_sum = 0.0;
631       float *d_array_start;
632 
633       for (CeedInt i = 0; i < num_calls; i++) {
634         d_array_start             = (float *)d_array + (CeedSize)(i)*INT_MAX;
635         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
636         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
637 
638         CeedCallHipblas(ceed, hipblasSnrm2(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm));
639         CeedCallHip(ceed, hipStreamSynchronize(stream));
640         norm_sum += sub_norm * sub_norm;
641       }
642       *norm = sqrt(norm_sum);
643 #endif /* HIP_VERSION */
644 #else  /* CEED_SCALAR */
645 #if (HIP_VERSION >= 60000000)
646       CeedCallHipblas(ceed, hipblasDnrm2_64(handle, (int64_t)length, (double *)d_array, 1, (double *)norm));
647       CeedCallHip(ceed, hipStreamSynchronize(stream));
648 #else  /* HIP_VERSION */
649       double  sub_norm = 0.0, norm_sum = 0.0;
650       double *d_array_start;
651 
652       for (CeedInt i = 0; i < num_calls; i++) {
653         d_array_start             = (double *)d_array + (CeedSize)(i)*INT_MAX;
654         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
655         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
656 
657         CeedCallHipblas(ceed, hipblasDnrm2(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm));
658         CeedCallHip(ceed, hipStreamSynchronize(stream));
659         norm_sum += sub_norm * sub_norm;
660       }
661       *norm = sqrt(norm_sum);
662 #endif /* HIP_VERSION */
663 #endif /* CEED_SCALAR */
664       break;
665     }
666     case CEED_NORM_MAX: {
667 #if defined(CEED_SCALAR_IS_FP32)
668 #if (HIP_VERSION >= 60000000)
669       int64_t    index;
670       CeedScalar norm_no_abs;
671 
672       CeedCallHipblas(ceed, hipblasIsamax_64(handle, (int64_t)length, (float *)d_array, 1, &index));
673       CeedCallHip(ceed, hipMemcpyAsync(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream));
674       CeedCallHip(ceed, hipStreamSynchronize(stream));
675       *norm = fabs(norm_no_abs);
676 #else  /* HIP_VERSION */
677       CeedInt index;
678       float   sub_max = 0.0, current_max = 0.0;
679       float  *d_array_start;
680 
681       for (CeedInt i = 0; i < num_calls; i++) {
682         d_array_start             = (float *)d_array + (CeedSize)(i)*INT_MAX;
683         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
684         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
685 
686         CeedCallHipblas(ceed, hipblasIsamax(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &index));
687         if (hip_data->has_unified_addressing) {
688           CeedCallHip(ceed, hipStreamSynchronize(stream));
689           sub_max = fabs(d_array[index - 1]);
690         } else {
691           CeedCallHip(ceed, hipMemcpyAsync(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream));
692           CeedCallHip(ceed, hipStreamSynchronize(stream));
693         }
694         if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
695       }
696       *norm = current_max;
697 #endif /* HIP_VERSION */
698 #else  /* CEED_SCALAR */
699 #if (HIP_VERSION >= 60000000)
700       int64_t    index;
701       CeedScalar norm_no_abs;
702 
703       CeedCallHipblas(ceed, hipblasIdamax_64(handle, (int64_t)length, (double *)d_array, 1, &index));
704       if (hip_data->has_unified_addressing) {
705         CeedCallHip(ceed, hipStreamSynchronize(stream));
706         norm_no_abs = fabs(d_array[index - 1]);
707       } else {
708         CeedCallHip(ceed, hipMemcpyAsync(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream));
709         CeedCallHip(ceed, hipStreamSynchronize(stream));
710       }
711       *norm = fabs(norm_no_abs);
712 #else  /* HIP_VERSION */
713       CeedInt index;
714       double  sub_max = 0.0, current_max = 0.0;
715       double *d_array_start;
716 
717       for (CeedInt i = 0; i < num_calls; i++) {
718         d_array_start             = (double *)d_array + (CeedSize)(i)*INT_MAX;
719         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
720         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
721 
722         CeedCallHipblas(ceed, hipblasIdamax(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &index));
723         if (hip_data->has_unified_addressing) {
724           CeedCallHip(ceed, hipStreamSynchronize(stream));
725           sub_max = fabs(d_array[index - 1]);
726         } else {
727           CeedCallHip(ceed, hipMemcpyAsync(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream));
728           CeedCallHip(ceed, hipStreamSynchronize(stream));
729         }
730         if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
731       }
732       *norm = current_max;
733 #endif /* HIP_VERSION */
734 #endif /* CEED_SCALAR */
735       break;
736     }
737   }
738   CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array));
739   CeedCallBackend(CeedDestroy(&ceed));
740   return CEED_ERROR_SUCCESS;
741 }
742 
743 //------------------------------------------------------------------------------
744 // Take reciprocal of a vector on host
745 //------------------------------------------------------------------------------
746 static int CeedHostReciprocal_Hip(CeedScalar *h_array, CeedSize length) {
747   for (CeedSize i = 0; i < length; i++) {
748     if (fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i];
749   }
750   return CEED_ERROR_SUCCESS;
751 }
752 
753 //------------------------------------------------------------------------------
754 // Take reciprocal of a vector on device (impl in .hip.cpp file)
755 //------------------------------------------------------------------------------
756 int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length);
757 
758 //------------------------------------------------------------------------------
759 // Take reciprocal of a vector
760 //------------------------------------------------------------------------------
761 static int CeedVectorReciprocal_Hip(CeedVector vec) {
762   CeedSize        length;
763   CeedVector_Hip *impl;
764 
765   CeedCallBackend(CeedVectorGetData(vec, &impl));
766   CeedCallBackend(CeedVectorGetLength(vec, &length));
767   // Set value for synced device/host array
768   if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Hip(impl->d_array, length));
769   if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Hip(impl->h_array, length));
770   return CEED_ERROR_SUCCESS;
771 }
772 
773 //------------------------------------------------------------------------------
774 // Compute x = alpha x on the host
775 //------------------------------------------------------------------------------
776 static int CeedHostScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
777   for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha;
778   return CEED_ERROR_SUCCESS;
779 }
780 
781 //------------------------------------------------------------------------------
782 // Compute x = alpha x on device (impl in .hip.cpp file)
783 //------------------------------------------------------------------------------
784 int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length);
785 
786 //------------------------------------------------------------------------------
787 // Compute x = alpha x
788 //------------------------------------------------------------------------------
789 static int CeedVectorScale_Hip(CeedVector x, CeedScalar alpha) {
790   CeedSize        length;
791   CeedVector_Hip *impl;
792 
793   CeedCallBackend(CeedVectorGetData(x, &impl));
794   CeedCallBackend(CeedVectorGetLength(x, &length));
795   // Set value for synced device/host array
796   if (impl->d_array) {
797 #if (HIP_VERSION >= 60000000)
798     hipblasHandle_t handle;
799     hipStream_t     stream;
800 
801     CeedCallBackend(CeedGetHipblasHandle_Hip(CeedVectorReturnCeed(x), &handle));
802     CeedCallHipblas(CeedVectorReturnCeed(x), hipblasGetStream(handle, &stream));
803 #if defined(CEED_SCALAR_IS_FP32)
804     CeedCallHipblas(CeedVectorReturnCeed(x), hipblasSscal_64(handle, (int64_t)length, &alpha, impl->d_array, 1));
805 #else  /* CEED_SCALAR */
806     CeedCallHipblas(CeedVectorReturnCeed(x), hipblasDscal_64(handle, (int64_t)length, &alpha, impl->d_array, 1));
807 #endif /* CEED_SCALAR */
808     CeedCallHip(CeedVectorReturnCeed(x), hipStreamSynchronize(stream));
809 #else  /* HIP_VERSION */
810     CeedCallBackend(CeedDeviceScale_Hip(impl->d_array, alpha, length));
811 #endif /* HIP_VERSION */
812     impl->h_array = NULL;
813   }
814   if (impl->h_array) {
815     CeedCallBackend(CeedHostScale_Hip(impl->h_array, alpha, length));
816     impl->d_array = NULL;
817   }
818   return CEED_ERROR_SUCCESS;
819 }
820 
821 //------------------------------------------------------------------------------
822 // Compute y = alpha x + y on the host
823 //------------------------------------------------------------------------------
824 static int CeedHostAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
825   for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i];
826   return CEED_ERROR_SUCCESS;
827 }
828 
829 //------------------------------------------------------------------------------
830 // Compute y = alpha x + y on device (impl in .hip.cpp file)
831 //------------------------------------------------------------------------------
832 int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length);
833 
834 //------------------------------------------------------------------------------
835 // Compute y = alpha x + y
836 //------------------------------------------------------------------------------
837 static int CeedVectorAXPY_Hip(CeedVector y, CeedScalar alpha, CeedVector x) {
838   CeedSize        length;
839   CeedVector_Hip *y_impl, *x_impl;
840 
841   CeedCallBackend(CeedVectorGetData(y, &y_impl));
842   CeedCallBackend(CeedVectorGetData(x, &x_impl));
843   CeedCallBackend(CeedVectorGetLength(y, &length));
844   // Set value for synced device/host array
845   if (y_impl->d_array) {
846     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
847 #if (HIP_VERSION >= 60000000)
848     hipblasHandle_t handle;
849     hipStream_t     stream;
850 
851     CeedCallBackend(CeedGetHipblasHandle_Hip(CeedVectorReturnCeed(x), &handle));
852     CeedCallHipblas(CeedVectorReturnCeed(y), hipblasGetStream(handle, &stream));
853 #if defined(CEED_SCALAR_IS_FP32)
854     CeedCallHipblas(CeedVectorReturnCeed(y), hipblasSaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1));
855 #else  /* CEED_SCALAR */
856     CeedCallHipblas(CeedVectorReturnCeed(y), hipblasDaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1));
857 #endif /* CEED_SCALAR */
858     CeedCallHip(CeedVectorReturnCeed(y), hipStreamSynchronize(stream));
859 #else  /* HIP_VERSION */
860     CeedCallBackend(CeedDeviceAXPY_Hip(y_impl->d_array, alpha, x_impl->d_array, length));
861 #endif /* HIP_VERSION */
862     y_impl->h_array = NULL;
863   } else if (y_impl->h_array) {
864     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
865     CeedCallBackend(CeedHostAXPY_Hip(y_impl->h_array, alpha, x_impl->h_array, length));
866     y_impl->d_array = NULL;
867   }
868   return CEED_ERROR_SUCCESS;
869 }
870 
871 //------------------------------------------------------------------------------
872 // Compute y = alpha x + beta y on the host
873 //------------------------------------------------------------------------------
874 static int CeedHostAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) {
875   for (CeedSize i = 0; i < length; i++) y_array[i] = alpha * x_array[i] + beta * y_array[i];
876   return CEED_ERROR_SUCCESS;
877 }
878 
879 //------------------------------------------------------------------------------
880 // Compute y = alpha x + beta y on device (impl in .hip.cpp file)
881 //------------------------------------------------------------------------------
882 int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length);
883 
884 //------------------------------------------------------------------------------
885 // Compute y = alpha x + beta y
886 //------------------------------------------------------------------------------
887 static int CeedVectorAXPBY_Hip(CeedVector y, CeedScalar alpha, CeedScalar beta, CeedVector x) {
888   CeedSize        length;
889   CeedVector_Hip *y_impl, *x_impl;
890 
891   CeedCallBackend(CeedVectorGetData(y, &y_impl));
892   CeedCallBackend(CeedVectorGetData(x, &x_impl));
893   CeedCallBackend(CeedVectorGetLength(y, &length));
894   // Set value for synced device/host array
895   if (y_impl->d_array) {
896     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
897     CeedCallBackend(CeedDeviceAXPBY_Hip(y_impl->d_array, alpha, beta, x_impl->d_array, length));
898   }
899   if (y_impl->h_array) {
900     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
901     CeedCallBackend(CeedHostAXPBY_Hip(y_impl->h_array, alpha, beta, x_impl->h_array, length));
902   }
903   return CEED_ERROR_SUCCESS;
904 }
905 
906 //------------------------------------------------------------------------------
907 // Compute the pointwise multiplication w = x .* y on the host
908 //------------------------------------------------------------------------------
909 static int CeedHostPointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
910   for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i];
911   return CEED_ERROR_SUCCESS;
912 }
913 
914 //------------------------------------------------------------------------------
915 // Compute the pointwise multiplication w = x .* y on device (impl in .hip.cpp file)
916 //------------------------------------------------------------------------------
917 int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length);
918 
919 //------------------------------------------------------------------------------
920 // Compute the pointwise multiplication w = x .* y
921 //------------------------------------------------------------------------------
922 static int CeedVectorPointwiseMult_Hip(CeedVector w, CeedVector x, CeedVector y) {
923   CeedSize        length;
924   CeedVector_Hip *w_impl, *x_impl, *y_impl;
925 
926   CeedCallBackend(CeedVectorGetData(w, &w_impl));
927   CeedCallBackend(CeedVectorGetData(x, &x_impl));
928   CeedCallBackend(CeedVectorGetData(y, &y_impl));
929   CeedCallBackend(CeedVectorGetLength(w, &length));
930 
931   // Set value for synced device/host array
932   if (!w_impl->d_array && !w_impl->h_array) {
933     CeedCallBackend(CeedVectorSetValue(w, 0.0));
934   }
935   if (w_impl->d_array) {
936     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
937     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE));
938     CeedCallBackend(CeedDevicePointwiseMult_Hip(w_impl->d_array, x_impl->d_array, y_impl->d_array, length));
939   }
940   if (w_impl->h_array) {
941     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
942     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST));
943     CeedCallBackend(CeedHostPointwiseMult_Hip(w_impl->h_array, x_impl->h_array, y_impl->h_array, length));
944   }
945   return CEED_ERROR_SUCCESS;
946 }
947 
948 //------------------------------------------------------------------------------
949 // Destroy the vector
950 //------------------------------------------------------------------------------
951 static int CeedVectorDestroy_Hip(const CeedVector vec) {
952   CeedVector_Hip *impl;
953 
954   CeedCallBackend(CeedVectorGetData(vec, &impl));
955   CeedCallHip(CeedVectorReturnCeed(vec), hipFree(impl->d_array_owned));
956   CeedCallBackend(CeedFree(&impl->h_array_owned));
957   CeedCallBackend(CeedFree(&impl));
958   return CEED_ERROR_SUCCESS;
959 }
960 
961 //------------------------------------------------------------------------------
962 // Create a vector of the specified length (does not allocate memory)
963 //------------------------------------------------------------------------------
964 int CeedVectorCreate_Hip(CeedSize n, CeedVector vec) {
965   CeedVector_Hip *impl;
966   Ceed_Hip       *hip_impl;
967   Ceed            ceed;
968 
969   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
970   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Hip));
971   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Hip));
972   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Hip));
973   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Hip));
974   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "CopyStrided", CeedVectorCopyStrided_Hip));
975   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Hip));
976   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetValueStrided", CeedVectorSetValueStrided_Hip));
977   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Hip));
978   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Hip));
979   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Hip));
980   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Hip));
981   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Norm", CeedVectorNorm_Hip));
982   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Hip));
983   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Scale", CeedVectorScale_Hip));
984   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Hip));
985   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "AXPBY", CeedVectorAXPBY_Hip));
986   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Hip));
987   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Hip));
988   CeedCallBackend(CeedCalloc(1, &impl));
989   CeedCallBackend(CeedGetData(ceed, &hip_impl));
990   CeedCallBackend(CeedDestroy(&ceed));
991   impl->has_unified_addressing = hip_impl->has_unified_addressing;
992   CeedCallBackend(CeedVectorSetData(vec, impl));
993   return CEED_ERROR_SUCCESS;
994 }
995 
996 //------------------------------------------------------------------------------
997