xref: /libCEED/backends/hip-ref/ceed-hip-ref-vector.c (revision d4cc18453651bd0f94c1a2e078b2646a92dafdcc)
1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
30d0321e0SJeremy L Thompson //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
50d0321e0SJeremy L Thompson //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
70d0321e0SJeremy L Thompson 
849aac155SJeremy L Thompson #include <ceed.h>
90d0321e0SJeremy L Thompson #include <ceed/backend.h>
100d0321e0SJeremy L Thompson #include <math.h>
1149aac155SJeremy L Thompson #include <stdbool.h>
120d0321e0SJeremy L Thompson #include <string.h>
13c85e8640SSebastian Grimberg #include <hip/hip_runtime.h>
140d0321e0SJeremy L Thompson 
1549aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h"
162b730f8bSJeremy L Thompson #include "ceed-hip-ref.h"
17f48ed27dSnbeams 
18f48ed27dSnbeams //------------------------------------------------------------------------------
19f48ed27dSnbeams // Check if host/device sync is needed
20f48ed27dSnbeams //------------------------------------------------------------------------------
CeedVectorNeedSync_Hip(const CeedVector vec,CeedMemType mem_type,bool * need_sync)212b730f8bSJeremy L Thompson static inline int CeedVectorNeedSync_Hip(const CeedVector vec, CeedMemType mem_type, bool *need_sync) {
22f48ed27dSnbeams   CeedVector_Hip *impl;
23f48ed27dSnbeams   bool            has_valid_array = false;
24b7453713SJeremy L Thompson 
25b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
262b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array));
27f48ed27dSnbeams   switch (mem_type) {
28f48ed27dSnbeams     case CEED_MEM_HOST:
29f48ed27dSnbeams       *need_sync = has_valid_array && !impl->h_array;
30f48ed27dSnbeams       break;
31f48ed27dSnbeams     case CEED_MEM_DEVICE:
32f48ed27dSnbeams       *need_sync = has_valid_array && !impl->d_array;
33f48ed27dSnbeams       break;
34f48ed27dSnbeams   }
35f48ed27dSnbeams   return CEED_ERROR_SUCCESS;
36f48ed27dSnbeams }
37f48ed27dSnbeams 
380d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
390d0321e0SJeremy L Thompson // Sync host to device
400d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSyncH2D_Hip(const CeedVector vec)410d0321e0SJeremy L Thompson static inline int CeedVectorSyncH2D_Hip(const CeedVector vec) {
42b7453713SJeremy L Thompson   CeedSize        length;
43672b0f2aSSebastian Grimberg   size_t          bytes;
440d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
45b7453713SJeremy L Thompson 
462b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
470d0321e0SJeremy L Thompson 
489bc66399SJeremy L Thompson   CeedCheck(impl->h_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid host data to sync to device");
490d0321e0SJeremy L Thompson 
50672b0f2aSSebastian Grimberg   CeedCallBackend(CeedVectorGetLength(vec, &length));
51672b0f2aSSebastian Grimberg   bytes = length * sizeof(CeedScalar);
520d0321e0SJeremy L Thompson   if (impl->d_array_borrowed) {
530d0321e0SJeremy L Thompson     impl->d_array = impl->d_array_borrowed;
540d0321e0SJeremy L Thompson   } else if (impl->d_array_owned) {
550d0321e0SJeremy L Thompson     impl->d_array = impl->d_array_owned;
560d0321e0SJeremy L Thompson   } else {
579bc66399SJeremy L Thompson     CeedCallHip(CeedVectorReturnCeed(vec), hipMalloc((void **)&impl->d_array_owned, bytes));
580d0321e0SJeremy L Thompson     impl->d_array = impl->d_array_owned;
590d0321e0SJeremy L Thompson   }
609bc66399SJeremy L Thompson   CeedCallHip(CeedVectorReturnCeed(vec), hipMemcpy(impl->d_array, impl->h_array, bytes, hipMemcpyHostToDevice));
610d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
620d0321e0SJeremy L Thompson }
630d0321e0SJeremy L Thompson 
640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
650d0321e0SJeremy L Thompson // Sync device to host
660d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSyncD2H_Hip(const CeedVector vec)670d0321e0SJeremy L Thompson static inline int CeedVectorSyncD2H_Hip(const CeedVector vec) {
68b7453713SJeremy L Thompson   CeedSize        length;
69672b0f2aSSebastian Grimberg   size_t          bytes;
700d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
71b7453713SJeremy L Thompson 
722b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
730d0321e0SJeremy L Thompson 
749bc66399SJeremy L Thompson   CeedCheck(impl->d_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid device data to sync to host");
750d0321e0SJeremy L Thompson 
760d0321e0SJeremy L Thompson   if (impl->h_array_borrowed) {
770d0321e0SJeremy L Thompson     impl->h_array = impl->h_array_borrowed;
780d0321e0SJeremy L Thompson   } else if (impl->h_array_owned) {
790d0321e0SJeremy L Thompson     impl->h_array = impl->h_array_owned;
800d0321e0SJeremy L Thompson   } else {
811f9221feSJeremy L Thompson     CeedSize length;
82672b0f2aSSebastian Grimberg 
832b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorGetLength(vec, &length));
842b730f8bSJeremy L Thompson     CeedCallBackend(CeedCalloc(length, &impl->h_array_owned));
850d0321e0SJeremy L Thompson     impl->h_array = impl->h_array_owned;
860d0321e0SJeremy L Thompson   }
870d0321e0SJeremy L Thompson 
882b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
89672b0f2aSSebastian Grimberg   bytes = length * sizeof(CeedScalar);
909bc66399SJeremy L Thompson   CeedCallHip(CeedVectorReturnCeed(vec), hipMemcpy(impl->h_array, impl->d_array, bytes, hipMemcpyDeviceToHost));
910d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
920d0321e0SJeremy L Thompson }
930d0321e0SJeremy L Thompson 
940d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
950d0321e0SJeremy L Thompson // Sync arrays
960d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSyncArray_Hip(const CeedVector vec,CeedMemType mem_type)972b730f8bSJeremy L Thompson static int CeedVectorSyncArray_Hip(const CeedVector vec, CeedMemType mem_type) {
98f48ed27dSnbeams   bool            need_sync = false;
99a3b195efSJeremy L Thompson   CeedVector_Hip *impl;
100a3b195efSJeremy L Thompson 
101a3b195efSJeremy L Thompson   // Sync for unified memory
102a3b195efSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
103a3b195efSJeremy L Thompson   if (impl->has_unified_addressing && !impl->h_array_borrowed) {
104a3b195efSJeremy L Thompson     CeedCallHip(CeedVectorReturnCeed(vec), hipDeviceSynchronize());
105a3b195efSJeremy L Thompson     return CEED_ERROR_SUCCESS;
106a3b195efSJeremy L Thompson   }
107b7453713SJeremy L Thompson 
108b7453713SJeremy L Thompson   // Check whether device/host sync is needed
1092b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorNeedSync_Hip(vec, mem_type, &need_sync));
1102b730f8bSJeremy L Thompson   if (!need_sync) return CEED_ERROR_SUCCESS;
111f48ed27dSnbeams 
11243c928f4SJeremy L Thompson   switch (mem_type) {
1132b730f8bSJeremy L Thompson     case CEED_MEM_HOST:
1142b730f8bSJeremy L Thompson       return CeedVectorSyncD2H_Hip(vec);
1152b730f8bSJeremy L Thompson     case CEED_MEM_DEVICE:
1162b730f8bSJeremy L Thompson       return CeedVectorSyncH2D_Hip(vec);
1170d0321e0SJeremy L Thompson   }
1180d0321e0SJeremy L Thompson   return CEED_ERROR_UNSUPPORTED;
1190d0321e0SJeremy L Thompson }
1200d0321e0SJeremy L Thompson 
1210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1220d0321e0SJeremy L Thompson // Set all pointers as invalid
1230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSetAllInvalid_Hip(const CeedVector vec)1240d0321e0SJeremy L Thompson static inline int CeedVectorSetAllInvalid_Hip(const CeedVector vec) {
1250d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
1260d0321e0SJeremy L Thompson 
127b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
1280d0321e0SJeremy L Thompson   impl->h_array = NULL;
1290d0321e0SJeremy L Thompson   impl->d_array = NULL;
1300d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1310d0321e0SJeremy L Thompson }
1320d0321e0SJeremy L Thompson 
1330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
134b2165e7aSSebastian Grimberg // Check if CeedVector has any valid pointer
1350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorHasValidArray_Hip(const CeedVector vec,bool * has_valid_array)1362b730f8bSJeremy L Thompson static inline int CeedVectorHasValidArray_Hip(const CeedVector vec, bool *has_valid_array) {
1370d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
138b7453713SJeremy L Thompson 
1392b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
1401c66c397SJeremy L Thompson   *has_valid_array = impl->h_array || impl->d_array;
1410d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1420d0321e0SJeremy L Thompson }
1430d0321e0SJeremy L Thompson 
1440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
145b2165e7aSSebastian Grimberg // Check if has array of given type
1460d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorHasArrayOfType_Hip(const CeedVector vec,CeedMemType mem_type,bool * has_array_of_type)1472b730f8bSJeremy L Thompson static inline int CeedVectorHasArrayOfType_Hip(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) {
1480d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
1490d0321e0SJeremy L Thompson 
150b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
15143c928f4SJeremy L Thompson   switch (mem_type) {
1520d0321e0SJeremy L Thompson     case CEED_MEM_HOST:
1531c66c397SJeremy L Thompson       *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned;
1540d0321e0SJeremy L Thompson       break;
1550d0321e0SJeremy L Thompson     case CEED_MEM_DEVICE:
1561c66c397SJeremy L Thompson       *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned;
1570d0321e0SJeremy L Thompson       break;
1580d0321e0SJeremy L Thompson   }
1590d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1600d0321e0SJeremy L Thompson }
1610d0321e0SJeremy L Thompson 
1620d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1630d0321e0SJeremy L Thompson // Check if has borrowed array of given type
1640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorHasBorrowedArrayOfType_Hip(const CeedVector vec,CeedMemType mem_type,bool * has_borrowed_array_of_type)1652b730f8bSJeremy L Thompson static inline int CeedVectorHasBorrowedArrayOfType_Hip(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) {
1660d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
1670d0321e0SJeremy L Thompson 
168b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
169a3b195efSJeremy L Thompson 
170a3b195efSJeremy L Thompson   // Use device memory for unified memory
171a3b195efSJeremy L Thompson   mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type;
172a3b195efSJeremy L Thompson 
17343c928f4SJeremy L Thompson   switch (mem_type) {
1740d0321e0SJeremy L Thompson     case CEED_MEM_HOST:
1751c66c397SJeremy L Thompson       *has_borrowed_array_of_type = impl->h_array_borrowed;
1760d0321e0SJeremy L Thompson       break;
1770d0321e0SJeremy L Thompson     case CEED_MEM_DEVICE:
1781c66c397SJeremy L Thompson       *has_borrowed_array_of_type = impl->d_array_borrowed;
1790d0321e0SJeremy L Thompson       break;
1800d0321e0SJeremy L Thompson   }
1810d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1820d0321e0SJeremy L Thompson }
1830d0321e0SJeremy L Thompson 
1840d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
1850d0321e0SJeremy L Thompson // Set array from host
1860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSetArrayHost_Hip(const CeedVector vec,const CeedCopyMode copy_mode,CeedScalar * array)1872b730f8bSJeremy L Thompson static int CeedVectorSetArrayHost_Hip(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
188a267acd1SJeremy L Thompson   CeedSize        length;
1890d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
1900d0321e0SJeremy L Thompson 
191b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
192a267acd1SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
193a267acd1SJeremy L Thompson 
194f5d1e504SJeremy L Thompson   CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h_array_owned,
195f5d1e504SJeremy L Thompson                                              (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array));
1960d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1970d0321e0SJeremy L Thompson }
1980d0321e0SJeremy L Thompson 
1990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
2000d0321e0SJeremy L Thompson // Set array from device
2010d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSetArrayDevice_Hip(const CeedVector vec,const CeedCopyMode copy_mode,CeedScalar * array)2022b730f8bSJeremy L Thompson static int CeedVectorSetArrayDevice_Hip(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
203a267acd1SJeremy L Thompson   CeedSize        length;
2040d0321e0SJeremy L Thompson   Ceed            ceed;
2050d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
2060d0321e0SJeremy L Thompson 
207b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
208b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
209a267acd1SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
210f5d1e504SJeremy L Thompson 
211f5d1e504SJeremy L Thompson   CeedCallBackend(CeedSetDeviceCeedScalarArray_Hip(ceed, array, copy_mode, length, (const CeedScalar **)&impl->d_array_owned,
212f5d1e504SJeremy L Thompson                                                    (const CeedScalar **)&impl->d_array_borrowed, (const CeedScalar **)&impl->d_array));
2139bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
2140d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2150d0321e0SJeremy L Thompson }
2160d0321e0SJeremy L Thompson 
2170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
218a3b195efSJeremy L Thompson // Set array with unified memory
219a3b195efSJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSetArrayUnifiedHostToDevice_Hip(const CeedVector vec,const CeedCopyMode copy_mode,CeedScalar * array)220a3b195efSJeremy L Thompson static int CeedVectorSetArrayUnifiedHostToDevice_Hip(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) {
221a3b195efSJeremy L Thompson   CeedSize        length;
222a3b195efSJeremy L Thompson   Ceed            ceed;
223a3b195efSJeremy L Thompson   CeedVector_Hip *impl;
224a3b195efSJeremy L Thompson 
225a3b195efSJeremy L Thompson   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
226a3b195efSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
227a3b195efSJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
228a3b195efSJeremy L Thompson 
229a3b195efSJeremy L Thompson   switch (copy_mode) {
230a3b195efSJeremy L Thompson     case CEED_COPY_VALUES:
231a3b195efSJeremy L Thompson     case CEED_OWN_POINTER:
232a3b195efSJeremy L Thompson       if (!impl->d_array) {
233a3b195efSJeremy L Thompson         if (impl->d_array_borrowed) {
234a3b195efSJeremy L Thompson           impl->d_array = impl->d_array_borrowed;
235a3b195efSJeremy L Thompson         } else {
236a3b195efSJeremy L Thompson           if (!impl->d_array_owned) CeedCallHip(ceed, hipMalloc((void **)&impl->d_array_owned, sizeof(CeedScalar) * length));
237a3b195efSJeremy L Thompson           impl->d_array = impl->d_array_owned;
238a3b195efSJeremy L Thompson         }
239a3b195efSJeremy L Thompson       }
240a3b195efSJeremy L Thompson       if (array) CeedCallHip(ceed, hipMemcpy(impl->d_array, array, sizeof(CeedScalar) * length, hipMemcpyHostToDevice));
241a3b195efSJeremy L Thompson       if (copy_mode == CEED_OWN_POINTER) CeedCallBackend(CeedFree(&array));
242a3b195efSJeremy L Thompson       break;
243a3b195efSJeremy L Thompson     case CEED_USE_POINTER:
244a3b195efSJeremy L Thompson       CeedCallHip(ceed, hipFree(impl->d_array_owned));
245a3b195efSJeremy L Thompson       CeedCallBackend(CeedFree(&impl->h_array_owned));
246a3b195efSJeremy L Thompson       impl->h_array_owned    = NULL;
247a3b195efSJeremy L Thompson       impl->h_array_borrowed = array;
248a3b195efSJeremy L Thompson       impl->d_array          = impl->h_array_borrowed;
249a3b195efSJeremy L Thompson   }
250a3b195efSJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
251a3b195efSJeremy L Thompson   return CEED_ERROR_SUCCESS;
252a3b195efSJeremy L Thompson }
253a3b195efSJeremy L Thompson 
254a3b195efSJeremy L Thompson //------------------------------------------------------------------------------
2550d0321e0SJeremy L Thompson // Set the array used by a vector,
2560d0321e0SJeremy L Thompson //   freeing any previously allocated array if applicable
2570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSetArray_Hip(const CeedVector vec,const CeedMemType mem_type,const CeedCopyMode copy_mode,CeedScalar * array)2582b730f8bSJeremy L Thompson static int CeedVectorSetArray_Hip(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) {
2590d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
2600d0321e0SJeremy L Thompson 
261b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
2622b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorSetAllInvalid_Hip(vec));
26343c928f4SJeremy L Thompson   switch (mem_type) {
2640d0321e0SJeremy L Thompson     case CEED_MEM_HOST:
265a3b195efSJeremy L Thompson       if (impl->has_unified_addressing) {
266a3b195efSJeremy L Thompson         return CeedVectorSetArrayUnifiedHostToDevice_Hip(vec, copy_mode, array);
267a3b195efSJeremy L Thompson       } else {
26843c928f4SJeremy L Thompson         return CeedVectorSetArrayHost_Hip(vec, copy_mode, array);
269a3b195efSJeremy L Thompson       }
2700d0321e0SJeremy L Thompson     case CEED_MEM_DEVICE:
27143c928f4SJeremy L Thompson       return CeedVectorSetArrayDevice_Hip(vec, copy_mode, array);
2720d0321e0SJeremy L Thompson   }
2730d0321e0SJeremy L Thompson   return CEED_ERROR_UNSUPPORTED;
2740d0321e0SJeremy L Thompson }
2750d0321e0SJeremy L Thompson 
2760d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
2773196072fSJeremy L Thompson // Copy host array to value strided
2783196072fSJeremy L Thompson //------------------------------------------------------------------------------
CeedHostCopyStrided_Hip(CeedScalar * h_array,CeedSize start,CeedSize stop,CeedSize step,CeedScalar * h_copy_array)279832a6d73SJeremy L Thompson static int CeedHostCopyStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *h_copy_array) {
280832a6d73SJeremy L Thompson   for (CeedSize i = start; i < stop; i += step) h_copy_array[i] = h_array[i];
2813196072fSJeremy L Thompson   return CEED_ERROR_SUCCESS;
2823196072fSJeremy L Thompson }
2833196072fSJeremy L Thompson 
2843196072fSJeremy L Thompson //------------------------------------------------------------------------------
285956a3dbaSJeremy L Thompson // Copy device array to value strided (impl in .hip.cpp file)
2863196072fSJeremy L Thompson //------------------------------------------------------------------------------
287832a6d73SJeremy L Thompson int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array);
2883196072fSJeremy L Thompson 
2893196072fSJeremy L Thompson //------------------------------------------------------------------------------
2903196072fSJeremy L Thompson // Copy a vector to a value strided
2913196072fSJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorCopyStrided_Hip(CeedVector vec,CeedSize start,CeedSize stop,CeedSize step,CeedVector vec_copy)292832a6d73SJeremy L Thompson static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) {
2933196072fSJeremy L Thompson   CeedSize        length;
2943196072fSJeremy L Thompson   CeedVector_Hip *impl;
2953196072fSJeremy L Thompson 
2963196072fSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
297a7efc114SJeremy L Thompson   {
298a7efc114SJeremy L Thompson     CeedSize length_vec, length_copy;
299a7efc114SJeremy L Thompson 
3005a5594ffSJeremy L Thompson     CeedCallBackend(CeedVectorGetLength(vec, &length_vec));
3015a5594ffSJeremy L Thompson     CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy));
302a7efc114SJeremy L Thompson     length = length_vec < length_copy ? length_vec : length_copy;
303a7efc114SJeremy L Thompson   }
304832a6d73SJeremy L Thompson   if (stop == -1) stop = length;
3053196072fSJeremy L Thompson   // Set value for synced device/host array
3063196072fSJeremy L Thompson   if (impl->d_array) {
3073196072fSJeremy L Thompson     CeedScalar *copy_array;
308b46df0d2SZach Atkins     Ceed        ceed;
3093196072fSJeremy L Thompson 
310b46df0d2SZach Atkins     CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
3113196072fSJeremy L Thompson     CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_DEVICE, &copy_array));
312e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000)
313e84c3ebcSJeremy L Thompson     hipblasHandle_t handle;
3140002d81dSZach Atkins     hipStream_t     stream;
315e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedGetHipblasHandle_Hip(ceed, &handle));
3160002d81dSZach Atkins     CeedCallHipblas(ceed, hipblasGetStream(handle, &stream));
317e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
318832a6d73SJeremy L Thompson     CeedCallHipblas(ceed, hipblasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
319e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
320832a6d73SJeremy L Thompson     CeedCallHipblas(ceed, hipblasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step));
321e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
3220002d81dSZach Atkins     CeedCallHip(ceed, hipStreamSynchronize(stream));
323e84c3ebcSJeremy L Thompson #else  /* HIP_VERSION */
324832a6d73SJeremy L Thompson     CeedCallBackend(CeedDeviceCopyStrided_Hip(impl->d_array, start, stop, step, copy_array));
325e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
3263196072fSJeremy L Thompson     CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
327e84c3ebcSJeremy L Thompson     impl->h_array = NULL;
328e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedDestroy(&ceed));
3293196072fSJeremy L Thompson   } else if (impl->h_array) {
3303196072fSJeremy L Thompson     CeedScalar *copy_array;
3313196072fSJeremy L Thompson 
3323196072fSJeremy L Thompson     CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, &copy_array));
333832a6d73SJeremy L Thompson     CeedCallBackend(CeedHostCopyStrided_Hip(impl->h_array, start, stop, step, copy_array));
3343196072fSJeremy L Thompson     CeedCallBackend(CeedVectorRestoreArray(vec_copy, &copy_array));
335e84c3ebcSJeremy L Thompson     impl->d_array = NULL;
3363196072fSJeremy L Thompson   } else {
3373196072fSJeremy L Thompson     return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");
3383196072fSJeremy L Thompson   }
3393196072fSJeremy L Thompson   return CEED_ERROR_SUCCESS;
3403196072fSJeremy L Thompson }
3413196072fSJeremy L Thompson 
3423196072fSJeremy L Thompson //------------------------------------------------------------------------------
3430d0321e0SJeremy L Thompson // Set host array to value
3440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedHostSetValue_Hip(CeedScalar * h_array,CeedSize length,CeedScalar val)3459330daecSnbeams static int CeedHostSetValue_Hip(CeedScalar *h_array, CeedSize length, CeedScalar val) {
3469330daecSnbeams   for (CeedSize i = 0; i < length; i++) h_array[i] = val;
3470d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3480d0321e0SJeremy L Thompson }
3490d0321e0SJeremy L Thompson 
3500d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3510d0321e0SJeremy L Thompson // Set device array to value (impl in .hip file)
3520d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3539330daecSnbeams int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val);
3540d0321e0SJeremy L Thompson 
3550d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
356b2165e7aSSebastian Grimberg // Set a vector to a value
3570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSetValue_Hip(CeedVector vec,CeedScalar val)3580d0321e0SJeremy L Thompson static int CeedVectorSetValue_Hip(CeedVector vec, CeedScalar val) {
3591f9221feSJeremy L Thompson   CeedSize        length;
360b7453713SJeremy L Thompson   CeedVector_Hip *impl;
361a3b195efSJeremy L Thompson   Ceed_Hip       *hip_data;
3620d0321e0SJeremy L Thompson 
363b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
364a3b195efSJeremy L Thompson   CeedCallBackend(CeedGetData(CeedVectorReturnCeed(vec), &hip_data));
365b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
3660d0321e0SJeremy L Thompson   // Set value for synced device/host array
3670d0321e0SJeremy L Thompson   if (!impl->d_array && !impl->h_array) {
3680d0321e0SJeremy L Thompson     if (impl->d_array_borrowed) {
3690d0321e0SJeremy L Thompson       impl->d_array = impl->d_array_borrowed;
3700d0321e0SJeremy L Thompson     } else if (impl->h_array_borrowed) {
3710d0321e0SJeremy L Thompson       impl->h_array = impl->h_array_borrowed;
3720d0321e0SJeremy L Thompson     } else if (impl->d_array_owned) {
3730d0321e0SJeremy L Thompson       impl->d_array = impl->d_array_owned;
3740d0321e0SJeremy L Thompson     } else if (impl->h_array_owned) {
3750d0321e0SJeremy L Thompson       impl->h_array = impl->h_array_owned;
3760d0321e0SJeremy L Thompson     } else {
3772b730f8bSJeremy L Thompson       CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL));
3780d0321e0SJeremy L Thompson     }
3790d0321e0SJeremy L Thompson   }
3800d0321e0SJeremy L Thompson   if (impl->d_array) {
381a3b195efSJeremy L Thompson     if (val == 0 && !impl->h_array_borrowed) {
382124cc107SJeremy L Thompson       CeedCallHip(CeedVectorReturnCeed(vec), hipMemset(impl->d_array, 0, length * sizeof(CeedScalar)));
383124cc107SJeremy L Thompson     } else {
3842b730f8bSJeremy L Thompson       CeedCallBackend(CeedDeviceSetValue_Hip(impl->d_array, length, val));
3850d0321e0SJeremy L Thompson     }
386124cc107SJeremy L Thompson     impl->h_array = NULL;
387124cc107SJeremy L Thompson   } else if (impl->h_array) {
3882b730f8bSJeremy L Thompson     CeedCallBackend(CeedHostSetValue_Hip(impl->h_array, length, val));
389b2165e7aSSebastian Grimberg     impl->d_array = NULL;
3900d0321e0SJeremy L Thompson   }
3910d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3920d0321e0SJeremy L Thompson }
3930d0321e0SJeremy L Thompson 
3940d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3953196072fSJeremy L Thompson // Set host array to value strided
3963196072fSJeremy L Thompson //------------------------------------------------------------------------------
CeedHostSetValueStrided_Hip(CeedScalar * h_array,CeedSize start,CeedSize stop,CeedSize step,CeedScalar val)39714c82621SJeremy L Thompson static int CeedHostSetValueStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
3982d73a370SJeremy L Thompson   for (CeedSize i = start; i < stop; i += step) h_array[i] = val;
3993196072fSJeremy L Thompson   return CEED_ERROR_SUCCESS;
4003196072fSJeremy L Thompson }
4013196072fSJeremy L Thompson 
4023196072fSJeremy L Thompson //------------------------------------------------------------------------------
403956a3dbaSJeremy L Thompson // Set device array to value strided (impl in .hip.cpp file)
4043196072fSJeremy L Thompson //------------------------------------------------------------------------------
40514c82621SJeremy L Thompson int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val);
4063196072fSJeremy L Thompson 
4073196072fSJeremy L Thompson //------------------------------------------------------------------------------
4083196072fSJeremy L Thompson // Set a vector to a value strided
4093196072fSJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorSetValueStrided_Hip(CeedVector vec,CeedSize start,CeedSize stop,CeedSize step,CeedScalar val)410ff90b007SJeremy L Thompson static int CeedVectorSetValueStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) {
4113196072fSJeremy L Thompson   CeedSize        length;
4123196072fSJeremy L Thompson   CeedVector_Hip *impl;
4133196072fSJeremy L Thompson 
4143196072fSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
4153196072fSJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
4163196072fSJeremy L Thompson   // Set value for synced device/host array
417ff90b007SJeremy L Thompson   if (stop == -1) stop = length;
4183196072fSJeremy L Thompson   if (impl->d_array) {
41914c82621SJeremy L Thompson     CeedCallBackend(CeedDeviceSetValueStrided_Hip(impl->d_array, start, stop, step, val));
4203196072fSJeremy L Thompson     impl->h_array = NULL;
4213196072fSJeremy L Thompson   } else if (impl->h_array) {
42214c82621SJeremy L Thompson     CeedCallBackend(CeedHostSetValueStrided_Hip(impl->h_array, start, stop, step, val));
4233196072fSJeremy L Thompson     impl->d_array = NULL;
4243196072fSJeremy L Thompson   } else {
4253196072fSJeremy L Thompson     return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set");
4263196072fSJeremy L Thompson   }
4273196072fSJeremy L Thompson   return CEED_ERROR_SUCCESS;
4283196072fSJeremy L Thompson }
4293196072fSJeremy L Thompson 
4303196072fSJeremy L Thompson //------------------------------------------------------------------------------
4310d0321e0SJeremy L Thompson // Vector Take Array
4320d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorTakeArray_Hip(CeedVector vec,CeedMemType mem_type,CeedScalar ** array)4332b730f8bSJeremy L Thompson static int CeedVectorTakeArray_Hip(CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
4340d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
435b7453713SJeremy L Thompson 
4362b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
4370d0321e0SJeremy L Thompson 
43843c928f4SJeremy L Thompson   // Sync array to requested mem_type
4392b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
4400d0321e0SJeremy L Thompson 
4410d0321e0SJeremy L Thompson   // Update pointer
44243c928f4SJeremy L Thompson   switch (mem_type) {
4430d0321e0SJeremy L Thompson     case CEED_MEM_HOST:
4440d0321e0SJeremy L Thompson       (*array)               = impl->h_array_borrowed;
4450d0321e0SJeremy L Thompson       impl->h_array_borrowed = NULL;
4460d0321e0SJeremy L Thompson       impl->h_array          = NULL;
4470d0321e0SJeremy L Thompson       break;
4480d0321e0SJeremy L Thompson     case CEED_MEM_DEVICE:
4490d0321e0SJeremy L Thompson       (*array)               = impl->d_array_borrowed;
4500d0321e0SJeremy L Thompson       impl->d_array_borrowed = NULL;
4510d0321e0SJeremy L Thompson       impl->d_array          = NULL;
4520d0321e0SJeremy L Thompson       break;
4530d0321e0SJeremy L Thompson   }
4540d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4550d0321e0SJeremy L Thompson }
4560d0321e0SJeremy L Thompson 
4570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
458a3b195efSJeremy L Thompson // Core logic for array synchronization for GetArray.
4590d0321e0SJeremy L Thompson //   If a different memory type is most up to date, this will perform a copy
4600d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorGetArrayCore_Hip(const CeedVector vec,CeedMemType mem_type,CeedScalar ** array)461a3b195efSJeremy L Thompson static int CeedVectorGetArrayCore_Hip(const CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
4620d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
463b7453713SJeremy L Thompson 
4642b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
4650d0321e0SJeremy L Thompson 
466a3b195efSJeremy L Thompson   // Use device memory for unified memory
467a3b195efSJeremy L Thompson   mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type;
468a3b195efSJeremy L Thompson 
46943c928f4SJeremy L Thompson   // Sync array to requested mem_type
4702b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorSyncArray(vec, mem_type));
4710d0321e0SJeremy L Thompson 
4720d0321e0SJeremy L Thompson   // Update pointer
47343c928f4SJeremy L Thompson   switch (mem_type) {
4740d0321e0SJeremy L Thompson     case CEED_MEM_HOST:
4750d0321e0SJeremy L Thompson       *array = impl->h_array;
4760d0321e0SJeremy L Thompson       break;
4770d0321e0SJeremy L Thompson     case CEED_MEM_DEVICE:
4780d0321e0SJeremy L Thompson       *array = impl->d_array;
4790d0321e0SJeremy L Thompson       break;
4800d0321e0SJeremy L Thompson   }
4810d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4820d0321e0SJeremy L Thompson }
4830d0321e0SJeremy L Thompson 
4840d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
48543c928f4SJeremy L Thompson // Get read-only access to a vector via the specified mem_type
4860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorGetArrayRead_Hip(const CeedVector vec,const CeedMemType mem_type,const CeedScalar ** array)4872b730f8bSJeremy L Thompson static int CeedVectorGetArrayRead_Hip(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) {
48843c928f4SJeremy L Thompson   return CeedVectorGetArrayCore_Hip(vec, mem_type, (CeedScalar **)array);
4890d0321e0SJeremy L Thompson }
4900d0321e0SJeremy L Thompson 
4910d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
49243c928f4SJeremy L Thompson // Get read/write access to a vector via the specified mem_type
4930d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorGetArray_Hip(const CeedVector vec,CeedMemType mem_type,CeedScalar ** array)494a3b195efSJeremy L Thompson static int CeedVectorGetArray_Hip(const CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
4950d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
496b7453713SJeremy L Thompson 
4972b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
498a3b195efSJeremy L Thompson 
499a3b195efSJeremy L Thompson   // Use device memory for unified memory
500a3b195efSJeremy L Thompson   mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type;
501a3b195efSJeremy L Thompson 
502a3b195efSJeremy L Thompson   // 'Get' array and set only 'get'ed array as valid
5032b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetArrayCore_Hip(vec, mem_type, array));
5042b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorSetAllInvalid_Hip(vec));
50543c928f4SJeremy L Thompson   switch (mem_type) {
5060d0321e0SJeremy L Thompson     case CEED_MEM_HOST:
5070d0321e0SJeremy L Thompson       impl->h_array = *array;
508a3b195efSJeremy L Thompson       if (impl->has_unified_addressing) impl->d_array = *array;
5090d0321e0SJeremy L Thompson       break;
5100d0321e0SJeremy L Thompson     case CEED_MEM_DEVICE:
5110d0321e0SJeremy L Thompson       impl->d_array = *array;
5120d0321e0SJeremy L Thompson       break;
5130d0321e0SJeremy L Thompson   }
5140d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
5150d0321e0SJeremy L Thompson }
5160d0321e0SJeremy L Thompson 
5170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
51843c928f4SJeremy L Thompson // Get write access to a vector via the specified mem_type
5190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorGetArrayWrite_Hip(const CeedVector vec,CeedMemType mem_type,CeedScalar ** array)520a3b195efSJeremy L Thompson static int CeedVectorGetArrayWrite_Hip(const CeedVector vec, CeedMemType mem_type, CeedScalar **array) {
5210d0321e0SJeremy L Thompson   bool            has_array_of_type = true;
522b7453713SJeremy L Thompson   CeedVector_Hip *impl;
523a3b195efSJeremy L Thompson   Ceed_Hip       *hip_data;
524b7453713SJeremy L Thompson 
525b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
526a3b195efSJeremy L Thompson   CeedCallBackend(CeedGetData(CeedVectorReturnCeed(vec), &hip_data));
527a3b195efSJeremy L Thompson 
528a3b195efSJeremy L Thompson   // Use device memory for unified memory
529a3b195efSJeremy L Thompson   mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type;
530a3b195efSJeremy L Thompson 
5312b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorHasArrayOfType_Hip(vec, mem_type, &has_array_of_type));
5320d0321e0SJeremy L Thompson   if (!has_array_of_type) {
5330d0321e0SJeremy L Thompson     // Allocate if array is not yet allocated
5342b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL));
5350d0321e0SJeremy L Thompson   } else {
5360d0321e0SJeremy L Thompson     // Select dirty array
53743c928f4SJeremy L Thompson     switch (mem_type) {
5380d0321e0SJeremy L Thompson       case CEED_MEM_HOST:
5392b730f8bSJeremy L Thompson         if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed;
5402b730f8bSJeremy L Thompson         else impl->h_array = impl->h_array_owned;
5410d0321e0SJeremy L Thompson         break;
5420d0321e0SJeremy L Thompson       case CEED_MEM_DEVICE:
5432b730f8bSJeremy L Thompson         if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed;
5442b730f8bSJeremy L Thompson         else impl->d_array = impl->d_array_owned;
5450d0321e0SJeremy L Thompson     }
5460d0321e0SJeremy L Thompson   }
54743c928f4SJeremy L Thompson   return CeedVectorGetArray_Hip(vec, mem_type, array);
5480d0321e0SJeremy L Thompson }
5490d0321e0SJeremy L Thompson 
5500d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
5510d0321e0SJeremy L Thompson // Get the norm of a CeedVector
5520d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorNorm_Hip(CeedVector vec,CeedNormType type,CeedScalar * norm)5532b730f8bSJeremy L Thompson static int CeedVectorNorm_Hip(CeedVector vec, CeedNormType type, CeedScalar *norm) {
5540d0321e0SJeremy L Thompson   Ceed     ceed;
555e84c3ebcSJeremy L Thompson   CeedSize length;
556e84c3ebcSJeremy L Thompson #if (HIP_VERSION < 60000000)
557e84c3ebcSJeremy L Thompson   CeedSize num_calls;
558e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
559b7453713SJeremy L Thompson   const CeedScalar *d_array;
560b7453713SJeremy L Thompson   CeedVector_Hip   *impl;
5610d0321e0SJeremy L Thompson   hipblasHandle_t   handle;
5620002d81dSZach Atkins   hipStream_t       stream;
563a3b195efSJeremy L Thompson   Ceed_Hip         *hip_data;
564b7453713SJeremy L Thompson 
565b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
566a3b195efSJeremy L Thompson   CeedCallBackend(CeedGetData(ceed, &hip_data));
567b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
568b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
569eb7e6cafSJeremy L Thompson   CeedCallBackend(CeedGetHipblasHandle_Hip(ceed, &handle));
5700002d81dSZach Atkins   CeedCallHipblas(ceed, hipblasGetStream(handle, &stream));
571e84c3ebcSJeremy L Thompson #if (HIP_VERSION < 60000000)
572e84c3ebcSJeremy L Thompson   // With ROCm 6, we can use the 64-bit integer interface. Prior to that,
573e84c3ebcSJeremy L Thompson   // we need to check if the vector is too long to handle with int32,
574e84c3ebcSJeremy L Thompson   // and if so, divide it into subsections for repeated hipBLAS calls.
575672b0f2aSSebastian Grimberg   num_calls = length / INT_MAX;
5769330daecSnbeams   if (length % INT_MAX > 0) num_calls += 1;
577e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
5789330daecSnbeams 
5790d0321e0SJeremy L Thompson   // Compute norm
5802b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array));
5810d0321e0SJeremy L Thompson   switch (type) {
5820d0321e0SJeremy L Thompson     case CEED_NORM_1: {
583f6f49adbSnbeams       *norm = 0.0;
584e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
585e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000)  // We have ROCm 6, and can use 64-bit integers
586e84c3ebcSJeremy L Thompson       CeedCallHipblas(ceed, hipblasSasum_64(handle, (int64_t)length, (float *)d_array, 1, (float *)norm));
5870002d81dSZach Atkins       CeedCallHip(ceed, hipStreamSynchronize(stream));
588e84c3ebcSJeremy L Thompson #else  /* HIP_VERSION */
5899330daecSnbeams       float  sub_norm = 0.0;
5909330daecSnbeams       float *d_array_start;
591b7453713SJeremy L Thompson 
5929330daecSnbeams       for (CeedInt i = 0; i < num_calls; i++) {
5939330daecSnbeams         d_array_start             = (float *)d_array + (CeedSize)(i)*INT_MAX;
5949330daecSnbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
5959330daecSnbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
596b7453713SJeremy L Thompson 
597a3b195efSJeremy L Thompson         CeedCallHipblas(ceed, hipblasSasum(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm));
5980002d81dSZach Atkins         CeedCallHip(ceed, hipStreamSynchronize(stream));
5999330daecSnbeams         *norm += sub_norm;
6009330daecSnbeams       }
601e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
602e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
603e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000)
604e84c3ebcSJeremy L Thompson       CeedCallHipblas(ceed, hipblasDasum_64(handle, (int64_t)length, (double *)d_array, 1, (double *)norm));
6050002d81dSZach Atkins       CeedCallHip(ceed, hipStreamSynchronize(stream));
606e84c3ebcSJeremy L Thompson #else  /* HIP_VERSION */
6079330daecSnbeams       double  sub_norm = 0.0;
6089330daecSnbeams       double *d_array_start;
609b7453713SJeremy L Thompson 
6109330daecSnbeams       for (CeedInt i = 0; i < num_calls; i++) {
6119330daecSnbeams         d_array_start             = (double *)d_array + (CeedSize)(i)*INT_MAX;
6129330daecSnbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
6139330daecSnbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
614b7453713SJeremy L Thompson 
6159330daecSnbeams         CeedCallHipblas(ceed, hipblasDasum(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm));
6160002d81dSZach Atkins         CeedCallHip(ceed, hipStreamSynchronize(stream));
6179330daecSnbeams         *norm += sub_norm;
6189330daecSnbeams       }
619e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
620e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
6210d0321e0SJeremy L Thompson       break;
6220d0321e0SJeremy L Thompson     }
6230d0321e0SJeremy L Thompson     case CEED_NORM_2: {
624e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
625e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000)
626e84c3ebcSJeremy L Thompson       CeedCallHipblas(ceed, hipblasSnrm2_64(handle, (int64_t)length, (float *)d_array, 1, (float *)norm));
6270002d81dSZach Atkins       CeedCallHip(ceed, hipStreamSynchronize(stream));
628a3b195efSJeremy L Thompson #else  /* HIP_VERSION */
6299330daecSnbeams       float  sub_norm = 0.0, norm_sum = 0.0;
6309330daecSnbeams       float *d_array_start;
631b7453713SJeremy L Thompson 
6329330daecSnbeams       for (CeedInt i = 0; i < num_calls; i++) {
6339330daecSnbeams         d_array_start             = (float *)d_array + (CeedSize)(i)*INT_MAX;
6349330daecSnbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
6359330daecSnbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
636b7453713SJeremy L Thompson 
6379330daecSnbeams         CeedCallHipblas(ceed, hipblasSnrm2(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm));
6380002d81dSZach Atkins         CeedCallHip(ceed, hipStreamSynchronize(stream));
6399330daecSnbeams         norm_sum += sub_norm * sub_norm;
6409330daecSnbeams       }
6419330daecSnbeams       *norm = sqrt(norm_sum);
642e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
643e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
644e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000)
645e84c3ebcSJeremy L Thompson       CeedCallHipblas(ceed, hipblasDnrm2_64(handle, (int64_t)length, (double *)d_array, 1, (double *)norm));
6460002d81dSZach Atkins       CeedCallHip(ceed, hipStreamSynchronize(stream));
647a3b195efSJeremy L Thompson #else  /* HIP_VERSION */
6489330daecSnbeams       double  sub_norm = 0.0, norm_sum = 0.0;
6499330daecSnbeams       double *d_array_start;
650b7453713SJeremy L Thompson 
6519330daecSnbeams       for (CeedInt i = 0; i < num_calls; i++) {
6529330daecSnbeams         d_array_start             = (double *)d_array + (CeedSize)(i)*INT_MAX;
6539330daecSnbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
6549330daecSnbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
655b7453713SJeremy L Thompson 
6569330daecSnbeams         CeedCallHipblas(ceed, hipblasDnrm2(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm));
6570002d81dSZach Atkins         CeedCallHip(ceed, hipStreamSynchronize(stream));
6589330daecSnbeams         norm_sum += sub_norm * sub_norm;
6599330daecSnbeams       }
6609330daecSnbeams       *norm = sqrt(norm_sum);
661e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
662e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
6630d0321e0SJeremy L Thompson       break;
6640d0321e0SJeremy L Thompson     }
6650d0321e0SJeremy L Thompson     case CEED_NORM_MAX: {
666e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
667e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000)
668e84c3ebcSJeremy L Thompson       int64_t    index;
669e84c3ebcSJeremy L Thompson       CeedScalar norm_no_abs;
670b7453713SJeremy L Thompson 
671e84c3ebcSJeremy L Thompson       CeedCallHipblas(ceed, hipblasIsamax_64(handle, (int64_t)length, (float *)d_array, 1, &index));
6720002d81dSZach Atkins       CeedCallHip(ceed, hipMemcpyAsync(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream));
6730002d81dSZach Atkins       CeedCallHip(ceed, hipStreamSynchronize(stream));
674e84c3ebcSJeremy L Thompson       *norm = fabs(norm_no_abs);
675e84c3ebcSJeremy L Thompson #else  /* HIP_VERSION */
676e84c3ebcSJeremy L Thompson       CeedInt index;
6779330daecSnbeams       float   sub_max = 0.0, current_max = 0.0;
6789330daecSnbeams       float  *d_array_start;
679e84c3ebcSJeremy L Thompson 
6809330daecSnbeams       for (CeedInt i = 0; i < num_calls; i++) {
6819330daecSnbeams         d_array_start             = (float *)d_array + (CeedSize)(i)*INT_MAX;
6829330daecSnbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
6839330daecSnbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
684b7453713SJeremy L Thompson 
685b7453713SJeremy L Thompson         CeedCallHipblas(ceed, hipblasIsamax(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &index));
686a3b195efSJeremy L Thompson         if (hip_data->has_unified_addressing) {
6870002d81dSZach Atkins           CeedCallHip(ceed, hipStreamSynchronize(stream));
688a3b195efSJeremy L Thompson           sub_max = fabs(d_array[index - 1]);
689a3b195efSJeremy L Thompson         } else {
6900002d81dSZach Atkins           CeedCallHip(ceed, hipMemcpyAsync(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream));
6910002d81dSZach Atkins           CeedCallHip(ceed, hipStreamSynchronize(stream));
692a3b195efSJeremy L Thompson         }
6939330daecSnbeams         if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
6949330daecSnbeams       }
6959330daecSnbeams       *norm = current_max;
696e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
697e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
698e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000)
699e84c3ebcSJeremy L Thompson       int64_t    index;
700e84c3ebcSJeremy L Thompson       CeedScalar norm_no_abs;
701e84c3ebcSJeremy L Thompson 
702e84c3ebcSJeremy L Thompson       CeedCallHipblas(ceed, hipblasIdamax_64(handle, (int64_t)length, (double *)d_array, 1, &index));
703a3b195efSJeremy L Thompson       if (hip_data->has_unified_addressing) {
7040002d81dSZach Atkins         CeedCallHip(ceed, hipStreamSynchronize(stream));
705a3b195efSJeremy L Thompson         norm_no_abs = fabs(d_array[index - 1]);
706a3b195efSJeremy L Thompson       } else {
7070002d81dSZach Atkins         CeedCallHip(ceed, hipMemcpyAsync(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream));
7080002d81dSZach Atkins         CeedCallHip(ceed, hipStreamSynchronize(stream));
709a3b195efSJeremy L Thompson       }
710e84c3ebcSJeremy L Thompson       *norm = fabs(norm_no_abs);
711e84c3ebcSJeremy L Thompson #else  /* HIP_VERSION */
712e84c3ebcSJeremy L Thompson       CeedInt index;
7139330daecSnbeams       double  sub_max = 0.0, current_max = 0.0;
7149330daecSnbeams       double *d_array_start;
715b7453713SJeremy L Thompson 
7169330daecSnbeams       for (CeedInt i = 0; i < num_calls; i++) {
7179330daecSnbeams         d_array_start             = (double *)d_array + (CeedSize)(i)*INT_MAX;
7189330daecSnbeams         CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX;
7199330daecSnbeams         CeedInt  sub_length       = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX;
720b7453713SJeremy L Thompson 
721b7453713SJeremy L Thompson         CeedCallHipblas(ceed, hipblasIdamax(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &index));
722a3b195efSJeremy L Thompson         if (hip_data->has_unified_addressing) {
7230002d81dSZach Atkins           CeedCallHip(ceed, hipStreamSynchronize(stream));
724a3b195efSJeremy L Thompson           sub_max = fabs(d_array[index - 1]);
725a3b195efSJeremy L Thompson         } else {
7260002d81dSZach Atkins           CeedCallHip(ceed, hipMemcpyAsync(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream));
7270002d81dSZach Atkins           CeedCallHip(ceed, hipStreamSynchronize(stream));
728a3b195efSJeremy L Thompson         }
7299330daecSnbeams         if (fabs(sub_max) > current_max) current_max = fabs(sub_max);
7309330daecSnbeams       }
7319330daecSnbeams       *norm = current_max;
732e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
733e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
7340d0321e0SJeremy L Thompson       break;
7350d0321e0SJeremy L Thompson     }
7360d0321e0SJeremy L Thompson   }
7372b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array));
7389bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
7390d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7400d0321e0SJeremy L Thompson }
7410d0321e0SJeremy L Thompson 
7420d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
7430d0321e0SJeremy L Thompson // Take reciprocal of a vector on host
7440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedHostReciprocal_Hip(CeedScalar * h_array,CeedSize length)7459330daecSnbeams static int CeedHostReciprocal_Hip(CeedScalar *h_array, CeedSize length) {
7469330daecSnbeams   for (CeedSize i = 0; i < length; i++) {
7472b730f8bSJeremy L Thompson     if (fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i];
7482b730f8bSJeremy L Thompson   }
7490d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7500d0321e0SJeremy L Thompson }
7510d0321e0SJeremy L Thompson 
7520d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
753956a3dbaSJeremy L Thompson // Take reciprocal of a vector on device (impl in .hip.cpp file)
7540d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
7559330daecSnbeams int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length);
7560d0321e0SJeremy L Thompson 
7570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
7580d0321e0SJeremy L Thompson // Take reciprocal of a vector
7590d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorReciprocal_Hip(CeedVector vec)7600d0321e0SJeremy L Thompson static int CeedVectorReciprocal_Hip(CeedVector vec) {
7611f9221feSJeremy L Thompson   CeedSize        length;
762b7453713SJeremy L Thompson   CeedVector_Hip *impl;
7630d0321e0SJeremy L Thompson 
764b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
765b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(vec, &length));
7660d0321e0SJeremy L Thompson   // Set value for synced device/host array
7672b730f8bSJeremy L Thompson   if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Hip(impl->d_array, length));
7682b730f8bSJeremy L Thompson   if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Hip(impl->h_array, length));
7690d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7700d0321e0SJeremy L Thompson }
7710d0321e0SJeremy L Thompson 
7720d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
7730d0321e0SJeremy L Thompson // Compute x = alpha x on the host
7740d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedHostScale_Hip(CeedScalar * x_array,CeedScalar alpha,CeedSize length)7759330daecSnbeams static int CeedHostScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) {
7769330daecSnbeams   for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha;
7770d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
7780d0321e0SJeremy L Thompson }
7790d0321e0SJeremy L Thompson 
7800d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
781956a3dbaSJeremy L Thompson // Compute x = alpha x on device (impl in .hip.cpp file)
7820d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
7839330daecSnbeams int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length);
7840d0321e0SJeremy L Thompson 
7850d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
7860d0321e0SJeremy L Thompson // Compute x = alpha x
7870d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorScale_Hip(CeedVector x,CeedScalar alpha)7880d0321e0SJeremy L Thompson static int CeedVectorScale_Hip(CeedVector x, CeedScalar alpha) {
7891f9221feSJeremy L Thompson   CeedSize        length;
790e84c3ebcSJeremy L Thompson   CeedVector_Hip *impl;
7910d0321e0SJeremy L Thompson 
792e84c3ebcSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(x, &impl));
793b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(x, &length));
7940d0321e0SJeremy L Thompson   // Set value for synced device/host array
795e84c3ebcSJeremy L Thompson   if (impl->d_array) {
796e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000)
797e84c3ebcSJeremy L Thompson     hipblasHandle_t handle;
7980002d81dSZach Atkins     hipStream_t     stream;
799e84c3ebcSJeremy L Thompson 
800e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedGetHipblasHandle_Hip(CeedVectorReturnCeed(x), &handle));
8010002d81dSZach Atkins     CeedCallHipblas(CeedVectorReturnCeed(x), hipblasGetStream(handle, &stream));
802e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
803e84c3ebcSJeremy L Thompson     CeedCallHipblas(CeedVectorReturnCeed(x), hipblasSscal_64(handle, (int64_t)length, &alpha, impl->d_array, 1));
804e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
805e84c3ebcSJeremy L Thompson     CeedCallHipblas(CeedVectorReturnCeed(x), hipblasDscal_64(handle, (int64_t)length, &alpha, impl->d_array, 1));
806e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
8070002d81dSZach Atkins     CeedCallHip(CeedVectorReturnCeed(x), hipStreamSynchronize(stream));
808e84c3ebcSJeremy L Thompson #else  /* HIP_VERSION */
809e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedDeviceScale_Hip(impl->d_array, alpha, length));
810e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
811e84c3ebcSJeremy L Thompson     impl->h_array = NULL;
812e84c3ebcSJeremy L Thompson   }
813e84c3ebcSJeremy L Thompson   if (impl->h_array) {
814e84c3ebcSJeremy L Thompson     CeedCallBackend(CeedHostScale_Hip(impl->h_array, alpha, length));
815e84c3ebcSJeremy L Thompson     impl->d_array = NULL;
816e84c3ebcSJeremy L Thompson   }
8170d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
8180d0321e0SJeremy L Thompson }
8190d0321e0SJeremy L Thompson 
8200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
8210d0321e0SJeremy L Thompson // Compute y = alpha x + y on the host
8220d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedHostAXPY_Hip(CeedScalar * y_array,CeedScalar alpha,CeedScalar * x_array,CeedSize length)8239330daecSnbeams static int CeedHostAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) {
8249330daecSnbeams   for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i];
8250d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
8260d0321e0SJeremy L Thompson }
8270d0321e0SJeremy L Thompson 
8280d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
829956a3dbaSJeremy L Thompson // Compute y = alpha x + y on device (impl in .hip.cpp file)
8300d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
8319330daecSnbeams int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length);
8320d0321e0SJeremy L Thompson 
8330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
8340d0321e0SJeremy L Thompson // Compute y = alpha x + y
8350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorAXPY_Hip(CeedVector y,CeedScalar alpha,CeedVector x)8360d0321e0SJeremy L Thompson static int CeedVectorAXPY_Hip(CeedVector y, CeedScalar alpha, CeedVector x) {
837b7453713SJeremy L Thompson   CeedSize        length;
8380d0321e0SJeremy L Thompson   CeedVector_Hip *y_impl, *x_impl;
839b7453713SJeremy L Thompson 
8402b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(y, &y_impl));
8412b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(x, &x_impl));
8422b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(y, &length));
8430d0321e0SJeremy L Thompson   // Set value for synced device/host array
8440d0321e0SJeremy L Thompson   if (y_impl->d_array) {
8452b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
846e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000)
847e84c3ebcSJeremy L Thompson     hipblasHandle_t handle;
8480002d81dSZach Atkins     hipStream_t     stream;
849e84c3ebcSJeremy L Thompson 
8500002d81dSZach Atkins     CeedCallBackend(CeedGetHipblasHandle_Hip(CeedVectorReturnCeed(x), &handle));
8510002d81dSZach Atkins     CeedCallHipblas(CeedVectorReturnCeed(y), hipblasGetStream(handle, &stream));
852e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32)
853e84c3ebcSJeremy L Thompson     CeedCallHipblas(CeedVectorReturnCeed(y), hipblasSaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1));
854e84c3ebcSJeremy L Thompson #else  /* CEED_SCALAR */
855e84c3ebcSJeremy L Thompson     CeedCallHipblas(CeedVectorReturnCeed(y), hipblasDaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1));
856e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */
8570002d81dSZach Atkins     CeedCallHip(CeedVectorReturnCeed(y), hipStreamSynchronize(stream));
858e84c3ebcSJeremy L Thompson #else  /* HIP_VERSION */
8592b730f8bSJeremy L Thompson     CeedCallBackend(CeedDeviceAXPY_Hip(y_impl->d_array, alpha, x_impl->d_array, length));
860e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */
861e84c3ebcSJeremy L Thompson     y_impl->h_array = NULL;
862e84c3ebcSJeremy L Thompson   } else if (y_impl->h_array) {
8632b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
8642b730f8bSJeremy L Thompson     CeedCallBackend(CeedHostAXPY_Hip(y_impl->h_array, alpha, x_impl->h_array, length));
865e84c3ebcSJeremy L Thompson     y_impl->d_array = NULL;
8660d0321e0SJeremy L Thompson   }
8670d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
8680d0321e0SJeremy L Thompson }
869ff1e7120SSebastian Grimberg 
8705fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
8715fb68f37SKaren (Ren) Stengel // Compute y = alpha x + beta y on the host
8725fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
CeedHostAXPBY_Hip(CeedScalar * y_array,CeedScalar alpha,CeedScalar beta,CeedScalar * x_array,CeedSize length)8739330daecSnbeams static int CeedHostAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) {
874aa67b842SZach Atkins   for (CeedSize i = 0; i < length; i++) y_array[i] = alpha * x_array[i] + beta * y_array[i];
8755fb68f37SKaren (Ren) Stengel   return CEED_ERROR_SUCCESS;
8765fb68f37SKaren (Ren) Stengel }
8775fb68f37SKaren (Ren) Stengel 
8785fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
879956a3dbaSJeremy L Thompson // Compute y = alpha x + beta y on device (impl in .hip.cpp file)
8805fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
8819330daecSnbeams int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length);
8825fb68f37SKaren (Ren) Stengel 
8835fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
8845fb68f37SKaren (Ren) Stengel // Compute y = alpha x + beta y
8855fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------
CeedVectorAXPBY_Hip(CeedVector y,CeedScalar alpha,CeedScalar beta,CeedVector x)8865fb68f37SKaren (Ren) Stengel static int CeedVectorAXPBY_Hip(CeedVector y, CeedScalar alpha, CeedScalar beta, CeedVector x) {
887b7453713SJeremy L Thompson   CeedSize        length;
8885fb68f37SKaren (Ren) Stengel   CeedVector_Hip *y_impl, *x_impl;
889b7453713SJeremy L Thompson 
8905fb68f37SKaren (Ren) Stengel   CeedCallBackend(CeedVectorGetData(y, &y_impl));
8915fb68f37SKaren (Ren) Stengel   CeedCallBackend(CeedVectorGetData(x, &x_impl));
8925fb68f37SKaren (Ren) Stengel   CeedCallBackend(CeedVectorGetLength(y, &length));
8935fb68f37SKaren (Ren) Stengel   // Set value for synced device/host array
8945fb68f37SKaren (Ren) Stengel   if (y_impl->d_array) {
8955fb68f37SKaren (Ren) Stengel     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
8965fb68f37SKaren (Ren) Stengel     CeedCallBackend(CeedDeviceAXPBY_Hip(y_impl->d_array, alpha, beta, x_impl->d_array, length));
8975fb68f37SKaren (Ren) Stengel   }
8985fb68f37SKaren (Ren) Stengel   if (y_impl->h_array) {
8995fb68f37SKaren (Ren) Stengel     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
9005fb68f37SKaren (Ren) Stengel     CeedCallBackend(CeedHostAXPBY_Hip(y_impl->h_array, alpha, beta, x_impl->h_array, length));
9015fb68f37SKaren (Ren) Stengel   }
9025fb68f37SKaren (Ren) Stengel   return CEED_ERROR_SUCCESS;
9035fb68f37SKaren (Ren) Stengel }
9040d0321e0SJeremy L Thompson 
9050d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
9060d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y on the host
9070d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedHostPointwiseMult_Hip(CeedScalar * w_array,CeedScalar * x_array,CeedScalar * y_array,CeedSize length)9089330daecSnbeams static int CeedHostPointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) {
9099330daecSnbeams   for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i];
9100d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
9110d0321e0SJeremy L Thompson }
9120d0321e0SJeremy L Thompson 
9130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
914956a3dbaSJeremy L Thompson // Compute the pointwise multiplication w = x .* y on device (impl in .hip.cpp file)
9150d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
9169330daecSnbeams int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length);
9170d0321e0SJeremy L Thompson 
9180d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
9190d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y
9200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorPointwiseMult_Hip(CeedVector w,CeedVector x,CeedVector y)9212b730f8bSJeremy L Thompson static int CeedVectorPointwiseMult_Hip(CeedVector w, CeedVector x, CeedVector y) {
922b7453713SJeremy L Thompson   CeedSize        length;
9230d0321e0SJeremy L Thompson   CeedVector_Hip *w_impl, *x_impl, *y_impl;
924b7453713SJeremy L Thompson 
9252b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(w, &w_impl));
9262b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(x, &x_impl));
9272b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetData(y, &y_impl));
9282b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorGetLength(w, &length));
9290d0321e0SJeremy L Thompson 
9300d0321e0SJeremy L Thompson   // Set value for synced device/host array
9310d0321e0SJeremy L Thompson   if (!w_impl->d_array && !w_impl->h_array) {
9322b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorSetValue(w, 0.0));
9330d0321e0SJeremy L Thompson   }
9340d0321e0SJeremy L Thompson   if (w_impl->d_array) {
9352b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE));
9362b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE));
9372b730f8bSJeremy L Thompson     CeedCallBackend(CeedDevicePointwiseMult_Hip(w_impl->d_array, x_impl->d_array, y_impl->d_array, length));
9380d0321e0SJeremy L Thompson   }
9390d0321e0SJeremy L Thompson   if (w_impl->h_array) {
9402b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST));
9412b730f8bSJeremy L Thompson     CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST));
9422b730f8bSJeremy L Thompson     CeedCallBackend(CeedHostPointwiseMult_Hip(w_impl->h_array, x_impl->h_array, y_impl->h_array, length));
9430d0321e0SJeremy L Thompson   }
9440d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
9450d0321e0SJeremy L Thompson }
9460d0321e0SJeremy L Thompson 
9470d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
9480d0321e0SJeremy L Thompson // Destroy the vector
9490d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorDestroy_Hip(const CeedVector vec)9500d0321e0SJeremy L Thompson static int CeedVectorDestroy_Hip(const CeedVector vec) {
9510d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
9520d0321e0SJeremy L Thompson 
953b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetData(vec, &impl));
9546e536b99SJeremy L Thompson   CeedCallHip(CeedVectorReturnCeed(vec), hipFree(impl->d_array_owned));
9552b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl->h_array_owned));
9562b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&impl));
9570d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
9580d0321e0SJeremy L Thompson }
9590d0321e0SJeremy L Thompson 
9600d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
9610d0321e0SJeremy L Thompson // Create a vector of the specified length (does not allocate memory)
9620d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedVectorCreate_Hip(CeedSize n,CeedVector vec)9631f9221feSJeremy L Thompson int CeedVectorCreate_Hip(CeedSize n, CeedVector vec) {
9640d0321e0SJeremy L Thompson   CeedVector_Hip *impl;
965a3b195efSJeremy L Thompson   Ceed_Hip       *hip_impl;
9660d0321e0SJeremy L Thompson   Ceed            ceed;
9670d0321e0SJeremy L Thompson 
968b7453713SJeremy L Thompson   CeedCallBackend(CeedVectorGetCeed(vec, &ceed));
9692b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Hip));
9702b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Hip));
9712b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Hip));
9722b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Hip));
9733e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "CopyStrided", CeedVectorCopyStrided_Hip));
9743e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Hip));
9753e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetValueStrided", CeedVectorSetValueStrided_Hip));
9762b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Hip));
9772b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Hip));
9782b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Hip));
9792b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Hip));
9802b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Norm", CeedVectorNorm_Hip));
9812b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Hip));
9823e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Scale", CeedVectorScale_Hip));
9833e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Hip));
9843e961e14SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "AXPBY", CeedVectorAXPBY_Hip));
9852b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Hip));
9862b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Hip));
9872b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &impl));
988a3b195efSJeremy L Thompson   CeedCallBackend(CeedGetData(ceed, &hip_impl));
989a3b195efSJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
990a3b195efSJeremy L Thompson   impl->has_unified_addressing = hip_impl->has_unified_addressing;
9912b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorSetData(vec, impl));
9920d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
9930d0321e0SJeremy L Thompson }
9942a86cc9dSSebastian Grimberg 
9952a86cc9dSSebastian Grimberg //------------------------------------------------------------------------------
996