1d275d636SJeremy L Thompson // Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 100d0321e0SJeremy L Thompson #include <math.h> 1149aac155SJeremy L Thompson #include <stdbool.h> 120d0321e0SJeremy L Thompson #include <string.h> 13c85e8640SSebastian Grimberg #include <hip/hip_runtime.h> 140d0321e0SJeremy L Thompson 1549aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h" 162b730f8bSJeremy L Thompson #include "ceed-hip-ref.h" 17f48ed27dSnbeams 18f48ed27dSnbeams //------------------------------------------------------------------------------ 19f48ed27dSnbeams // Check if host/device sync is needed 20f48ed27dSnbeams //------------------------------------------------------------------------------ 212b730f8bSJeremy L Thompson static inline int CeedVectorNeedSync_Hip(const CeedVector vec, CeedMemType mem_type, bool *need_sync) { 22f48ed27dSnbeams CeedVector_Hip *impl; 23f48ed27dSnbeams bool has_valid_array = false; 24b7453713SJeremy L Thompson 25b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 262b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorHasValidArray(vec, &has_valid_array)); 27f48ed27dSnbeams switch (mem_type) { 28f48ed27dSnbeams case CEED_MEM_HOST: 29f48ed27dSnbeams *need_sync = has_valid_array && !impl->h_array; 30f48ed27dSnbeams break; 31f48ed27dSnbeams case CEED_MEM_DEVICE: 32f48ed27dSnbeams *need_sync = has_valid_array && !impl->d_array; 33f48ed27dSnbeams break; 34f48ed27dSnbeams } 35f48ed27dSnbeams return CEED_ERROR_SUCCESS; 36f48ed27dSnbeams } 37f48ed27dSnbeams 380d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 390d0321e0SJeremy L Thompson // Sync host to device 400d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 410d0321e0SJeremy L Thompson static inline int CeedVectorSyncH2D_Hip(const CeedVector vec) { 42b7453713SJeremy L Thompson CeedSize length; 43672b0f2aSSebastian Grimberg size_t bytes; 440d0321e0SJeremy L Thompson CeedVector_Hip *impl; 45b7453713SJeremy L Thompson 462b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 470d0321e0SJeremy L Thompson 489bc66399SJeremy L Thompson CeedCheck(impl->h_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid host data to sync to device"); 490d0321e0SJeremy L Thompson 50672b0f2aSSebastian Grimberg CeedCallBackend(CeedVectorGetLength(vec, &length)); 51672b0f2aSSebastian Grimberg bytes = length * sizeof(CeedScalar); 520d0321e0SJeremy L Thompson if (impl->d_array_borrowed) { 530d0321e0SJeremy L Thompson impl->d_array = impl->d_array_borrowed; 540d0321e0SJeremy L Thompson } else if (impl->d_array_owned) { 550d0321e0SJeremy L Thompson impl->d_array = impl->d_array_owned; 560d0321e0SJeremy L Thompson } else { 579bc66399SJeremy L Thompson CeedCallHip(CeedVectorReturnCeed(vec), hipMalloc((void **)&impl->d_array_owned, bytes)); 580d0321e0SJeremy L Thompson impl->d_array = impl->d_array_owned; 590d0321e0SJeremy L Thompson } 609bc66399SJeremy L Thompson CeedCallHip(CeedVectorReturnCeed(vec), hipMemcpy(impl->d_array, impl->h_array, bytes, hipMemcpyHostToDevice)); 610d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 620d0321e0SJeremy L Thompson } 630d0321e0SJeremy L Thompson 640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 650d0321e0SJeremy L Thompson // Sync device to host 660d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 670d0321e0SJeremy L Thompson static inline int CeedVectorSyncD2H_Hip(const CeedVector vec) { 68b7453713SJeremy L Thompson CeedSize length; 69672b0f2aSSebastian Grimberg size_t bytes; 700d0321e0SJeremy L Thompson CeedVector_Hip *impl; 71b7453713SJeremy L Thompson 722b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 730d0321e0SJeremy L Thompson 749bc66399SJeremy L Thompson CeedCheck(impl->d_array, CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "No valid device data to sync to host"); 750d0321e0SJeremy L Thompson 760d0321e0SJeremy L Thompson if (impl->h_array_borrowed) { 770d0321e0SJeremy L Thompson impl->h_array = impl->h_array_borrowed; 780d0321e0SJeremy L Thompson } else if (impl->h_array_owned) { 790d0321e0SJeremy L Thompson impl->h_array = impl->h_array_owned; 800d0321e0SJeremy L Thompson } else { 811f9221feSJeremy L Thompson CeedSize length; 82672b0f2aSSebastian Grimberg 832b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length)); 842b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(length, &impl->h_array_owned)); 850d0321e0SJeremy L Thompson impl->h_array = impl->h_array_owned; 860d0321e0SJeremy L Thompson } 870d0321e0SJeremy L Thompson 882b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length)); 89672b0f2aSSebastian Grimberg bytes = length * sizeof(CeedScalar); 909bc66399SJeremy L Thompson CeedCallHip(CeedVectorReturnCeed(vec), hipMemcpy(impl->h_array, impl->d_array, bytes, hipMemcpyDeviceToHost)); 910d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 920d0321e0SJeremy L Thompson } 930d0321e0SJeremy L Thompson 940d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 950d0321e0SJeremy L Thompson // Sync arrays 960d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 972b730f8bSJeremy L Thompson static int CeedVectorSyncArray_Hip(const CeedVector vec, CeedMemType mem_type) { 98f48ed27dSnbeams bool need_sync = false; 99a3b195efSJeremy L Thompson CeedVector_Hip *impl; 100a3b195efSJeremy L Thompson 101a3b195efSJeremy L Thompson // Sync for unified memory 102a3b195efSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 103a3b195efSJeremy L Thompson if (impl->has_unified_addressing && !impl->h_array_borrowed) { 104a3b195efSJeremy L Thompson CeedCallHip(CeedVectorReturnCeed(vec), hipDeviceSynchronize()); 105a3b195efSJeremy L Thompson return CEED_ERROR_SUCCESS; 106a3b195efSJeremy L Thompson } 107b7453713SJeremy L Thompson 108b7453713SJeremy L Thompson // Check whether device/host sync is needed 1092b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorNeedSync_Hip(vec, mem_type, &need_sync)); 1102b730f8bSJeremy L Thompson if (!need_sync) return CEED_ERROR_SUCCESS; 111f48ed27dSnbeams 11243c928f4SJeremy L Thompson switch (mem_type) { 1132b730f8bSJeremy L Thompson case CEED_MEM_HOST: 1142b730f8bSJeremy L Thompson return CeedVectorSyncD2H_Hip(vec); 1152b730f8bSJeremy L Thompson case CEED_MEM_DEVICE: 1162b730f8bSJeremy L Thompson return CeedVectorSyncH2D_Hip(vec); 1170d0321e0SJeremy L Thompson } 1180d0321e0SJeremy L Thompson return CEED_ERROR_UNSUPPORTED; 1190d0321e0SJeremy L Thompson } 1200d0321e0SJeremy L Thompson 1210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1220d0321e0SJeremy L Thompson // Set all pointers as invalid 1230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1240d0321e0SJeremy L Thompson static inline int CeedVectorSetAllInvalid_Hip(const CeedVector vec) { 1250d0321e0SJeremy L Thompson CeedVector_Hip *impl; 1260d0321e0SJeremy L Thompson 127b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 1280d0321e0SJeremy L Thompson impl->h_array = NULL; 1290d0321e0SJeremy L Thompson impl->d_array = NULL; 1300d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1310d0321e0SJeremy L Thompson } 1320d0321e0SJeremy L Thompson 1330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 134b2165e7aSSebastian Grimberg // Check if CeedVector has any valid pointer 1350d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1362b730f8bSJeremy L Thompson static inline int CeedVectorHasValidArray_Hip(const CeedVector vec, bool *has_valid_array) { 1370d0321e0SJeremy L Thompson CeedVector_Hip *impl; 138b7453713SJeremy L Thompson 1392b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 1401c66c397SJeremy L Thompson *has_valid_array = impl->h_array || impl->d_array; 1410d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1420d0321e0SJeremy L Thompson } 1430d0321e0SJeremy L Thompson 1440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 145b2165e7aSSebastian Grimberg // Check if has array of given type 1460d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1472b730f8bSJeremy L Thompson static inline int CeedVectorHasArrayOfType_Hip(const CeedVector vec, CeedMemType mem_type, bool *has_array_of_type) { 1480d0321e0SJeremy L Thompson CeedVector_Hip *impl; 1490d0321e0SJeremy L Thompson 150b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 15143c928f4SJeremy L Thompson switch (mem_type) { 1520d0321e0SJeremy L Thompson case CEED_MEM_HOST: 1531c66c397SJeremy L Thompson *has_array_of_type = impl->h_array_borrowed || impl->h_array_owned; 1540d0321e0SJeremy L Thompson break; 1550d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 1561c66c397SJeremy L Thompson *has_array_of_type = impl->d_array_borrowed || impl->d_array_owned; 1570d0321e0SJeremy L Thompson break; 1580d0321e0SJeremy L Thompson } 1590d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1600d0321e0SJeremy L Thompson } 1610d0321e0SJeremy L Thompson 1620d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1630d0321e0SJeremy L Thompson // Check if has borrowed array of given type 1640d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1652b730f8bSJeremy L Thompson static inline int CeedVectorHasBorrowedArrayOfType_Hip(const CeedVector vec, CeedMemType mem_type, bool *has_borrowed_array_of_type) { 1660d0321e0SJeremy L Thompson CeedVector_Hip *impl; 1670d0321e0SJeremy L Thompson 168b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 169a3b195efSJeremy L Thompson 170a3b195efSJeremy L Thompson // Use device memory for unified memory 171a3b195efSJeremy L Thompson mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type; 172a3b195efSJeremy L Thompson 17343c928f4SJeremy L Thompson switch (mem_type) { 1740d0321e0SJeremy L Thompson case CEED_MEM_HOST: 1751c66c397SJeremy L Thompson *has_borrowed_array_of_type = impl->h_array_borrowed; 1760d0321e0SJeremy L Thompson break; 1770d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 1781c66c397SJeremy L Thompson *has_borrowed_array_of_type = impl->d_array_borrowed; 1790d0321e0SJeremy L Thompson break; 1800d0321e0SJeremy L Thompson } 1810d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1820d0321e0SJeremy L Thompson } 1830d0321e0SJeremy L Thompson 1840d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1850d0321e0SJeremy L Thompson // Set array from host 1860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1872b730f8bSJeremy L Thompson static int CeedVectorSetArrayHost_Hip(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) { 188a267acd1SJeremy L Thompson CeedSize length; 1890d0321e0SJeremy L Thompson CeedVector_Hip *impl; 1900d0321e0SJeremy L Thompson 191b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 192a267acd1SJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length)); 193a267acd1SJeremy L Thompson 194f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetHostCeedScalarArray(array, copy_mode, length, (const CeedScalar **)&impl->h_array_owned, 195f5d1e504SJeremy L Thompson (const CeedScalar **)&impl->h_array_borrowed, (const CeedScalar **)&impl->h_array)); 1960d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 1970d0321e0SJeremy L Thompson } 1980d0321e0SJeremy L Thompson 1990d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2000d0321e0SJeremy L Thompson // Set array from device 2010d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2022b730f8bSJeremy L Thompson static int CeedVectorSetArrayDevice_Hip(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) { 203a267acd1SJeremy L Thompson CeedSize length; 2040d0321e0SJeremy L Thompson Ceed ceed; 2050d0321e0SJeremy L Thompson CeedVector_Hip *impl; 2060d0321e0SJeremy L Thompson 207b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 208b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 209a267acd1SJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length)); 210f5d1e504SJeremy L Thompson 211f5d1e504SJeremy L Thompson CeedCallBackend(CeedSetDeviceCeedScalarArray_Hip(ceed, array, copy_mode, length, (const CeedScalar **)&impl->d_array_owned, 212f5d1e504SJeremy L Thompson (const CeedScalar **)&impl->d_array_borrowed, (const CeedScalar **)&impl->d_array)); 2139bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 2140d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2150d0321e0SJeremy L Thompson } 2160d0321e0SJeremy L Thompson 2170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 218a3b195efSJeremy L Thompson // Set array with unified memory 219a3b195efSJeremy L Thompson //------------------------------------------------------------------------------ 220a3b195efSJeremy L Thompson static int CeedVectorSetArrayUnifiedHostToDevice_Hip(const CeedVector vec, const CeedCopyMode copy_mode, CeedScalar *array) { 221a3b195efSJeremy L Thompson CeedSize length; 222a3b195efSJeremy L Thompson Ceed ceed; 223a3b195efSJeremy L Thompson CeedVector_Hip *impl; 224a3b195efSJeremy L Thompson 225a3b195efSJeremy L Thompson CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 226a3b195efSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 227a3b195efSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length)); 228a3b195efSJeremy L Thompson 229a3b195efSJeremy L Thompson switch (copy_mode) { 230a3b195efSJeremy L Thompson case CEED_COPY_VALUES: 231a3b195efSJeremy L Thompson case CEED_OWN_POINTER: 232a3b195efSJeremy L Thompson if (!impl->d_array) { 233a3b195efSJeremy L Thompson if (impl->d_array_borrowed) { 234a3b195efSJeremy L Thompson impl->d_array = impl->d_array_borrowed; 235a3b195efSJeremy L Thompson } else { 236a3b195efSJeremy L Thompson if (!impl->d_array_owned) CeedCallHip(ceed, hipMalloc((void **)&impl->d_array_owned, sizeof(CeedScalar) * length)); 237a3b195efSJeremy L Thompson impl->d_array = impl->d_array_owned; 238a3b195efSJeremy L Thompson } 239a3b195efSJeremy L Thompson } 240a3b195efSJeremy L Thompson if (array) CeedCallHip(ceed, hipMemcpy(impl->d_array, array, sizeof(CeedScalar) * length, hipMemcpyHostToDevice)); 241a3b195efSJeremy L Thompson if (copy_mode == CEED_OWN_POINTER) CeedCallBackend(CeedFree(&array)); 242a3b195efSJeremy L Thompson break; 243a3b195efSJeremy L Thompson case CEED_USE_POINTER: 244a3b195efSJeremy L Thompson CeedCallHip(ceed, hipFree(impl->d_array_owned)); 245a3b195efSJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_array_owned)); 246a3b195efSJeremy L Thompson impl->h_array_owned = NULL; 247a3b195efSJeremy L Thompson impl->h_array_borrowed = array; 248a3b195efSJeremy L Thompson impl->d_array = impl->h_array_borrowed; 249a3b195efSJeremy L Thompson } 250a3b195efSJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 251a3b195efSJeremy L Thompson return CEED_ERROR_SUCCESS; 252a3b195efSJeremy L Thompson } 253a3b195efSJeremy L Thompson 254a3b195efSJeremy L Thompson //------------------------------------------------------------------------------ 2550d0321e0SJeremy L Thompson // Set the array used by a vector, 2560d0321e0SJeremy L Thompson // freeing any previously allocated array if applicable 2570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2582b730f8bSJeremy L Thompson static int CeedVectorSetArray_Hip(const CeedVector vec, const CeedMemType mem_type, const CeedCopyMode copy_mode, CeedScalar *array) { 2590d0321e0SJeremy L Thompson CeedVector_Hip *impl; 2600d0321e0SJeremy L Thompson 261b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 2622b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetAllInvalid_Hip(vec)); 26343c928f4SJeremy L Thompson switch (mem_type) { 2640d0321e0SJeremy L Thompson case CEED_MEM_HOST: 265a3b195efSJeremy L Thompson if (impl->has_unified_addressing) { 266a3b195efSJeremy L Thompson return CeedVectorSetArrayUnifiedHostToDevice_Hip(vec, copy_mode, array); 267a3b195efSJeremy L Thompson } else { 26843c928f4SJeremy L Thompson return CeedVectorSetArrayHost_Hip(vec, copy_mode, array); 269a3b195efSJeremy L Thompson } 2700d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 27143c928f4SJeremy L Thompson return CeedVectorSetArrayDevice_Hip(vec, copy_mode, array); 2720d0321e0SJeremy L Thompson } 2730d0321e0SJeremy L Thompson return CEED_ERROR_UNSUPPORTED; 2740d0321e0SJeremy L Thompson } 2750d0321e0SJeremy L Thompson 2760d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2773196072fSJeremy L Thompson // Copy host array to value strided 2783196072fSJeremy L Thompson //------------------------------------------------------------------------------ 279832a6d73SJeremy L Thompson static int CeedHostCopyStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *h_copy_array) { 280832a6d73SJeremy L Thompson for (CeedSize i = start; i < stop; i += step) h_copy_array[i] = h_array[i]; 2813196072fSJeremy L Thompson return CEED_ERROR_SUCCESS; 2823196072fSJeremy L Thompson } 2833196072fSJeremy L Thompson 2843196072fSJeremy L Thompson //------------------------------------------------------------------------------ 285956a3dbaSJeremy L Thompson // Copy device array to value strided (impl in .hip.cpp file) 2863196072fSJeremy L Thompson //------------------------------------------------------------------------------ 287832a6d73SJeremy L Thompson int CeedDeviceCopyStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar *d_copy_array); 2883196072fSJeremy L Thompson 2893196072fSJeremy L Thompson //------------------------------------------------------------------------------ 2903196072fSJeremy L Thompson // Copy a vector to a value strided 2913196072fSJeremy L Thompson //------------------------------------------------------------------------------ 292832a6d73SJeremy L Thompson static int CeedVectorCopyStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedVector vec_copy) { 2933196072fSJeremy L Thompson CeedSize length; 2943196072fSJeremy L Thompson CeedVector_Hip *impl; 2953196072fSJeremy L Thompson 2963196072fSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 297a7efc114SJeremy L Thompson { 298a7efc114SJeremy L Thompson CeedSize length_vec, length_copy; 299a7efc114SJeremy L Thompson 3005a5594ffSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length_vec)); 3015a5594ffSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec_copy, &length_copy)); 302a7efc114SJeremy L Thompson length = length_vec < length_copy ? length_vec : length_copy; 303a7efc114SJeremy L Thompson } 304832a6d73SJeremy L Thompson if (stop == -1) stop = length; 3053196072fSJeremy L Thompson // Set value for synced device/host array 3063196072fSJeremy L Thompson if (impl->d_array) { 3073196072fSJeremy L Thompson CeedScalar *copy_array; 3083196072fSJeremy L Thompson 3093196072fSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_DEVICE, ©_array)); 310e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000) 311e84c3ebcSJeremy L Thompson hipblasHandle_t handle; 312*0002d81dSZach Atkins hipStream_t stream; 313e84c3ebcSJeremy L Thompson Ceed ceed; 314e84c3ebcSJeremy L Thompson 315e84c3ebcSJeremy L Thompson CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 316e84c3ebcSJeremy L Thompson CeedCallBackend(CeedGetHipblasHandle_Hip(ceed, &handle)); 317*0002d81dSZach Atkins CeedCallHipblas(ceed, hipblasGetStream(handle, &stream)); 318e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32) 319832a6d73SJeremy L Thompson CeedCallHipblas(ceed, hipblasScopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); 320e84c3ebcSJeremy L Thompson #else /* CEED_SCALAR */ 321832a6d73SJeremy L Thompson CeedCallHipblas(ceed, hipblasDcopy_64(handle, (int64_t)(stop - start), impl->d_array + start, (int64_t)step, copy_array + start, (int64_t)step)); 322e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */ 323*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 324e84c3ebcSJeremy L Thompson #else /* HIP_VERSION */ 325832a6d73SJeremy L Thompson CeedCallBackend(CeedDeviceCopyStrided_Hip(impl->d_array, start, stop, step, copy_array)); 326e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 3273196072fSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(vec_copy, ©_array)); 328e84c3ebcSJeremy L Thompson impl->h_array = NULL; 329e84c3ebcSJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 3303196072fSJeremy L Thompson } else if (impl->h_array) { 3313196072fSJeremy L Thompson CeedScalar *copy_array; 3323196072fSJeremy L Thompson 3333196072fSJeremy L Thompson CeedCallBackend(CeedVectorGetArray(vec_copy, CEED_MEM_HOST, ©_array)); 334832a6d73SJeremy L Thompson CeedCallBackend(CeedHostCopyStrided_Hip(impl->h_array, start, stop, step, copy_array)); 3353196072fSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(vec_copy, ©_array)); 336e84c3ebcSJeremy L Thompson impl->d_array = NULL; 3373196072fSJeremy L Thompson } else { 3383196072fSJeremy L Thompson return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set"); 3393196072fSJeremy L Thompson } 3403196072fSJeremy L Thompson return CEED_ERROR_SUCCESS; 3413196072fSJeremy L Thompson } 3423196072fSJeremy L Thompson 3433196072fSJeremy L Thompson //------------------------------------------------------------------------------ 3440d0321e0SJeremy L Thompson // Set host array to value 3450d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3469330daecSnbeams static int CeedHostSetValue_Hip(CeedScalar *h_array, CeedSize length, CeedScalar val) { 3479330daecSnbeams for (CeedSize i = 0; i < length; i++) h_array[i] = val; 3480d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3490d0321e0SJeremy L Thompson } 3500d0321e0SJeremy L Thompson 3510d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3520d0321e0SJeremy L Thompson // Set device array to value (impl in .hip file) 3530d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3549330daecSnbeams int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val); 3550d0321e0SJeremy L Thompson 3560d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 357b2165e7aSSebastian Grimberg // Set a vector to a value 3580d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3590d0321e0SJeremy L Thompson static int CeedVectorSetValue_Hip(CeedVector vec, CeedScalar val) { 3601f9221feSJeremy L Thompson CeedSize length; 361b7453713SJeremy L Thompson CeedVector_Hip *impl; 362a3b195efSJeremy L Thompson Ceed_Hip *hip_data; 3630d0321e0SJeremy L Thompson 364b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 365a3b195efSJeremy L Thompson CeedCallBackend(CeedGetData(CeedVectorReturnCeed(vec), &hip_data)); 366b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length)); 3670d0321e0SJeremy L Thompson // Set value for synced device/host array 3680d0321e0SJeremy L Thompson if (!impl->d_array && !impl->h_array) { 3690d0321e0SJeremy L Thompson if (impl->d_array_borrowed) { 3700d0321e0SJeremy L Thompson impl->d_array = impl->d_array_borrowed; 3710d0321e0SJeremy L Thompson } else if (impl->h_array_borrowed) { 3720d0321e0SJeremy L Thompson impl->h_array = impl->h_array_borrowed; 3730d0321e0SJeremy L Thompson } else if (impl->d_array_owned) { 3740d0321e0SJeremy L Thompson impl->d_array = impl->d_array_owned; 3750d0321e0SJeremy L Thompson } else if (impl->h_array_owned) { 3760d0321e0SJeremy L Thompson impl->h_array = impl->h_array_owned; 3770d0321e0SJeremy L Thompson } else { 3782b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetArray(vec, CEED_MEM_DEVICE, CEED_COPY_VALUES, NULL)); 3790d0321e0SJeremy L Thompson } 3800d0321e0SJeremy L Thompson } 3810d0321e0SJeremy L Thompson if (impl->d_array) { 382a3b195efSJeremy L Thompson if (val == 0 && !impl->h_array_borrowed) { 383124cc107SJeremy L Thompson CeedCallHip(CeedVectorReturnCeed(vec), hipMemset(impl->d_array, 0, length * sizeof(CeedScalar))); 384124cc107SJeremy L Thompson } else { 3852b730f8bSJeremy L Thompson CeedCallBackend(CeedDeviceSetValue_Hip(impl->d_array, length, val)); 3860d0321e0SJeremy L Thompson } 387124cc107SJeremy L Thompson impl->h_array = NULL; 388124cc107SJeremy L Thompson } else if (impl->h_array) { 3892b730f8bSJeremy L Thompson CeedCallBackend(CeedHostSetValue_Hip(impl->h_array, length, val)); 390b2165e7aSSebastian Grimberg impl->d_array = NULL; 3910d0321e0SJeremy L Thompson } 3920d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3930d0321e0SJeremy L Thompson } 3940d0321e0SJeremy L Thompson 3950d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3963196072fSJeremy L Thompson // Set host array to value strided 3973196072fSJeremy L Thompson //------------------------------------------------------------------------------ 39814c82621SJeremy L Thompson static int CeedHostSetValueStrided_Hip(CeedScalar *h_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { 3992d73a370SJeremy L Thompson for (CeedSize i = start; i < stop; i += step) h_array[i] = val; 4003196072fSJeremy L Thompson return CEED_ERROR_SUCCESS; 4013196072fSJeremy L Thompson } 4023196072fSJeremy L Thompson 4033196072fSJeremy L Thompson //------------------------------------------------------------------------------ 404956a3dbaSJeremy L Thompson // Set device array to value strided (impl in .hip.cpp file) 4053196072fSJeremy L Thompson //------------------------------------------------------------------------------ 40614c82621SJeremy L Thompson int CeedDeviceSetValueStrided_Hip(CeedScalar *d_array, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val); 4073196072fSJeremy L Thompson 4083196072fSJeremy L Thompson //------------------------------------------------------------------------------ 4093196072fSJeremy L Thompson // Set a vector to a value strided 4103196072fSJeremy L Thompson //------------------------------------------------------------------------------ 411ff90b007SJeremy L Thompson static int CeedVectorSetValueStrided_Hip(CeedVector vec, CeedSize start, CeedSize stop, CeedSize step, CeedScalar val) { 4123196072fSJeremy L Thompson CeedSize length; 4133196072fSJeremy L Thompson CeedVector_Hip *impl; 4143196072fSJeremy L Thompson 4153196072fSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 4163196072fSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length)); 4173196072fSJeremy L Thompson // Set value for synced device/host array 418ff90b007SJeremy L Thompson if (stop == -1) stop = length; 4193196072fSJeremy L Thompson if (impl->d_array) { 42014c82621SJeremy L Thompson CeedCallBackend(CeedDeviceSetValueStrided_Hip(impl->d_array, start, stop, step, val)); 4213196072fSJeremy L Thompson impl->h_array = NULL; 4223196072fSJeremy L Thompson } else if (impl->h_array) { 42314c82621SJeremy L Thompson CeedCallBackend(CeedHostSetValueStrided_Hip(impl->h_array, start, stop, step, val)); 4243196072fSJeremy L Thompson impl->d_array = NULL; 4253196072fSJeremy L Thompson } else { 4263196072fSJeremy L Thompson return CeedError(CeedVectorReturnCeed(vec), CEED_ERROR_BACKEND, "CeedVector must have valid data set"); 4273196072fSJeremy L Thompson } 4283196072fSJeremy L Thompson return CEED_ERROR_SUCCESS; 4293196072fSJeremy L Thompson } 4303196072fSJeremy L Thompson 4313196072fSJeremy L Thompson //------------------------------------------------------------------------------ 4320d0321e0SJeremy L Thompson // Vector Take Array 4330d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4342b730f8bSJeremy L Thompson static int CeedVectorTakeArray_Hip(CeedVector vec, CeedMemType mem_type, CeedScalar **array) { 4350d0321e0SJeremy L Thompson CeedVector_Hip *impl; 436b7453713SJeremy L Thompson 4372b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 4380d0321e0SJeremy L Thompson 43943c928f4SJeremy L Thompson // Sync array to requested mem_type 4402b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSyncArray(vec, mem_type)); 4410d0321e0SJeremy L Thompson 4420d0321e0SJeremy L Thompson // Update pointer 44343c928f4SJeremy L Thompson switch (mem_type) { 4440d0321e0SJeremy L Thompson case CEED_MEM_HOST: 4450d0321e0SJeremy L Thompson (*array) = impl->h_array_borrowed; 4460d0321e0SJeremy L Thompson impl->h_array_borrowed = NULL; 4470d0321e0SJeremy L Thompson impl->h_array = NULL; 4480d0321e0SJeremy L Thompson break; 4490d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 4500d0321e0SJeremy L Thompson (*array) = impl->d_array_borrowed; 4510d0321e0SJeremy L Thompson impl->d_array_borrowed = NULL; 4520d0321e0SJeremy L Thompson impl->d_array = NULL; 4530d0321e0SJeremy L Thompson break; 4540d0321e0SJeremy L Thompson } 4550d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4560d0321e0SJeremy L Thompson } 4570d0321e0SJeremy L Thompson 4580d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 459a3b195efSJeremy L Thompson // Core logic for array synchronization for GetArray. 4600d0321e0SJeremy L Thompson // If a different memory type is most up to date, this will perform a copy 4610d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 462a3b195efSJeremy L Thompson static int CeedVectorGetArrayCore_Hip(const CeedVector vec, CeedMemType mem_type, CeedScalar **array) { 4630d0321e0SJeremy L Thompson CeedVector_Hip *impl; 464b7453713SJeremy L Thompson 4652b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 4660d0321e0SJeremy L Thompson 467a3b195efSJeremy L Thompson // Use device memory for unified memory 468a3b195efSJeremy L Thompson mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type; 469a3b195efSJeremy L Thompson 47043c928f4SJeremy L Thompson // Sync array to requested mem_type 4712b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSyncArray(vec, mem_type)); 4720d0321e0SJeremy L Thompson 4730d0321e0SJeremy L Thompson // Update pointer 47443c928f4SJeremy L Thompson switch (mem_type) { 4750d0321e0SJeremy L Thompson case CEED_MEM_HOST: 4760d0321e0SJeremy L Thompson *array = impl->h_array; 4770d0321e0SJeremy L Thompson break; 4780d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 4790d0321e0SJeremy L Thompson *array = impl->d_array; 4800d0321e0SJeremy L Thompson break; 4810d0321e0SJeremy L Thompson } 4820d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4830d0321e0SJeremy L Thompson } 4840d0321e0SJeremy L Thompson 4850d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 48643c928f4SJeremy L Thompson // Get read-only access to a vector via the specified mem_type 4870d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4882b730f8bSJeremy L Thompson static int CeedVectorGetArrayRead_Hip(const CeedVector vec, const CeedMemType mem_type, const CeedScalar **array) { 48943c928f4SJeremy L Thompson return CeedVectorGetArrayCore_Hip(vec, mem_type, (CeedScalar **)array); 4900d0321e0SJeremy L Thompson } 4910d0321e0SJeremy L Thompson 4920d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 49343c928f4SJeremy L Thompson // Get read/write access to a vector via the specified mem_type 4940d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 495a3b195efSJeremy L Thompson static int CeedVectorGetArray_Hip(const CeedVector vec, CeedMemType mem_type, CeedScalar **array) { 4960d0321e0SJeremy L Thompson CeedVector_Hip *impl; 497b7453713SJeremy L Thompson 4982b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 499a3b195efSJeremy L Thompson 500a3b195efSJeremy L Thompson // Use device memory for unified memory 501a3b195efSJeremy L Thompson mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type; 502a3b195efSJeremy L Thompson 503a3b195efSJeremy L Thompson // 'Get' array and set only 'get'ed array as valid 5042b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayCore_Hip(vec, mem_type, array)); 5052b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetAllInvalid_Hip(vec)); 50643c928f4SJeremy L Thompson switch (mem_type) { 5070d0321e0SJeremy L Thompson case CEED_MEM_HOST: 5080d0321e0SJeremy L Thompson impl->h_array = *array; 509a3b195efSJeremy L Thompson if (impl->has_unified_addressing) impl->d_array = *array; 5100d0321e0SJeremy L Thompson break; 5110d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 5120d0321e0SJeremy L Thompson impl->d_array = *array; 5130d0321e0SJeremy L Thompson break; 5140d0321e0SJeremy L Thompson } 5150d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 5160d0321e0SJeremy L Thompson } 5170d0321e0SJeremy L Thompson 5180d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 51943c928f4SJeremy L Thompson // Get write access to a vector via the specified mem_type 5200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 521a3b195efSJeremy L Thompson static int CeedVectorGetArrayWrite_Hip(const CeedVector vec, CeedMemType mem_type, CeedScalar **array) { 5220d0321e0SJeremy L Thompson bool has_array_of_type = true; 523b7453713SJeremy L Thompson CeedVector_Hip *impl; 524a3b195efSJeremy L Thompson Ceed_Hip *hip_data; 525b7453713SJeremy L Thompson 526b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 527a3b195efSJeremy L Thompson CeedCallBackend(CeedGetData(CeedVectorReturnCeed(vec), &hip_data)); 528a3b195efSJeremy L Thompson 529a3b195efSJeremy L Thompson // Use device memory for unified memory 530a3b195efSJeremy L Thompson mem_type = impl->has_unified_addressing && !impl->h_array_borrowed ? CEED_MEM_DEVICE : mem_type; 531a3b195efSJeremy L Thompson 5322b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorHasArrayOfType_Hip(vec, mem_type, &has_array_of_type)); 5330d0321e0SJeremy L Thompson if (!has_array_of_type) { 5340d0321e0SJeremy L Thompson // Allocate if array is not yet allocated 5352b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetArray(vec, mem_type, CEED_COPY_VALUES, NULL)); 5360d0321e0SJeremy L Thompson } else { 5370d0321e0SJeremy L Thompson // Select dirty array 53843c928f4SJeremy L Thompson switch (mem_type) { 5390d0321e0SJeremy L Thompson case CEED_MEM_HOST: 5402b730f8bSJeremy L Thompson if (impl->h_array_borrowed) impl->h_array = impl->h_array_borrowed; 5412b730f8bSJeremy L Thompson else impl->h_array = impl->h_array_owned; 5420d0321e0SJeremy L Thompson break; 5430d0321e0SJeremy L Thompson case CEED_MEM_DEVICE: 5442b730f8bSJeremy L Thompson if (impl->d_array_borrowed) impl->d_array = impl->d_array_borrowed; 5452b730f8bSJeremy L Thompson else impl->d_array = impl->d_array_owned; 5460d0321e0SJeremy L Thompson } 5470d0321e0SJeremy L Thompson } 54843c928f4SJeremy L Thompson return CeedVectorGetArray_Hip(vec, mem_type, array); 5490d0321e0SJeremy L Thompson } 5500d0321e0SJeremy L Thompson 5510d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5520d0321e0SJeremy L Thompson // Get the norm of a CeedVector 5530d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 5542b730f8bSJeremy L Thompson static int CeedVectorNorm_Hip(CeedVector vec, CeedNormType type, CeedScalar *norm) { 5550d0321e0SJeremy L Thompson Ceed ceed; 556e84c3ebcSJeremy L Thompson CeedSize length; 557e84c3ebcSJeremy L Thompson #if (HIP_VERSION < 60000000) 558e84c3ebcSJeremy L Thompson CeedSize num_calls; 559e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 560b7453713SJeremy L Thompson const CeedScalar *d_array; 561b7453713SJeremy L Thompson CeedVector_Hip *impl; 5620d0321e0SJeremy L Thompson hipblasHandle_t handle; 563*0002d81dSZach Atkins hipStream_t stream; 564a3b195efSJeremy L Thompson Ceed_Hip *hip_data; 565b7453713SJeremy L Thompson 566b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 567a3b195efSJeremy L Thompson CeedCallBackend(CeedGetData(ceed, &hip_data)); 568b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 569b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length)); 570eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetHipblasHandle_Hip(ceed, &handle)); 571*0002d81dSZach Atkins CeedCallHipblas(ceed, hipblasGetStream(handle, &stream)); 572e84c3ebcSJeremy L Thompson #if (HIP_VERSION < 60000000) 573e84c3ebcSJeremy L Thompson // With ROCm 6, we can use the 64-bit integer interface. Prior to that, 574e84c3ebcSJeremy L Thompson // we need to check if the vector is too long to handle with int32, 575e84c3ebcSJeremy L Thompson // and if so, divide it into subsections for repeated hipBLAS calls. 576672b0f2aSSebastian Grimberg num_calls = length / INT_MAX; 5779330daecSnbeams if (length % INT_MAX > 0) num_calls += 1; 578e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 5799330daecSnbeams 5800d0321e0SJeremy L Thompson // Compute norm 5812b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(vec, CEED_MEM_DEVICE, &d_array)); 5820d0321e0SJeremy L Thompson switch (type) { 5830d0321e0SJeremy L Thompson case CEED_NORM_1: { 584f6f49adbSnbeams *norm = 0.0; 585e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32) 586e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000) // We have ROCm 6, and can use 64-bit integers 587e84c3ebcSJeremy L Thompson CeedCallHipblas(ceed, hipblasSasum_64(handle, (int64_t)length, (float *)d_array, 1, (float *)norm)); 588*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 589e84c3ebcSJeremy L Thompson #else /* HIP_VERSION */ 5909330daecSnbeams float sub_norm = 0.0; 5919330daecSnbeams float *d_array_start; 592b7453713SJeremy L Thompson 5939330daecSnbeams for (CeedInt i = 0; i < num_calls; i++) { 5949330daecSnbeams d_array_start = (float *)d_array + (CeedSize)(i)*INT_MAX; 5959330daecSnbeams CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; 5969330daecSnbeams CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; 597b7453713SJeremy L Thompson 598a3b195efSJeremy L Thompson CeedCallHipblas(ceed, hipblasSasum(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm)); 599*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 6009330daecSnbeams *norm += sub_norm; 6019330daecSnbeams } 602e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 603e84c3ebcSJeremy L Thompson #else /* CEED_SCALAR */ 604e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000) 605e84c3ebcSJeremy L Thompson CeedCallHipblas(ceed, hipblasDasum_64(handle, (int64_t)length, (double *)d_array, 1, (double *)norm)); 606*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 607e84c3ebcSJeremy L Thompson #else /* HIP_VERSION */ 6089330daecSnbeams double sub_norm = 0.0; 6099330daecSnbeams double *d_array_start; 610b7453713SJeremy L Thompson 6119330daecSnbeams for (CeedInt i = 0; i < num_calls; i++) { 6129330daecSnbeams d_array_start = (double *)d_array + (CeedSize)(i)*INT_MAX; 6139330daecSnbeams CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; 6149330daecSnbeams CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; 615b7453713SJeremy L Thompson 6169330daecSnbeams CeedCallHipblas(ceed, hipblasDasum(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm)); 617*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 6189330daecSnbeams *norm += sub_norm; 6199330daecSnbeams } 620e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 621e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */ 6220d0321e0SJeremy L Thompson break; 6230d0321e0SJeremy L Thompson } 6240d0321e0SJeremy L Thompson case CEED_NORM_2: { 625e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32) 626e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000) 627e84c3ebcSJeremy L Thompson CeedCallHipblas(ceed, hipblasSnrm2_64(handle, (int64_t)length, (float *)d_array, 1, (float *)norm)); 628*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 629a3b195efSJeremy L Thompson #else /* HIP_VERSION */ 6309330daecSnbeams float sub_norm = 0.0, norm_sum = 0.0; 6319330daecSnbeams float *d_array_start; 632b7453713SJeremy L Thompson 6339330daecSnbeams for (CeedInt i = 0; i < num_calls; i++) { 6349330daecSnbeams d_array_start = (float *)d_array + (CeedSize)(i)*INT_MAX; 6359330daecSnbeams CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; 6369330daecSnbeams CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; 637b7453713SJeremy L Thompson 6389330daecSnbeams CeedCallHipblas(ceed, hipblasSnrm2(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &sub_norm)); 639*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 6409330daecSnbeams norm_sum += sub_norm * sub_norm; 6419330daecSnbeams } 6429330daecSnbeams *norm = sqrt(norm_sum); 643e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 644e84c3ebcSJeremy L Thompson #else /* CEED_SCALAR */ 645e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000) 646e84c3ebcSJeremy L Thompson CeedCallHipblas(ceed, hipblasDnrm2_64(handle, (int64_t)length, (double *)d_array, 1, (double *)norm)); 647*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 648a3b195efSJeremy L Thompson #else /* HIP_VERSION */ 6499330daecSnbeams double sub_norm = 0.0, norm_sum = 0.0; 6509330daecSnbeams double *d_array_start; 651b7453713SJeremy L Thompson 6529330daecSnbeams for (CeedInt i = 0; i < num_calls; i++) { 6539330daecSnbeams d_array_start = (double *)d_array + (CeedSize)(i)*INT_MAX; 6549330daecSnbeams CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; 6559330daecSnbeams CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; 656b7453713SJeremy L Thompson 6579330daecSnbeams CeedCallHipblas(ceed, hipblasDnrm2(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &sub_norm)); 658*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 6599330daecSnbeams norm_sum += sub_norm * sub_norm; 6609330daecSnbeams } 6619330daecSnbeams *norm = sqrt(norm_sum); 662e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 663e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */ 6640d0321e0SJeremy L Thompson break; 6650d0321e0SJeremy L Thompson } 6660d0321e0SJeremy L Thompson case CEED_NORM_MAX: { 667e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32) 668e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000) 669e84c3ebcSJeremy L Thompson int64_t index; 670e84c3ebcSJeremy L Thompson CeedScalar norm_no_abs; 671b7453713SJeremy L Thompson 672e84c3ebcSJeremy L Thompson CeedCallHipblas(ceed, hipblasIsamax_64(handle, (int64_t)length, (float *)d_array, 1, &index)); 673*0002d81dSZach Atkins CeedCallHip(ceed, hipMemcpyAsync(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream)); 674*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 675e84c3ebcSJeremy L Thompson *norm = fabs(norm_no_abs); 676e84c3ebcSJeremy L Thompson #else /* HIP_VERSION */ 677e84c3ebcSJeremy L Thompson CeedInt index; 6789330daecSnbeams float sub_max = 0.0, current_max = 0.0; 6799330daecSnbeams float *d_array_start; 680e84c3ebcSJeremy L Thompson 6819330daecSnbeams for (CeedInt i = 0; i < num_calls; i++) { 6829330daecSnbeams d_array_start = (float *)d_array + (CeedSize)(i)*INT_MAX; 6839330daecSnbeams CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; 6849330daecSnbeams CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; 685b7453713SJeremy L Thompson 686b7453713SJeremy L Thompson CeedCallHipblas(ceed, hipblasIsamax(handle, (CeedInt)sub_length, (float *)d_array_start, 1, &index)); 687a3b195efSJeremy L Thompson if (hip_data->has_unified_addressing) { 688*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 689a3b195efSJeremy L Thompson sub_max = fabs(d_array[index - 1]); 690a3b195efSJeremy L Thompson } else { 691*0002d81dSZach Atkins CeedCallHip(ceed, hipMemcpyAsync(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream)); 692*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 693a3b195efSJeremy L Thompson } 6949330daecSnbeams if (fabs(sub_max) > current_max) current_max = fabs(sub_max); 6959330daecSnbeams } 6969330daecSnbeams *norm = current_max; 697e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 698e84c3ebcSJeremy L Thompson #else /* CEED_SCALAR */ 699e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000) 700e84c3ebcSJeremy L Thompson int64_t index; 701e84c3ebcSJeremy L Thompson CeedScalar norm_no_abs; 702e84c3ebcSJeremy L Thompson 703e84c3ebcSJeremy L Thompson CeedCallHipblas(ceed, hipblasIdamax_64(handle, (int64_t)length, (double *)d_array, 1, &index)); 704a3b195efSJeremy L Thompson if (hip_data->has_unified_addressing) { 705*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 706a3b195efSJeremy L Thompson norm_no_abs = fabs(d_array[index - 1]); 707a3b195efSJeremy L Thompson } else { 708*0002d81dSZach Atkins CeedCallHip(ceed, hipMemcpyAsync(&norm_no_abs, impl->d_array + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream)); 709*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 710a3b195efSJeremy L Thompson } 711e84c3ebcSJeremy L Thompson *norm = fabs(norm_no_abs); 712e84c3ebcSJeremy L Thompson #else /* HIP_VERSION */ 713e84c3ebcSJeremy L Thompson CeedInt index; 7149330daecSnbeams double sub_max = 0.0, current_max = 0.0; 7159330daecSnbeams double *d_array_start; 716b7453713SJeremy L Thompson 7179330daecSnbeams for (CeedInt i = 0; i < num_calls; i++) { 7189330daecSnbeams d_array_start = (double *)d_array + (CeedSize)(i)*INT_MAX; 7199330daecSnbeams CeedSize remaining_length = length - (CeedSize)(i)*INT_MAX; 7209330daecSnbeams CeedInt sub_length = (i == num_calls - 1) ? (CeedInt)(remaining_length) : INT_MAX; 721b7453713SJeremy L Thompson 722b7453713SJeremy L Thompson CeedCallHipblas(ceed, hipblasIdamax(handle, (CeedInt)sub_length, (double *)d_array_start, 1, &index)); 723a3b195efSJeremy L Thompson if (hip_data->has_unified_addressing) { 724*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 725a3b195efSJeremy L Thompson sub_max = fabs(d_array[index - 1]); 726a3b195efSJeremy L Thompson } else { 727*0002d81dSZach Atkins CeedCallHip(ceed, hipMemcpyAsync(&sub_max, d_array_start + index - 1, sizeof(CeedScalar), hipMemcpyDeviceToHost, stream)); 728*0002d81dSZach Atkins CeedCallHip(ceed, hipStreamSynchronize(stream)); 729a3b195efSJeremy L Thompson } 7309330daecSnbeams if (fabs(sub_max) > current_max) current_max = fabs(sub_max); 7319330daecSnbeams } 7329330daecSnbeams *norm = current_max; 733e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 734e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */ 7350d0321e0SJeremy L Thompson break; 7360d0321e0SJeremy L Thompson } 7370d0321e0SJeremy L Thompson } 7382b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(vec, &d_array)); 7399bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 7400d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 7410d0321e0SJeremy L Thompson } 7420d0321e0SJeremy L Thompson 7430d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7440d0321e0SJeremy L Thompson // Take reciprocal of a vector on host 7450d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7469330daecSnbeams static int CeedHostReciprocal_Hip(CeedScalar *h_array, CeedSize length) { 7479330daecSnbeams for (CeedSize i = 0; i < length; i++) { 7482b730f8bSJeremy L Thompson if (fabs(h_array[i]) > CEED_EPSILON) h_array[i] = 1. / h_array[i]; 7492b730f8bSJeremy L Thompson } 7500d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 7510d0321e0SJeremy L Thompson } 7520d0321e0SJeremy L Thompson 7530d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 754956a3dbaSJeremy L Thompson // Take reciprocal of a vector on device (impl in .hip.cpp file) 7550d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7569330daecSnbeams int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length); 7570d0321e0SJeremy L Thompson 7580d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7590d0321e0SJeremy L Thompson // Take reciprocal of a vector 7600d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7610d0321e0SJeremy L Thompson static int CeedVectorReciprocal_Hip(CeedVector vec) { 7621f9221feSJeremy L Thompson CeedSize length; 763b7453713SJeremy L Thompson CeedVector_Hip *impl; 7640d0321e0SJeremy L Thompson 765b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 766b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetLength(vec, &length)); 7670d0321e0SJeremy L Thompson // Set value for synced device/host array 7682b730f8bSJeremy L Thompson if (impl->d_array) CeedCallBackend(CeedDeviceReciprocal_Hip(impl->d_array, length)); 7692b730f8bSJeremy L Thompson if (impl->h_array) CeedCallBackend(CeedHostReciprocal_Hip(impl->h_array, length)); 7700d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 7710d0321e0SJeremy L Thompson } 7720d0321e0SJeremy L Thompson 7730d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7740d0321e0SJeremy L Thompson // Compute x = alpha x on the host 7750d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7769330daecSnbeams static int CeedHostScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) { 7779330daecSnbeams for (CeedSize i = 0; i < length; i++) x_array[i] *= alpha; 7780d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 7790d0321e0SJeremy L Thompson } 7800d0321e0SJeremy L Thompson 7810d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 782956a3dbaSJeremy L Thompson // Compute x = alpha x on device (impl in .hip.cpp file) 7830d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7849330daecSnbeams int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length); 7850d0321e0SJeremy L Thompson 7860d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7870d0321e0SJeremy L Thompson // Compute x = alpha x 7880d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 7890d0321e0SJeremy L Thompson static int CeedVectorScale_Hip(CeedVector x, CeedScalar alpha) { 7901f9221feSJeremy L Thompson CeedSize length; 791e84c3ebcSJeremy L Thompson CeedVector_Hip *impl; 7920d0321e0SJeremy L Thompson 793e84c3ebcSJeremy L Thompson CeedCallBackend(CeedVectorGetData(x, &impl)); 794b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetLength(x, &length)); 7950d0321e0SJeremy L Thompson // Set value for synced device/host array 796e84c3ebcSJeremy L Thompson if (impl->d_array) { 797e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000) 798e84c3ebcSJeremy L Thompson hipblasHandle_t handle; 799*0002d81dSZach Atkins hipStream_t stream; 800e84c3ebcSJeremy L Thompson 801e84c3ebcSJeremy L Thompson CeedCallBackend(CeedGetHipblasHandle_Hip(CeedVectorReturnCeed(x), &handle)); 802*0002d81dSZach Atkins CeedCallHipblas(CeedVectorReturnCeed(x), hipblasGetStream(handle, &stream)); 803e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32) 804e84c3ebcSJeremy L Thompson CeedCallHipblas(CeedVectorReturnCeed(x), hipblasSscal_64(handle, (int64_t)length, &alpha, impl->d_array, 1)); 805e84c3ebcSJeremy L Thompson #else /* CEED_SCALAR */ 806e84c3ebcSJeremy L Thompson CeedCallHipblas(CeedVectorReturnCeed(x), hipblasDscal_64(handle, (int64_t)length, &alpha, impl->d_array, 1)); 807e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */ 808*0002d81dSZach Atkins CeedCallHip(CeedVectorReturnCeed(x), hipStreamSynchronize(stream)); 809e84c3ebcSJeremy L Thompson #else /* HIP_VERSION */ 810e84c3ebcSJeremy L Thompson CeedCallBackend(CeedDeviceScale_Hip(impl->d_array, alpha, length)); 811e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 812e84c3ebcSJeremy L Thompson impl->h_array = NULL; 813e84c3ebcSJeremy L Thompson } 814e84c3ebcSJeremy L Thompson if (impl->h_array) { 815e84c3ebcSJeremy L Thompson CeedCallBackend(CeedHostScale_Hip(impl->h_array, alpha, length)); 816e84c3ebcSJeremy L Thompson impl->d_array = NULL; 817e84c3ebcSJeremy L Thompson } 8180d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 8190d0321e0SJeremy L Thompson } 8200d0321e0SJeremy L Thompson 8210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8220d0321e0SJeremy L Thompson // Compute y = alpha x + y on the host 8230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8249330daecSnbeams static int CeedHostAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) { 8259330daecSnbeams for (CeedSize i = 0; i < length; i++) y_array[i] += alpha * x_array[i]; 8260d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 8270d0321e0SJeremy L Thompson } 8280d0321e0SJeremy L Thompson 8290d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 830956a3dbaSJeremy L Thompson // Compute y = alpha x + y on device (impl in .hip.cpp file) 8310d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8329330daecSnbeams int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length); 8330d0321e0SJeremy L Thompson 8340d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8350d0321e0SJeremy L Thompson // Compute y = alpha x + y 8360d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 8370d0321e0SJeremy L Thompson static int CeedVectorAXPY_Hip(CeedVector y, CeedScalar alpha, CeedVector x) { 838b7453713SJeremy L Thompson CeedSize length; 8390d0321e0SJeremy L Thompson CeedVector_Hip *y_impl, *x_impl; 840b7453713SJeremy L Thompson 8412b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(y, &y_impl)); 8422b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(x, &x_impl)); 8432b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(y, &length)); 8440d0321e0SJeremy L Thompson // Set value for synced device/host array 8450d0321e0SJeremy L Thompson if (y_impl->d_array) { 8462b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE)); 847e84c3ebcSJeremy L Thompson #if (HIP_VERSION >= 60000000) 848e84c3ebcSJeremy L Thompson hipblasHandle_t handle; 849*0002d81dSZach Atkins hipStream_t stream; 850e84c3ebcSJeremy L Thompson 851*0002d81dSZach Atkins CeedCallBackend(CeedGetHipblasHandle_Hip(CeedVectorReturnCeed(x), &handle)); 852*0002d81dSZach Atkins CeedCallHipblas(CeedVectorReturnCeed(y), hipblasGetStream(handle, &stream)); 853e84c3ebcSJeremy L Thompson #if defined(CEED_SCALAR_IS_FP32) 854e84c3ebcSJeremy L Thompson CeedCallHipblas(CeedVectorReturnCeed(y), hipblasSaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1)); 855e84c3ebcSJeremy L Thompson #else /* CEED_SCALAR */ 856e84c3ebcSJeremy L Thompson CeedCallHipblas(CeedVectorReturnCeed(y), hipblasDaxpy_64(handle, (int64_t)length, &alpha, x_impl->d_array, 1, y_impl->d_array, 1)); 857e84c3ebcSJeremy L Thompson #endif /* CEED_SCALAR */ 858*0002d81dSZach Atkins CeedCallHip(CeedVectorReturnCeed(y), hipStreamSynchronize(stream)); 859e84c3ebcSJeremy L Thompson #else /* HIP_VERSION */ 8602b730f8bSJeremy L Thompson CeedCallBackend(CeedDeviceAXPY_Hip(y_impl->d_array, alpha, x_impl->d_array, length)); 861e84c3ebcSJeremy L Thompson #endif /* HIP_VERSION */ 862e84c3ebcSJeremy L Thompson y_impl->h_array = NULL; 863e84c3ebcSJeremy L Thompson } else if (y_impl->h_array) { 8642b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST)); 8652b730f8bSJeremy L Thompson CeedCallBackend(CeedHostAXPY_Hip(y_impl->h_array, alpha, x_impl->h_array, length)); 866e84c3ebcSJeremy L Thompson y_impl->d_array = NULL; 8670d0321e0SJeremy L Thompson } 8680d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 8690d0321e0SJeremy L Thompson } 870ff1e7120SSebastian Grimberg 8715fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 8725fb68f37SKaren (Ren) Stengel // Compute y = alpha x + beta y on the host 8735fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 8749330daecSnbeams static int CeedHostAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) { 875aa67b842SZach Atkins for (CeedSize i = 0; i < length; i++) y_array[i] = alpha * x_array[i] + beta * y_array[i]; 8765fb68f37SKaren (Ren) Stengel return CEED_ERROR_SUCCESS; 8775fb68f37SKaren (Ren) Stengel } 8785fb68f37SKaren (Ren) Stengel 8795fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 880956a3dbaSJeremy L Thompson // Compute y = alpha x + beta y on device (impl in .hip.cpp file) 8815fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 8829330daecSnbeams int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length); 8835fb68f37SKaren (Ren) Stengel 8845fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 8855fb68f37SKaren (Ren) Stengel // Compute y = alpha x + beta y 8865fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 8875fb68f37SKaren (Ren) Stengel static int CeedVectorAXPBY_Hip(CeedVector y, CeedScalar alpha, CeedScalar beta, CeedVector x) { 888b7453713SJeremy L Thompson CeedSize length; 8895fb68f37SKaren (Ren) Stengel CeedVector_Hip *y_impl, *x_impl; 890b7453713SJeremy L Thompson 8915fb68f37SKaren (Ren) Stengel CeedCallBackend(CeedVectorGetData(y, &y_impl)); 8925fb68f37SKaren (Ren) Stengel CeedCallBackend(CeedVectorGetData(x, &x_impl)); 8935fb68f37SKaren (Ren) Stengel CeedCallBackend(CeedVectorGetLength(y, &length)); 8945fb68f37SKaren (Ren) Stengel // Set value for synced device/host array 8955fb68f37SKaren (Ren) Stengel if (y_impl->d_array) { 8965fb68f37SKaren (Ren) Stengel CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE)); 8975fb68f37SKaren (Ren) Stengel CeedCallBackend(CeedDeviceAXPBY_Hip(y_impl->d_array, alpha, beta, x_impl->d_array, length)); 8985fb68f37SKaren (Ren) Stengel } 8995fb68f37SKaren (Ren) Stengel if (y_impl->h_array) { 9005fb68f37SKaren (Ren) Stengel CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST)); 9015fb68f37SKaren (Ren) Stengel CeedCallBackend(CeedHostAXPBY_Hip(y_impl->h_array, alpha, beta, x_impl->h_array, length)); 9025fb68f37SKaren (Ren) Stengel } 9035fb68f37SKaren (Ren) Stengel return CEED_ERROR_SUCCESS; 9045fb68f37SKaren (Ren) Stengel } 9050d0321e0SJeremy L Thompson 9060d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 9070d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y on the host 9080d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 9099330daecSnbeams static int CeedHostPointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) { 9109330daecSnbeams for (CeedSize i = 0; i < length; i++) w_array[i] = x_array[i] * y_array[i]; 9110d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 9120d0321e0SJeremy L Thompson } 9130d0321e0SJeremy L Thompson 9140d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 915956a3dbaSJeremy L Thompson // Compute the pointwise multiplication w = x .* y on device (impl in .hip.cpp file) 9160d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 9179330daecSnbeams int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length); 9180d0321e0SJeremy L Thompson 9190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 9200d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y 9210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 9222b730f8bSJeremy L Thompson static int CeedVectorPointwiseMult_Hip(CeedVector w, CeedVector x, CeedVector y) { 923b7453713SJeremy L Thompson CeedSize length; 9240d0321e0SJeremy L Thompson CeedVector_Hip *w_impl, *x_impl, *y_impl; 925b7453713SJeremy L Thompson 9262b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(w, &w_impl)); 9272b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(x, &x_impl)); 9282b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetData(y, &y_impl)); 9292b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(w, &length)); 9300d0321e0SJeremy L Thompson 9310d0321e0SJeremy L Thompson // Set value for synced device/host array 9320d0321e0SJeremy L Thompson if (!w_impl->d_array && !w_impl->h_array) { 9332b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(w, 0.0)); 9340d0321e0SJeremy L Thompson } 9350d0321e0SJeremy L Thompson if (w_impl->d_array) { 9362b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_DEVICE)); 9372b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_DEVICE)); 9382b730f8bSJeremy L Thompson CeedCallBackend(CeedDevicePointwiseMult_Hip(w_impl->d_array, x_impl->d_array, y_impl->d_array, length)); 9390d0321e0SJeremy L Thompson } 9400d0321e0SJeremy L Thompson if (w_impl->h_array) { 9412b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSyncArray(x, CEED_MEM_HOST)); 9422b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSyncArray(y, CEED_MEM_HOST)); 9432b730f8bSJeremy L Thompson CeedCallBackend(CeedHostPointwiseMult_Hip(w_impl->h_array, x_impl->h_array, y_impl->h_array, length)); 9440d0321e0SJeremy L Thompson } 9450d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 9460d0321e0SJeremy L Thompson } 9470d0321e0SJeremy L Thompson 9480d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 9490d0321e0SJeremy L Thompson // Destroy the vector 9500d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 9510d0321e0SJeremy L Thompson static int CeedVectorDestroy_Hip(const CeedVector vec) { 9520d0321e0SJeremy L Thompson CeedVector_Hip *impl; 9530d0321e0SJeremy L Thompson 954b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetData(vec, &impl)); 9556e536b99SJeremy L Thompson CeedCallHip(CeedVectorReturnCeed(vec), hipFree(impl->d_array_owned)); 9562b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl->h_array_owned)); 9572b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&impl)); 9580d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 9590d0321e0SJeremy L Thompson } 9600d0321e0SJeremy L Thompson 9610d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 9620d0321e0SJeremy L Thompson // Create a vector of the specified length (does not allocate memory) 9630d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 9641f9221feSJeremy L Thompson int CeedVectorCreate_Hip(CeedSize n, CeedVector vec) { 9650d0321e0SJeremy L Thompson CeedVector_Hip *impl; 966a3b195efSJeremy L Thompson Ceed_Hip *hip_impl; 9670d0321e0SJeremy L Thompson Ceed ceed; 9680d0321e0SJeremy L Thompson 969b7453713SJeremy L Thompson CeedCallBackend(CeedVectorGetCeed(vec, &ceed)); 9702b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasValidArray", CeedVectorHasValidArray_Hip)); 9712b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "HasBorrowedArrayOfType", CeedVectorHasBorrowedArrayOfType_Hip)); 9722b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetArray", CeedVectorSetArray_Hip)); 9732b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "TakeArray", CeedVectorTakeArray_Hip)); 9743e961e14SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "CopyStrided", CeedVectorCopyStrided_Hip)); 9753e961e14SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetValue", CeedVectorSetValue_Hip)); 9763e961e14SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SetValueStrided", CeedVectorSetValueStrided_Hip)); 9772b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "SyncArray", CeedVectorSyncArray_Hip)); 9782b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArray", CeedVectorGetArray_Hip)); 9792b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayRead", CeedVectorGetArrayRead_Hip)); 9802b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "GetArrayWrite", CeedVectorGetArrayWrite_Hip)); 9812b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Norm", CeedVectorNorm_Hip)); 9822b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Reciprocal", CeedVectorReciprocal_Hip)); 9833e961e14SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Scale", CeedVectorScale_Hip)); 9843e961e14SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "AXPY", CeedVectorAXPY_Hip)); 9853e961e14SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "AXPBY", CeedVectorAXPBY_Hip)); 9862b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "PointwiseMult", CeedVectorPointwiseMult_Hip)); 9872b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Vector", vec, "Destroy", CeedVectorDestroy_Hip)); 9882b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &impl)); 989a3b195efSJeremy L Thompson CeedCallBackend(CeedGetData(ceed, &hip_impl)); 990a3b195efSJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 991a3b195efSJeremy L Thompson impl->has_unified_addressing = hip_impl->has_unified_addressing; 9922b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorSetData(vec, impl)); 9930d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 9940d0321e0SJeremy L Thompson } 9952a86cc9dSSebastian Grimberg 9962a86cc9dSSebastian Grimberg //------------------------------------------------------------------------------ 997