15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 10437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 11111870feSJeremy L Thompson #include <string.h> 120d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 132b730f8bSJeremy L Thompson 1449aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h" 150d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 162b730f8bSJeremy L Thompson #include "ceed-hip-ref.h" 170d0321e0SJeremy L Thompson 180d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 190d0321e0SJeremy L Thompson // Basis apply - tensor 200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 21db2becc9SJeremy L Thompson static int CeedBasisApplyCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 22db2becc9SJeremy L Thompson CeedVector u, CeedVector v) { 230d0321e0SJeremy L Thompson Ceed ceed; 24b7453713SJeremy L Thompson CeedInt Q_1d, dim; 257bbbfca3SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 26437930d1SJeremy L Thompson const int max_block_size = 64; 270d0321e0SJeremy L Thompson const CeedScalar *d_u; 280d0321e0SJeremy L Thompson CeedScalar *d_v; 29b7453713SJeremy L Thompson CeedBasis_Hip *data; 30b7453713SJeremy L Thompson 31b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 32b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 33b7453713SJeremy L Thompson 349ea2cfd9SJeremy L Thompson // Get read/write access to u, v 356574a04fSJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 366574a04fSJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 37db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 38db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 390d0321e0SJeremy L Thompson 400d0321e0SJeremy L Thompson // Clear v for transpose operation 41db2becc9SJeremy L Thompson if (is_transpose && !apply_add) { 42*19a04db8SJeremy L Thompson CeedInt num_comp, q_comp, num_nodes, num_qpts; 431f9221feSJeremy L Thompson CeedSize length; 44b7453713SJeremy L Thompson 45*19a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 46*19a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp)); 47*19a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 48*19a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 49*19a04db8SJeremy L Thompson length = (CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)num_qpts * (CeedSize)q_comp)); 502b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 510d0321e0SJeremy L Thompson } 52b2165e7aSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 53b2165e7aSSebastian Grimberg CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 540d0321e0SJeremy L Thompson 550d0321e0SJeremy L Thompson // Basis action 56437930d1SJeremy L Thompson switch (eval_mode) { 570d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 587bbbfca3SJeremy L Thompson void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v}; 59b2165e7aSSebastian Grimberg const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 600d0321e0SJeremy L Thompson 61eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args)); 620d0321e0SJeremy L Thompson } break; 630d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 647bbbfca3SJeremy L Thompson void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v}; 65b2165e7aSSebastian Grimberg const CeedInt block_size = max_block_size; 660d0321e0SJeremy L Thompson 67eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args)); 680d0321e0SJeremy L Thompson } break; 690d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 70097cc795SJames Wright CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights_1d not set", CeedEvalModes[eval_mode]); 71437930d1SJeremy L Thompson void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; 72b2165e7aSSebastian Grimberg const int block_size_x = Q_1d; 73b2165e7aSSebastian Grimberg const int block_size_y = dim >= 2 ? Q_1d : 1; 740d0321e0SJeremy L Thompson 75b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, weight_args)); 760d0321e0SJeremy L Thompson } break; 779ea2cfd9SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 789ea2cfd9SJeremy L Thompson break; 790d0321e0SJeremy L Thompson // LCOV_EXCL_START 800d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 810d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 82bcbe1c99SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 830d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 840d0321e0SJeremy L Thompson } 850d0321e0SJeremy L Thompson 869ea2cfd9SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 872b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 889ea2cfd9SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 899ea2cfd9SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 900d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 910d0321e0SJeremy L Thompson } 920d0321e0SJeremy L Thompson 93db2becc9SJeremy L Thompson static int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 94db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v)); 95db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 96db2becc9SJeremy L Thompson } 97db2becc9SJeremy L Thompson 98db2becc9SJeremy L Thompson static int CeedBasisApplyAdd_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 99db2becc9SJeremy L Thompson CeedVector v) { 100db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v)); 101db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 102db2becc9SJeremy L Thompson } 103db2becc9SJeremy L Thompson 1040d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1051c21e869SJeremy L Thompson // Basis apply - tensor AtPoints 1061c21e869SJeremy L Thompson //------------------------------------------------------------------------------ 107db2becc9SJeremy L Thompson static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points, 108db2becc9SJeremy L Thompson CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 1091c21e869SJeremy L Thompson Ceed ceed; 1101c21e869SJeremy L Thompson CeedInt Q_1d, dim, max_num_points = num_points[0]; 1111c21e869SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 1121c21e869SJeremy L Thompson const int max_block_size = 32; 1131c21e869SJeremy L Thompson const CeedScalar *d_x, *d_u; 1141c21e869SJeremy L Thompson CeedScalar *d_v; 1151c21e869SJeremy L Thompson CeedBasis_Hip *data; 1161c21e869SJeremy L Thompson 1171c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 1181c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 1191c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 1201c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 1211c21e869SJeremy L Thompson 1221c21e869SJeremy L Thompson // Weight handled separately 1231c21e869SJeremy L Thompson if (eval_mode == CEED_EVAL_WEIGHT) { 1245a5594ffSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(v, 1.0)); 1251c21e869SJeremy L Thompson return CEED_ERROR_SUCCESS; 1261c21e869SJeremy L Thompson } 1271c21e869SJeremy L Thompson 128111870feSJeremy L Thompson // Check padded to uniform number of points per elem 129111870feSJeremy L Thompson for (CeedInt i = 1; i < num_elem; i++) max_num_points = CeedIntMax(max_num_points, num_points[i]); 130111870feSJeremy L Thompson { 131111870feSJeremy L Thompson CeedInt num_comp, q_comp; 132111870feSJeremy L Thompson CeedSize len, len_required; 133111870feSJeremy L Thompson 134111870feSJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 135111870feSJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp)); 136111870feSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(is_transpose ? u : v, &len)); 137111870feSJeremy L Thompson len_required = (CeedSize)num_comp * (CeedSize)q_comp * (CeedSize)num_elem * (CeedSize)max_num_points; 138111870feSJeremy L Thompson CeedCheck(len >= len_required, ceed, CEED_ERROR_BACKEND, 139111870feSJeremy L Thompson "Vector at points must be padded to the same number of points in each element for BasisApplyAtPoints on GPU backends." 140111870feSJeremy L Thompson " Found %" CeedSize_FMT ", Required %" CeedSize_FMT, 141111870feSJeremy L Thompson len, len_required); 142111870feSJeremy L Thompson } 143111870feSJeremy L Thompson 144111870feSJeremy L Thompson // Move num_points array to device 145111870feSJeremy L Thompson if (is_transpose) { 146111870feSJeremy L Thompson const CeedInt num_bytes = num_elem * sizeof(CeedInt); 147111870feSJeremy L Thompson 148111870feSJeremy L Thompson if (num_elem != data->num_elem_at_points) { 149111870feSJeremy L Thompson data->num_elem_at_points = num_elem; 150111870feSJeremy L Thompson 151111870feSJeremy L Thompson if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem)); 152111870feSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_points_per_elem, num_bytes)); 153111870feSJeremy L Thompson CeedCallBackend(CeedFree(&data->h_points_per_elem)); 154111870feSJeremy L Thompson CeedCallBackend(CeedCalloc(num_elem, &data->h_points_per_elem)); 155111870feSJeremy L Thompson } 1569e511c80SJeremy L Thompson if (memcmp(data->h_points_per_elem, num_points, num_bytes)) { 157111870feSJeremy L Thompson memcpy(data->h_points_per_elem, num_points, num_bytes); 158111870feSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_points_per_elem, num_points, num_bytes, hipMemcpyHostToDevice)); 159111870feSJeremy L Thompson } 160111870feSJeremy L Thompson } 161111870feSJeremy L Thompson 1621c21e869SJeremy L Thompson // Build kernels if needed 1631c21e869SJeremy L Thompson if (data->num_points != max_num_points) { 1641c21e869SJeremy L Thompson CeedInt P_1d; 1651c21e869SJeremy L Thompson 1661c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); 1671c21e869SJeremy L Thompson data->num_points = max_num_points; 1681c21e869SJeremy L Thompson 1691c21e869SJeremy L Thompson // -- Create interp matrix to Chebyshev coefficients 1701c21e869SJeremy L Thompson if (!data->d_chebyshev_interp_1d) { 1711c21e869SJeremy L Thompson CeedSize interp_bytes; 1721c21e869SJeremy L Thompson CeedScalar *chebyshev_interp_1d; 1731c21e869SJeremy L Thompson 1741c21e869SJeremy L Thompson interp_bytes = P_1d * Q_1d * sizeof(CeedScalar); 1751c21e869SJeremy L Thompson CeedCallBackend(CeedCalloc(P_1d * Q_1d, &chebyshev_interp_1d)); 1765a5594ffSJeremy L Thompson CeedCallBackend(CeedBasisGetChebyshevInterp1D(basis, chebyshev_interp_1d)); 1771c21e869SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_chebyshev_interp_1d, interp_bytes)); 1781c21e869SJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_chebyshev_interp_1d, chebyshev_interp_1d, interp_bytes, hipMemcpyHostToDevice)); 1791c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&chebyshev_interp_1d)); 1801c21e869SJeremy L Thompson } 1811c21e869SJeremy L Thompson 1821c21e869SJeremy L Thompson // -- Compile kernels 1831c21e869SJeremy L Thompson char *basis_kernel_source; 1841c21e869SJeremy L Thompson const char *basis_kernel_path; 1851c21e869SJeremy L Thompson CeedInt num_comp; 1861c21e869SJeremy L Thompson 1871c21e869SJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 1881c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 1891c21e869SJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h", &basis_kernel_path)); 1901c21e869SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 1911c21e869SJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 1921c21e869SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 1931c21e869SJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 194f7c9815fSJeremy L Thompson Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 1951c21e869SJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", 196f7c9815fSJeremy L Thompson max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1))); 1971c21e869SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); 1981c21e869SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); 1991c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_path)); 2001c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_source)); 2011c21e869SJeremy L Thompson } 2021c21e869SJeremy L Thompson 2031c21e869SJeremy L Thompson // Get read/write access to u, v 2041c21e869SJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x)); 2051c21e869SJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 2061c21e869SJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 207db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 208db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 2091c21e869SJeremy L Thompson 2101c21e869SJeremy L Thompson // Clear v for transpose operation 211db2becc9SJeremy L Thompson if (is_transpose && !apply_add) { 212*19a04db8SJeremy L Thompson CeedInt num_comp, q_comp, num_nodes; 2131c21e869SJeremy L Thompson CeedSize length; 2141c21e869SJeremy L Thompson 215*19a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 216*19a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp)); 217*19a04db8SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 218*19a04db8SJeremy L Thompson length = 219*19a04db8SJeremy L Thompson (CeedSize)num_elem * (CeedSize)num_comp * (t_mode == CEED_TRANSPOSE ? (CeedSize)num_nodes : ((CeedSize)max_num_points * (CeedSize)q_comp)); 2201c21e869SJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 2211c21e869SJeremy L Thompson } 2221c21e869SJeremy L Thompson 2231c21e869SJeremy L Thompson // Basis action 2241c21e869SJeremy L Thompson switch (eval_mode) { 2251c21e869SJeremy L Thompson case CEED_EVAL_INTERP: { 226111870feSJeremy L Thompson void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; 2271c21e869SJeremy L Thompson const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 2281c21e869SJeremy L Thompson 2291c21e869SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->InterpAtPoints, num_elem, block_size, interp_args)); 2301c21e869SJeremy L Thompson } break; 2311c21e869SJeremy L Thompson case CEED_EVAL_GRAD: { 232111870feSJeremy L Thompson void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v}; 2332d10e82cSJeremy L Thompson const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 2341c21e869SJeremy L Thompson 2351c21e869SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->GradAtPoints, num_elem, block_size, grad_args)); 2361c21e869SJeremy L Thompson } break; 2371c21e869SJeremy L Thompson case CEED_EVAL_WEIGHT: 2381c21e869SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 2391c21e869SJeremy L Thompson break; 2401c21e869SJeremy L Thompson // LCOV_EXCL_START 2411c21e869SJeremy L Thompson case CEED_EVAL_DIV: 2421c21e869SJeremy L Thompson case CEED_EVAL_CURL: 2431c21e869SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 2441c21e869SJeremy L Thompson // LCOV_EXCL_STOP 2451c21e869SJeremy L Thompson } 2461c21e869SJeremy L Thompson 2471c21e869SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 2481c21e869SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(x_ref, &d_x)); 2491c21e869SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 2501c21e869SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 2511c21e869SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 2521c21e869SJeremy L Thompson return CEED_ERROR_SUCCESS; 2531c21e869SJeremy L Thompson } 2541c21e869SJeremy L Thompson 255db2becc9SJeremy L Thompson static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 256db2becc9SJeremy L Thompson CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 257db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 258db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 259db2becc9SJeremy L Thompson } 260db2becc9SJeremy L Thompson 261db2becc9SJeremy L Thompson static int CeedBasisApplyAddAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 262db2becc9SJeremy L Thompson CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 263db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 264db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 265db2becc9SJeremy L Thompson } 266db2becc9SJeremy L Thompson 2671c21e869SJeremy L Thompson //------------------------------------------------------------------------------ 2680d0321e0SJeremy L Thompson // Basis apply - non-tensor 2690d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 270db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensorCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 271db2becc9SJeremy L Thompson CeedVector u, CeedVector v) { 2720d0321e0SJeremy L Thompson Ceed ceed; 273437930d1SJeremy L Thompson CeedInt num_nodes, num_qpts; 2747bbbfca3SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 275d075f50bSSebastian Grimberg const int elems_per_block = 1; 276d075f50bSSebastian Grimberg const int grid = CeedDivUpInt(num_elem, elems_per_block); 2770d0321e0SJeremy L Thompson const CeedScalar *d_u; 2780d0321e0SJeremy L Thompson CeedScalar *d_v; 279b7453713SJeremy L Thompson CeedBasisNonTensor_Hip *data; 280b7453713SJeremy L Thompson 281b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 282b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 283b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 284b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 285b7453713SJeremy L Thompson 2869ea2cfd9SJeremy L Thompson // Get read/write access to u, v 2879ea2cfd9SJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 2889ea2cfd9SJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 289db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 290db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 2910d0321e0SJeremy L Thompson 2920d0321e0SJeremy L Thompson // Clear v for transpose operation 293db2becc9SJeremy L Thompson if (is_transpose && !apply_add) { 2941f9221feSJeremy L Thompson CeedSize length; 295b7453713SJeremy L Thompson 2962b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(v, &length)); 2972b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 2980d0321e0SJeremy L Thompson } 2990d0321e0SJeremy L Thompson 3000d0321e0SJeremy L Thompson // Apply basis operation 301437930d1SJeremy L Thompson switch (eval_mode) { 3020d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 303d075f50bSSebastian Grimberg void *interp_args[] = {(void *)&num_elem, &data->d_interp, &d_u, &d_v}; 3047bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 305b2165e7aSSebastian Grimberg 3067bbbfca3SJeremy L Thompson if (is_transpose) { 307d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per_block, interp_args)); 308d075f50bSSebastian Grimberg } else { 309b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, interp_args)); 310d075f50bSSebastian Grimberg } 3110d0321e0SJeremy L Thompson } break; 3120d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 313d075f50bSSebastian Grimberg void *grad_args[] = {(void *)&num_elem, &data->d_grad, &d_u, &d_v}; 3147bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 315b2165e7aSSebastian Grimberg 3167bbbfca3SJeremy L Thompson if (is_transpose) { 317d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, grad_args)); 318d075f50bSSebastian Grimberg } else { 319d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, grad_args)); 320d075f50bSSebastian Grimberg } 321d075f50bSSebastian Grimberg } break; 322d075f50bSSebastian Grimberg case CEED_EVAL_DIV: { 323d075f50bSSebastian Grimberg void *div_args[] = {(void *)&num_elem, &data->d_div, &d_u, &d_v}; 3247bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 325d075f50bSSebastian Grimberg 3267bbbfca3SJeremy L Thompson if (is_transpose) { 327d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, div_args)); 328d075f50bSSebastian Grimberg } else { 329d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, div_args)); 330d075f50bSSebastian Grimberg } 331d075f50bSSebastian Grimberg } break; 332d075f50bSSebastian Grimberg case CEED_EVAL_CURL: { 333d075f50bSSebastian Grimberg void *curl_args[] = {(void *)&num_elem, &data->d_curl, &d_u, &d_v}; 3347bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 335d075f50bSSebastian Grimberg 3367bbbfca3SJeremy L Thompson if (is_transpose) { 337d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, curl_args)); 338d075f50bSSebastian Grimberg } else { 339d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, curl_args)); 340d075f50bSSebastian Grimberg } 3410d0321e0SJeremy L Thompson } break; 3420d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 343097cc795SJames Wright CeedCheck(data->d_q_weight, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights not set", CeedEvalModes[eval_mode]); 344437930d1SJeremy L Thompson void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight, &d_v}; 345b2165e7aSSebastian Grimberg 346b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args)); 3470d0321e0SJeremy L Thompson } break; 3489ea2cfd9SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 3499ea2cfd9SJeremy L Thompson break; 3500d0321e0SJeremy L Thompson } 3510d0321e0SJeremy L Thompson 3529ea2cfd9SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 3532b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 3549ea2cfd9SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 3559ea2cfd9SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 3560d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3570d0321e0SJeremy L Thompson } 3580d0321e0SJeremy L Thompson 359db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 360db2becc9SJeremy L Thompson CeedVector v) { 361db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v)); 362db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 363db2becc9SJeremy L Thompson } 364db2becc9SJeremy L Thompson 365db2becc9SJeremy L Thompson static int CeedBasisApplyAddNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 366db2becc9SJeremy L Thompson CeedVector v) { 367db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v)); 368db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 369db2becc9SJeremy L Thompson } 370db2becc9SJeremy L Thompson 3710d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3720d0321e0SJeremy L Thompson // Destroy tensor basis 3730d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3740d0321e0SJeremy L Thompson static int CeedBasisDestroy_Hip(CeedBasis basis) { 3750d0321e0SJeremy L Thompson Ceed ceed; 3760d0321e0SJeremy L Thompson CeedBasis_Hip *data; 377b7453713SJeremy L Thompson 378b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 3792b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 3802b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(data->module)); 3811c21e869SJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 382097cc795SJames Wright if (data->d_q_weight_1d) CeedCallHip(ceed, hipFree(data->d_q_weight_1d)); 383111870feSJeremy L Thompson CeedCallBackend(CeedFree(&data->h_points_per_elem)); 384111870feSJeremy L Thompson if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem)); 3852b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_interp_1d)); 3862b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_grad_1d)); 3871c21e869SJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_chebyshev_interp_1d)); 3882b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&data)); 3890d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3900d0321e0SJeremy L Thompson } 3910d0321e0SJeremy L Thompson 3920d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3930d0321e0SJeremy L Thompson // Destroy non-tensor basis 3940d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3950d0321e0SJeremy L Thompson static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { 3960d0321e0SJeremy L Thompson Ceed ceed; 3970d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 398b7453713SJeremy L Thompson 399b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 4002b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 4012b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(data->module)); 402097cc795SJames Wright if (data->d_q_weight) CeedCallHip(ceed, hipFree(data->d_q_weight)); 4032b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_interp)); 4042b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_grad)); 405d075f50bSSebastian Grimberg CeedCallHip(ceed, hipFree(data->d_div)); 406d075f50bSSebastian Grimberg CeedCallHip(ceed, hipFree(data->d_curl)); 4072b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&data)); 4080d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4090d0321e0SJeremy L Thompson } 4100d0321e0SJeremy L Thompson 4110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4120d0321e0SJeremy L Thompson // Create tensor 4130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4142b730f8bSJeremy L Thompson int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 4156574a04fSJeremy L Thompson const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { 4160d0321e0SJeremy L Thompson Ceed ceed; 41722070f95SJeremy L Thompson char *basis_kernel_source; 41822070f95SJeremy L Thompson const char *basis_kernel_path; 419b7453713SJeremy L Thompson CeedInt num_comp; 420b7453713SJeremy L Thompson const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 421b7453713SJeremy L Thompson const CeedInt interp_bytes = q_bytes * P_1d; 4220d0321e0SJeremy L Thompson CeedBasis_Hip *data; 423b7453713SJeremy L Thompson 424b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 4252b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data)); 4260d0321e0SJeremy L Thompson 4270d0321e0SJeremy L Thompson // Copy data to GPU 428097cc795SJames Wright if (q_weight_1d) { 4292b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight_1d, q_bytes)); 4302b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, hipMemcpyHostToDevice)); 431097cc795SJames Wright } 4322b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_interp_1d, interp_bytes)); 4332b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, hipMemcpyHostToDevice)); 4342b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_grad_1d, interp_bytes)); 4352b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice)); 4360d0321e0SJeremy L Thompson 437ecc88aebSJeremy L Thompson // Compile basis kernels 438b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 4392b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor.h", &basis_kernel_path)); 44023d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 4412b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 44223d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 443eb7e6cafSJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 444f7c9815fSJeremy L Thompson Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 445b7453713SJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim))); 446eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 447eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad)); 448eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 4492b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_path)); 4502b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_source)); 451437930d1SJeremy L Thompson 4522b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, data)); 4530d0321e0SJeremy L Thompson 454d075f50bSSebastian Grimberg // Register backend functions 4552b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Hip)); 456db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Hip)); 4571c21e869SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip)); 458db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Hip)); 4592b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip)); 4600d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4610d0321e0SJeremy L Thompson } 4620d0321e0SJeremy L Thompson 4630d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 464d075f50bSSebastian Grimberg // Create non-tensor H^1 4650d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4662b730f8bSJeremy L Thompson int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad, 46751475c7cSJeremy L Thompson const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 4680d0321e0SJeremy L Thompson Ceed ceed; 46922070f95SJeremy L Thompson char *basis_kernel_source; 47022070f95SJeremy L Thompson const char *basis_kernel_path; 471d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_grad; 472b7453713SJeremy L Thompson const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 4730d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 474b7453713SJeremy L Thompson 475b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 4762b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data)); 4770d0321e0SJeremy L Thompson 4780d0321e0SJeremy L Thompson // Copy basis data to GPU 479d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 480d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad)); 481097cc795SJames Wright if (q_weight) { 4822b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 4832b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 484097cc795SJames Wright } 485d075f50bSSebastian Grimberg if (interp) { 486d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 487d075f50bSSebastian Grimberg 4882b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 4892b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 490d075f50bSSebastian Grimberg } 491d075f50bSSebastian Grimberg if (grad) { 492d075f50bSSebastian Grimberg const CeedInt grad_bytes = q_bytes * num_nodes * q_comp_grad; 493d075f50bSSebastian Grimberg 4942b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_grad, grad_bytes)); 4952b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_grad, grad, grad_bytes, hipMemcpyHostToDevice)); 496d075f50bSSebastian Grimberg } 4970d0321e0SJeremy L Thompson 4980d0321e0SJeremy L Thompson // Compile basis kernels 499b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 5002b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 50123d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 5022b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 50323d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 504d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 505d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp)); 506eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 507d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 508d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 509d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 510eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 5112b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_path)); 5122b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_source)); 513d075f50bSSebastian Grimberg 514d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, data)); 515d075f50bSSebastian Grimberg 516d075f50bSSebastian Grimberg // Register backend functions 517d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 518db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 519d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 520d075f50bSSebastian Grimberg return CEED_ERROR_SUCCESS; 521d075f50bSSebastian Grimberg } 522d075f50bSSebastian Grimberg 523d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 524d075f50bSSebastian Grimberg // Create non-tensor H(div) 525d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 526d075f50bSSebastian Grimberg int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div, 527d075f50bSSebastian Grimberg const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 528d075f50bSSebastian Grimberg Ceed ceed; 52922070f95SJeremy L Thompson char *basis_kernel_source; 53022070f95SJeremy L Thompson const char *basis_kernel_path; 531d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_div; 532d075f50bSSebastian Grimberg const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 533d075f50bSSebastian Grimberg CeedBasisNonTensor_Hip *data; 534d075f50bSSebastian Grimberg 535d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 536d075f50bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &data)); 537d075f50bSSebastian Grimberg 538d075f50bSSebastian Grimberg // Copy basis data to GPU 539d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 540d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div)); 541097cc795SJames Wright if (q_weight) { 542d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 543d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 544097cc795SJames Wright } 545d075f50bSSebastian Grimberg if (interp) { 546d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 547d075f50bSSebastian Grimberg 548d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 549d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 550d075f50bSSebastian Grimberg } 551d075f50bSSebastian Grimberg if (div) { 552d075f50bSSebastian Grimberg const CeedInt div_bytes = q_bytes * num_nodes * q_comp_div; 553d075f50bSSebastian Grimberg 554d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_div, div_bytes)); 555d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_div, div, div_bytes, hipMemcpyHostToDevice)); 556d075f50bSSebastian Grimberg } 557d075f50bSSebastian Grimberg 558d075f50bSSebastian Grimberg // Compile basis kernels 559d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 560d075f50bSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 561d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 562d075f50bSSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 563d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 564d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 565d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp)); 566d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 567d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 568d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 569d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 570d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 571d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_path)); 572d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 573d075f50bSSebastian Grimberg 574d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, data)); 575d075f50bSSebastian Grimberg 576d075f50bSSebastian Grimberg // Register backend functions 577d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 578db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 579d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 580d075f50bSSebastian Grimberg return CEED_ERROR_SUCCESS; 581d075f50bSSebastian Grimberg } 582d075f50bSSebastian Grimberg 583d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 584d075f50bSSebastian Grimberg // Create non-tensor H(curl) 585d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 586d075f50bSSebastian Grimberg int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 587d075f50bSSebastian Grimberg const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 588d075f50bSSebastian Grimberg Ceed ceed; 58922070f95SJeremy L Thompson char *basis_kernel_source; 59022070f95SJeremy L Thompson const char *basis_kernel_path; 591d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_curl; 592d075f50bSSebastian Grimberg const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 593d075f50bSSebastian Grimberg CeedBasisNonTensor_Hip *data; 594d075f50bSSebastian Grimberg 595d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 596d075f50bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &data)); 597d075f50bSSebastian Grimberg 598d075f50bSSebastian Grimberg // Copy basis data to GPU 599d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 600d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl)); 601097cc795SJames Wright if (q_weight) { 602d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 603d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 604097cc795SJames Wright } 605d075f50bSSebastian Grimberg if (interp) { 606d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 607d075f50bSSebastian Grimberg 608d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 609d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 610d075f50bSSebastian Grimberg } 611d075f50bSSebastian Grimberg if (curl) { 612d075f50bSSebastian Grimberg const CeedInt curl_bytes = q_bytes * num_nodes * q_comp_curl; 613d075f50bSSebastian Grimberg 614d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_curl, curl_bytes)); 615d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_curl, curl, curl_bytes, hipMemcpyHostToDevice)); 616d075f50bSSebastian Grimberg } 617d075f50bSSebastian Grimberg 618d075f50bSSebastian Grimberg // Compile basis kernels 619d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 620d075f50bSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 621d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 622d075f50bSSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 623d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 624d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 625d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp)); 626d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 627d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 628d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 629d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 630d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 631d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_path)); 632d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 633d075f50bSSebastian Grimberg 6342b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, data)); 6350d0321e0SJeremy L Thompson 6360d0321e0SJeremy L Thompson // Register backend functions 6372b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 638db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 6392b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 6400d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 6410d0321e0SJeremy L Thompson } 6422a86cc9dSSebastian Grimberg 6430d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 644