15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 10437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 110d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 122b730f8bSJeremy L Thompson 1349aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h" 140d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 152b730f8bSJeremy L Thompson #include "ceed-hip-ref.h" 160d0321e0SJeremy L Thompson 170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 180d0321e0SJeremy L Thompson // Basis apply - tensor 190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 20*db2becc9SJeremy L Thompson static int CeedBasisApplyCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 21*db2becc9SJeremy L Thompson CeedVector u, CeedVector v) { 220d0321e0SJeremy L Thompson Ceed ceed; 23b7453713SJeremy L Thompson CeedInt Q_1d, dim; 247bbbfca3SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 25437930d1SJeremy L Thompson const int max_block_size = 64; 260d0321e0SJeremy L Thompson const CeedScalar *d_u; 270d0321e0SJeremy L Thompson CeedScalar *d_v; 28b7453713SJeremy L Thompson CeedBasis_Hip *data; 29b7453713SJeremy L Thompson 30b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 31b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 32b7453713SJeremy L Thompson 339ea2cfd9SJeremy L Thompson // Get read/write access to u, v 346574a04fSJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 356574a04fSJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 36*db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 37*db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 380d0321e0SJeremy L Thompson 390d0321e0SJeremy L Thompson // Clear v for transpose operation 40*db2becc9SJeremy L Thompson if (is_transpose && !apply_add) { 411f9221feSJeremy L Thompson CeedSize length; 42b7453713SJeremy L Thompson 432b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(v, &length)); 442b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 450d0321e0SJeremy L Thompson } 46b2165e7aSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 47b2165e7aSSebastian Grimberg CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 480d0321e0SJeremy L Thompson 490d0321e0SJeremy L Thompson // Basis action 50437930d1SJeremy L Thompson switch (eval_mode) { 510d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 527bbbfca3SJeremy L Thompson void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v}; 53b2165e7aSSebastian Grimberg const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 540d0321e0SJeremy L Thompson 55eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args)); 560d0321e0SJeremy L Thompson } break; 570d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 587bbbfca3SJeremy L Thompson void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v}; 59b2165e7aSSebastian Grimberg const CeedInt block_size = max_block_size; 600d0321e0SJeremy L Thompson 61eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args)); 620d0321e0SJeremy L Thompson } break; 630d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 64097cc795SJames Wright CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights_1d not set", CeedEvalModes[eval_mode]); 65437930d1SJeremy L Thompson void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; 66b2165e7aSSebastian Grimberg const int block_size_x = Q_1d; 67b2165e7aSSebastian Grimberg const int block_size_y = dim >= 2 ? Q_1d : 1; 680d0321e0SJeremy L Thompson 69b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, weight_args)); 700d0321e0SJeremy L Thompson } break; 719ea2cfd9SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 729ea2cfd9SJeremy L Thompson break; 730d0321e0SJeremy L Thompson // LCOV_EXCL_START 740d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 750d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 76bcbe1c99SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 770d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 780d0321e0SJeremy L Thompson } 790d0321e0SJeremy L Thompson 809ea2cfd9SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 812b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 829ea2cfd9SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 839ea2cfd9SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 840d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 850d0321e0SJeremy L Thompson } 860d0321e0SJeremy L Thompson 87*db2becc9SJeremy L Thompson static int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 88*db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v)); 89*db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 90*db2becc9SJeremy L Thompson } 91*db2becc9SJeremy L Thompson 92*db2becc9SJeremy L Thompson static int CeedBasisApplyAdd_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 93*db2becc9SJeremy L Thompson CeedVector v) { 94*db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v)); 95*db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 96*db2becc9SJeremy L Thompson } 97*db2becc9SJeremy L Thompson 980d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 991c21e869SJeremy L Thompson // Basis apply - tensor AtPoints 1001c21e869SJeremy L Thompson //------------------------------------------------------------------------------ 101*db2becc9SJeremy L Thompson static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points, 102*db2becc9SJeremy L Thompson CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 1031c21e869SJeremy L Thompson Ceed ceed; 1041c21e869SJeremy L Thompson CeedInt Q_1d, dim, max_num_points = num_points[0]; 1051c21e869SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 1061c21e869SJeremy L Thompson const int max_block_size = 32; 1071c21e869SJeremy L Thompson const CeedScalar *d_x, *d_u; 1081c21e869SJeremy L Thompson CeedScalar *d_v; 1091c21e869SJeremy L Thompson CeedBasis_Hip *data; 1101c21e869SJeremy L Thompson 1111c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 1121c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 1131c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 1141c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 1151c21e869SJeremy L Thompson 1161c21e869SJeremy L Thompson // Check uniform number of points per elem 1171c21e869SJeremy L Thompson for (CeedInt i = 1; i < num_elem; i++) { 1181c21e869SJeremy L Thompson CeedCheck(max_num_points == num_points[i], ceed, CEED_ERROR_BACKEND, 1191c21e869SJeremy L Thompson "BasisApplyAtPoints only supported for the same number of points in each element"); 1201c21e869SJeremy L Thompson } 1211c21e869SJeremy L Thompson 1221c21e869SJeremy L Thompson // Weight handled separately 1231c21e869SJeremy L Thompson if (eval_mode == CEED_EVAL_WEIGHT) { 1241c21e869SJeremy L Thompson CeedCall(CeedVectorSetValue(v, 1.0)); 1251c21e869SJeremy L Thompson return CEED_ERROR_SUCCESS; 1261c21e869SJeremy L Thompson } 1271c21e869SJeremy L Thompson 1281c21e869SJeremy L Thompson // Build kernels if needed 1291c21e869SJeremy L Thompson if (data->num_points != max_num_points) { 1301c21e869SJeremy L Thompson CeedInt P_1d; 1311c21e869SJeremy L Thompson 1321c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); 1331c21e869SJeremy L Thompson data->num_points = max_num_points; 1341c21e869SJeremy L Thompson 1351c21e869SJeremy L Thompson // -- Create interp matrix to Chebyshev coefficients 1361c21e869SJeremy L Thompson if (!data->d_chebyshev_interp_1d) { 1371c21e869SJeremy L Thompson CeedSize interp_bytes; 1381c21e869SJeremy L Thompson CeedScalar *chebyshev_interp_1d; 1391c21e869SJeremy L Thompson 1401c21e869SJeremy L Thompson interp_bytes = P_1d * Q_1d * sizeof(CeedScalar); 1411c21e869SJeremy L Thompson CeedCallBackend(CeedCalloc(P_1d * Q_1d, &chebyshev_interp_1d)); 1421c21e869SJeremy L Thompson CeedCall(CeedBasisGetChebyshevInterp1D(basis, chebyshev_interp_1d)); 1431c21e869SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_chebyshev_interp_1d, interp_bytes)); 1441c21e869SJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_chebyshev_interp_1d, chebyshev_interp_1d, interp_bytes, hipMemcpyHostToDevice)); 1451c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&chebyshev_interp_1d)); 1461c21e869SJeremy L Thompson } 1471c21e869SJeremy L Thompson 1481c21e869SJeremy L Thompson // -- Compile kernels 1491c21e869SJeremy L Thompson char *basis_kernel_source; 1501c21e869SJeremy L Thompson const char *basis_kernel_path; 1511c21e869SJeremy L Thompson CeedInt num_comp; 1521c21e869SJeremy L Thompson 1531c21e869SJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 1541c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 1551c21e869SJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h", &basis_kernel_path)); 1561c21e869SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 1571c21e869SJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 1581c21e869SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 1591c21e869SJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 160f7c9815fSJeremy L Thompson Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 1611c21e869SJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", 162f7c9815fSJeremy L Thompson max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1))); 1631c21e869SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); 1641c21e869SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); 1651c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_path)); 1661c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_source)); 1671c21e869SJeremy L Thompson } 1681c21e869SJeremy L Thompson 1691c21e869SJeremy L Thompson // Get read/write access to u, v 1701c21e869SJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x)); 1711c21e869SJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 1721c21e869SJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 173*db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 174*db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 1751c21e869SJeremy L Thompson 1761c21e869SJeremy L Thompson // Clear v for transpose operation 177*db2becc9SJeremy L Thompson if (is_transpose && !apply_add) { 1781c21e869SJeremy L Thompson CeedSize length; 1791c21e869SJeremy L Thompson 1801c21e869SJeremy L Thompson CeedCallBackend(CeedVectorGetLength(v, &length)); 1811c21e869SJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 1821c21e869SJeremy L Thompson } 1831c21e869SJeremy L Thompson 1841c21e869SJeremy L Thompson // Basis action 1851c21e869SJeremy L Thompson switch (eval_mode) { 1861c21e869SJeremy L Thompson case CEED_EVAL_INTERP: { 1871c21e869SJeremy L Thompson void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &d_x, &d_u, &d_v}; 1881c21e869SJeremy L Thompson const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 1891c21e869SJeremy L Thompson 1901c21e869SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->InterpAtPoints, num_elem, block_size, interp_args)); 1911c21e869SJeremy L Thompson } break; 1921c21e869SJeremy L Thompson case CEED_EVAL_GRAD: { 1931c21e869SJeremy L Thompson void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &d_x, &d_u, &d_v}; 1942d10e82cSJeremy L Thompson const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 1951c21e869SJeremy L Thompson 1961c21e869SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->GradAtPoints, num_elem, block_size, grad_args)); 1971c21e869SJeremy L Thompson } break; 1981c21e869SJeremy L Thompson case CEED_EVAL_WEIGHT: 1991c21e869SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 2001c21e869SJeremy L Thompson break; 2011c21e869SJeremy L Thompson // LCOV_EXCL_START 2021c21e869SJeremy L Thompson case CEED_EVAL_DIV: 2031c21e869SJeremy L Thompson case CEED_EVAL_CURL: 2041c21e869SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 2051c21e869SJeremy L Thompson // LCOV_EXCL_STOP 2061c21e869SJeremy L Thompson } 2071c21e869SJeremy L Thompson 2081c21e869SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 2091c21e869SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(x_ref, &d_x)); 2101c21e869SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 2111c21e869SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 2121c21e869SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 2131c21e869SJeremy L Thompson return CEED_ERROR_SUCCESS; 2141c21e869SJeremy L Thompson } 2151c21e869SJeremy L Thompson 216*db2becc9SJeremy L Thompson static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 217*db2becc9SJeremy L Thompson CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 218*db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 219*db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 220*db2becc9SJeremy L Thompson } 221*db2becc9SJeremy L Thompson 222*db2becc9SJeremy L Thompson static int CeedBasisApplyAddAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, 223*db2becc9SJeremy L Thompson CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) { 224*db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v)); 225*db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 226*db2becc9SJeremy L Thompson } 227*db2becc9SJeremy L Thompson 2281c21e869SJeremy L Thompson //------------------------------------------------------------------------------ 2290d0321e0SJeremy L Thompson // Basis apply - non-tensor 2300d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 231*db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensorCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 232*db2becc9SJeremy L Thompson CeedVector u, CeedVector v) { 2330d0321e0SJeremy L Thompson Ceed ceed; 234437930d1SJeremy L Thompson CeedInt num_nodes, num_qpts; 2357bbbfca3SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 236d075f50bSSebastian Grimberg const int elems_per_block = 1; 237d075f50bSSebastian Grimberg const int grid = CeedDivUpInt(num_elem, elems_per_block); 2380d0321e0SJeremy L Thompson const CeedScalar *d_u; 2390d0321e0SJeremy L Thompson CeedScalar *d_v; 240b7453713SJeremy L Thompson CeedBasisNonTensor_Hip *data; 241b7453713SJeremy L Thompson 242b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 243b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 244b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 245b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 246b7453713SJeremy L Thompson 2479ea2cfd9SJeremy L Thompson // Get read/write access to u, v 2489ea2cfd9SJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 2499ea2cfd9SJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 250*db2becc9SJeremy L Thompson if (apply_add) CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v)); 251*db2becc9SJeremy L Thompson else CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 2520d0321e0SJeremy L Thompson 2530d0321e0SJeremy L Thompson // Clear v for transpose operation 254*db2becc9SJeremy L Thompson if (is_transpose && !apply_add) { 2551f9221feSJeremy L Thompson CeedSize length; 256b7453713SJeremy L Thompson 2572b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(v, &length)); 2582b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 2590d0321e0SJeremy L Thompson } 2600d0321e0SJeremy L Thompson 2610d0321e0SJeremy L Thompson // Apply basis operation 262437930d1SJeremy L Thompson switch (eval_mode) { 2630d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 264d075f50bSSebastian Grimberg void *interp_args[] = {(void *)&num_elem, &data->d_interp, &d_u, &d_v}; 2657bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 266b2165e7aSSebastian Grimberg 2677bbbfca3SJeremy L Thompson if (is_transpose) { 268d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per_block, interp_args)); 269d075f50bSSebastian Grimberg } else { 270b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, interp_args)); 271d075f50bSSebastian Grimberg } 2720d0321e0SJeremy L Thompson } break; 2730d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 274d075f50bSSebastian Grimberg void *grad_args[] = {(void *)&num_elem, &data->d_grad, &d_u, &d_v}; 2757bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 276b2165e7aSSebastian Grimberg 2777bbbfca3SJeremy L Thompson if (is_transpose) { 278d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, grad_args)); 279d075f50bSSebastian Grimberg } else { 280d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, grad_args)); 281d075f50bSSebastian Grimberg } 282d075f50bSSebastian Grimberg } break; 283d075f50bSSebastian Grimberg case CEED_EVAL_DIV: { 284d075f50bSSebastian Grimberg void *div_args[] = {(void *)&num_elem, &data->d_div, &d_u, &d_v}; 2857bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 286d075f50bSSebastian Grimberg 2877bbbfca3SJeremy L Thompson if (is_transpose) { 288d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, div_args)); 289d075f50bSSebastian Grimberg } else { 290d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, div_args)); 291d075f50bSSebastian Grimberg } 292d075f50bSSebastian Grimberg } break; 293d075f50bSSebastian Grimberg case CEED_EVAL_CURL: { 294d075f50bSSebastian Grimberg void *curl_args[] = {(void *)&num_elem, &data->d_curl, &d_u, &d_v}; 2957bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 296d075f50bSSebastian Grimberg 2977bbbfca3SJeremy L Thompson if (is_transpose) { 298d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, curl_args)); 299d075f50bSSebastian Grimberg } else { 300d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, curl_args)); 301d075f50bSSebastian Grimberg } 3020d0321e0SJeremy L Thompson } break; 3030d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 304097cc795SJames Wright CeedCheck(data->d_q_weight, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights not set", CeedEvalModes[eval_mode]); 305437930d1SJeremy L Thompson void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight, &d_v}; 306b2165e7aSSebastian Grimberg 307b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args)); 3080d0321e0SJeremy L Thompson } break; 3099ea2cfd9SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 3109ea2cfd9SJeremy L Thompson break; 3110d0321e0SJeremy L Thompson } 3120d0321e0SJeremy L Thompson 3139ea2cfd9SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 3142b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 3159ea2cfd9SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 3169ea2cfd9SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 3170d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3180d0321e0SJeremy L Thompson } 3190d0321e0SJeremy L Thompson 320*db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 321*db2becc9SJeremy L Thompson CeedVector v) { 322*db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v)); 323*db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 324*db2becc9SJeremy L Thompson } 325*db2becc9SJeremy L Thompson 326*db2becc9SJeremy L Thompson static int CeedBasisApplyAddNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 327*db2becc9SJeremy L Thompson CeedVector v) { 328*db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v)); 329*db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS; 330*db2becc9SJeremy L Thompson } 331*db2becc9SJeremy L Thompson 3320d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3330d0321e0SJeremy L Thompson // Destroy tensor basis 3340d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3350d0321e0SJeremy L Thompson static int CeedBasisDestroy_Hip(CeedBasis basis) { 3360d0321e0SJeremy L Thompson Ceed ceed; 3370d0321e0SJeremy L Thompson CeedBasis_Hip *data; 338b7453713SJeremy L Thompson 339b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 3402b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 3412b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(data->module)); 3421c21e869SJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 343097cc795SJames Wright if (data->d_q_weight_1d) CeedCallHip(ceed, hipFree(data->d_q_weight_1d)); 3442b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_interp_1d)); 3452b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_grad_1d)); 3461c21e869SJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_chebyshev_interp_1d)); 3472b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&data)); 3480d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3490d0321e0SJeremy L Thompson } 3500d0321e0SJeremy L Thompson 3510d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3520d0321e0SJeremy L Thompson // Destroy non-tensor basis 3530d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3540d0321e0SJeremy L Thompson static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { 3550d0321e0SJeremy L Thompson Ceed ceed; 3560d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 357b7453713SJeremy L Thompson 358b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 3592b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 3602b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(data->module)); 361097cc795SJames Wright if (data->d_q_weight) CeedCallHip(ceed, hipFree(data->d_q_weight)); 3622b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_interp)); 3632b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_grad)); 364d075f50bSSebastian Grimberg CeedCallHip(ceed, hipFree(data->d_div)); 365d075f50bSSebastian Grimberg CeedCallHip(ceed, hipFree(data->d_curl)); 3662b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&data)); 3670d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3680d0321e0SJeremy L Thompson } 3690d0321e0SJeremy L Thompson 3700d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3710d0321e0SJeremy L Thompson // Create tensor 3720d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3732b730f8bSJeremy L Thompson int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 3746574a04fSJeremy L Thompson const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { 3750d0321e0SJeremy L Thompson Ceed ceed; 37622070f95SJeremy L Thompson char *basis_kernel_source; 37722070f95SJeremy L Thompson const char *basis_kernel_path; 378b7453713SJeremy L Thompson CeedInt num_comp; 379b7453713SJeremy L Thompson const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 380b7453713SJeremy L Thompson const CeedInt interp_bytes = q_bytes * P_1d; 3810d0321e0SJeremy L Thompson CeedBasis_Hip *data; 382b7453713SJeremy L Thompson 383b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 3842b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data)); 3850d0321e0SJeremy L Thompson 3860d0321e0SJeremy L Thompson // Copy data to GPU 387097cc795SJames Wright if (q_weight_1d) { 3882b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight_1d, q_bytes)); 3892b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, hipMemcpyHostToDevice)); 390097cc795SJames Wright } 3912b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_interp_1d, interp_bytes)); 3922b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, hipMemcpyHostToDevice)); 3932b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_grad_1d, interp_bytes)); 3942b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice)); 3950d0321e0SJeremy L Thompson 396ecc88aebSJeremy L Thompson // Compile basis kernels 397b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 3982b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor.h", &basis_kernel_path)); 39923d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 4002b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 40123d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 402eb7e6cafSJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 403f7c9815fSJeremy L Thompson Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 404b7453713SJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim))); 405eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 406eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad)); 407eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 4082b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_path)); 4092b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_source)); 410437930d1SJeremy L Thompson 4112b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, data)); 4120d0321e0SJeremy L Thompson 413d075f50bSSebastian Grimberg // Register backend functions 4142b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Hip)); 415*db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Hip)); 4161c21e869SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip)); 417*db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Hip)); 4182b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip)); 4190d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 4200d0321e0SJeremy L Thompson } 4210d0321e0SJeremy L Thompson 4220d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 423d075f50bSSebastian Grimberg // Create non-tensor H^1 4240d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 4252b730f8bSJeremy L Thompson int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad, 42651475c7cSJeremy L Thompson const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 4270d0321e0SJeremy L Thompson Ceed ceed; 42822070f95SJeremy L Thompson char *basis_kernel_source; 42922070f95SJeremy L Thompson const char *basis_kernel_path; 430d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_grad; 431b7453713SJeremy L Thompson const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 4320d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 433b7453713SJeremy L Thompson 434b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 4352b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data)); 4360d0321e0SJeremy L Thompson 4370d0321e0SJeremy L Thompson // Copy basis data to GPU 438d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 439d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad)); 440097cc795SJames Wright if (q_weight) { 4412b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 4422b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 443097cc795SJames Wright } 444d075f50bSSebastian Grimberg if (interp) { 445d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 446d075f50bSSebastian Grimberg 4472b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 4482b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 449d075f50bSSebastian Grimberg } 450d075f50bSSebastian Grimberg if (grad) { 451d075f50bSSebastian Grimberg const CeedInt grad_bytes = q_bytes * num_nodes * q_comp_grad; 452d075f50bSSebastian Grimberg 4532b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_grad, grad_bytes)); 4542b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_grad, grad, grad_bytes, hipMemcpyHostToDevice)); 455d075f50bSSebastian Grimberg } 4560d0321e0SJeremy L Thompson 4570d0321e0SJeremy L Thompson // Compile basis kernels 458b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 4592b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 46023d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 4612b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 46223d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 463d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 464d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp)); 465eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 466d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 467d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 468d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 469eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 4702b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_path)); 4712b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_source)); 472d075f50bSSebastian Grimberg 473d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, data)); 474d075f50bSSebastian Grimberg 475d075f50bSSebastian Grimberg // Register backend functions 476d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 477*db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 478d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 479d075f50bSSebastian Grimberg return CEED_ERROR_SUCCESS; 480d075f50bSSebastian Grimberg } 481d075f50bSSebastian Grimberg 482d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 483d075f50bSSebastian Grimberg // Create non-tensor H(div) 484d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 485d075f50bSSebastian Grimberg int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div, 486d075f50bSSebastian Grimberg const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 487d075f50bSSebastian Grimberg Ceed ceed; 48822070f95SJeremy L Thompson char *basis_kernel_source; 48922070f95SJeremy L Thompson const char *basis_kernel_path; 490d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_div; 491d075f50bSSebastian Grimberg const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 492d075f50bSSebastian Grimberg CeedBasisNonTensor_Hip *data; 493d075f50bSSebastian Grimberg 494d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 495d075f50bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &data)); 496d075f50bSSebastian Grimberg 497d075f50bSSebastian Grimberg // Copy basis data to GPU 498d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 499d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div)); 500097cc795SJames Wright if (q_weight) { 501d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 502d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 503097cc795SJames Wright } 504d075f50bSSebastian Grimberg if (interp) { 505d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 506d075f50bSSebastian Grimberg 507d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 508d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 509d075f50bSSebastian Grimberg } 510d075f50bSSebastian Grimberg if (div) { 511d075f50bSSebastian Grimberg const CeedInt div_bytes = q_bytes * num_nodes * q_comp_div; 512d075f50bSSebastian Grimberg 513d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_div, div_bytes)); 514d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_div, div, div_bytes, hipMemcpyHostToDevice)); 515d075f50bSSebastian Grimberg } 516d075f50bSSebastian Grimberg 517d075f50bSSebastian Grimberg // Compile basis kernels 518d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 519d075f50bSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 520d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 521d075f50bSSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 522d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 523d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 524d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp)); 525d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 526d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 527d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 528d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 529d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 530d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_path)); 531d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 532d075f50bSSebastian Grimberg 533d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, data)); 534d075f50bSSebastian Grimberg 535d075f50bSSebastian Grimberg // Register backend functions 536d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 537*db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 538d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 539d075f50bSSebastian Grimberg return CEED_ERROR_SUCCESS; 540d075f50bSSebastian Grimberg } 541d075f50bSSebastian Grimberg 542d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 543d075f50bSSebastian Grimberg // Create non-tensor H(curl) 544d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 545d075f50bSSebastian Grimberg int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 546d075f50bSSebastian Grimberg const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 547d075f50bSSebastian Grimberg Ceed ceed; 54822070f95SJeremy L Thompson char *basis_kernel_source; 54922070f95SJeremy L Thompson const char *basis_kernel_path; 550d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_curl; 551d075f50bSSebastian Grimberg const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 552d075f50bSSebastian Grimberg CeedBasisNonTensor_Hip *data; 553d075f50bSSebastian Grimberg 554d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 555d075f50bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &data)); 556d075f50bSSebastian Grimberg 557d075f50bSSebastian Grimberg // Copy basis data to GPU 558d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 559d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl)); 560097cc795SJames Wright if (q_weight) { 561d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 562d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 563097cc795SJames Wright } 564d075f50bSSebastian Grimberg if (interp) { 565d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 566d075f50bSSebastian Grimberg 567d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 568d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 569d075f50bSSebastian Grimberg } 570d075f50bSSebastian Grimberg if (curl) { 571d075f50bSSebastian Grimberg const CeedInt curl_bytes = q_bytes * num_nodes * q_comp_curl; 572d075f50bSSebastian Grimberg 573d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_curl, curl_bytes)); 574d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_curl, curl, curl_bytes, hipMemcpyHostToDevice)); 575d075f50bSSebastian Grimberg } 576d075f50bSSebastian Grimberg 577d075f50bSSebastian Grimberg // Compile basis kernels 578d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 579d075f50bSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 580d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 581d075f50bSSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 582d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 583d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 584d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp)); 585d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 586d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 587d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 588d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 589d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 590d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_path)); 591d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 592d075f50bSSebastian Grimberg 5932b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, data)); 5940d0321e0SJeremy L Thompson 5950d0321e0SJeremy L Thompson // Register backend functions 5962b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 597*db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip)); 5982b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 5990d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 6000d0321e0SJeremy L Thompson } 6012a86cc9dSSebastian Grimberg 6020d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 603