15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <ceed/backend.h> 10437930d1SJeremy L Thompson #include <ceed/jit-tools.h> 110d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 122b730f8bSJeremy L Thompson 1349aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h" 140d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h" 152b730f8bSJeremy L Thompson #include "ceed-hip-ref.h" 160d0321e0SJeremy L Thompson 170d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 180d0321e0SJeremy L Thompson // Basis apply - tensor 190d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 202b730f8bSJeremy L Thompson int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) { 210d0321e0SJeremy L Thompson Ceed ceed; 22b7453713SJeremy L Thompson CeedInt Q_1d, dim; 237bbbfca3SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 24437930d1SJeremy L Thompson const int max_block_size = 64; 250d0321e0SJeremy L Thompson const CeedScalar *d_u; 260d0321e0SJeremy L Thompson CeedScalar *d_v; 27b7453713SJeremy L Thompson CeedBasis_Hip *data; 28b7453713SJeremy L Thompson 29b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 30b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 31b7453713SJeremy L Thompson 329ea2cfd9SJeremy L Thompson // Get read/write access to u, v 336574a04fSJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 346574a04fSJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 352b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 360d0321e0SJeremy L Thompson 370d0321e0SJeremy L Thompson // Clear v for transpose operation 387bbbfca3SJeremy L Thompson if (is_transpose) { 391f9221feSJeremy L Thompson CeedSize length; 40b7453713SJeremy L Thompson 412b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(v, &length)); 422b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 430d0321e0SJeremy L Thompson } 44b2165e7aSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 45b2165e7aSSebastian Grimberg CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 460d0321e0SJeremy L Thompson 470d0321e0SJeremy L Thompson // Basis action 48437930d1SJeremy L Thompson switch (eval_mode) { 490d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 507bbbfca3SJeremy L Thompson void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v}; 51b2165e7aSSebastian Grimberg const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 520d0321e0SJeremy L Thompson 53eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args)); 540d0321e0SJeremy L Thompson } break; 550d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 567bbbfca3SJeremy L Thompson void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v}; 57b2165e7aSSebastian Grimberg const CeedInt block_size = max_block_size; 580d0321e0SJeremy L Thompson 59eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args)); 600d0321e0SJeremy L Thompson } break; 610d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 62097cc795SJames Wright CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights_1d not set", CeedEvalModes[eval_mode]); 63437930d1SJeremy L Thompson void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v}; 64b2165e7aSSebastian Grimberg const int block_size_x = Q_1d; 65b2165e7aSSebastian Grimberg const int block_size_y = dim >= 2 ? Q_1d : 1; 660d0321e0SJeremy L Thompson 67b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, weight_args)); 680d0321e0SJeremy L Thompson } break; 699ea2cfd9SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 709ea2cfd9SJeremy L Thompson break; 710d0321e0SJeremy L Thompson // LCOV_EXCL_START 720d0321e0SJeremy L Thompson case CEED_EVAL_DIV: 730d0321e0SJeremy L Thompson case CEED_EVAL_CURL: 74bcbe1c99SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 750d0321e0SJeremy L Thompson // LCOV_EXCL_STOP 760d0321e0SJeremy L Thompson } 770d0321e0SJeremy L Thompson 789ea2cfd9SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 792b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 809ea2cfd9SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 819ea2cfd9SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 820d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 830d0321e0SJeremy L Thompson } 840d0321e0SJeremy L Thompson 850d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 86*1c21e869SJeremy L Thompson // Basis apply - tensor AtPoints 87*1c21e869SJeremy L Thompson //------------------------------------------------------------------------------ 88*1c21e869SJeremy L Thompson int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode, CeedEvalMode eval_mode, 89*1c21e869SJeremy L Thompson CeedVector x_ref, CeedVector u, CeedVector v) { 90*1c21e869SJeremy L Thompson Ceed ceed; 91*1c21e869SJeremy L Thompson CeedInt Q_1d, dim, max_num_points = num_points[0]; 92*1c21e869SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 93*1c21e869SJeremy L Thompson const int max_block_size = 32; 94*1c21e869SJeremy L Thompson const CeedScalar *d_x, *d_u; 95*1c21e869SJeremy L Thompson CeedScalar *d_v; 96*1c21e869SJeremy L Thompson CeedBasis_Hip *data; 97*1c21e869SJeremy L Thompson 98*1c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 99*1c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 100*1c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d)); 101*1c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis, &dim)); 102*1c21e869SJeremy L Thompson 103*1c21e869SJeremy L Thompson // Check uniform number of points per elem 104*1c21e869SJeremy L Thompson for (CeedInt i = 1; i < num_elem; i++) { 105*1c21e869SJeremy L Thompson CeedCheck(max_num_points == num_points[i], ceed, CEED_ERROR_BACKEND, 106*1c21e869SJeremy L Thompson "BasisApplyAtPoints only supported for the same number of points in each element"); 107*1c21e869SJeremy L Thompson } 108*1c21e869SJeremy L Thompson 109*1c21e869SJeremy L Thompson // Weight handled separately 110*1c21e869SJeremy L Thompson if (eval_mode == CEED_EVAL_WEIGHT) { 111*1c21e869SJeremy L Thompson CeedCall(CeedVectorSetValue(v, 1.0)); 112*1c21e869SJeremy L Thompson return CEED_ERROR_SUCCESS; 113*1c21e869SJeremy L Thompson } 114*1c21e869SJeremy L Thompson 115*1c21e869SJeremy L Thompson // Build kernels if needed 116*1c21e869SJeremy L Thompson if (data->num_points != max_num_points) { 117*1c21e869SJeremy L Thompson CeedInt P_1d; 118*1c21e869SJeremy L Thompson 119*1c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d)); 120*1c21e869SJeremy L Thompson data->num_points = max_num_points; 121*1c21e869SJeremy L Thompson 122*1c21e869SJeremy L Thompson // -- Create interp matrix to Chebyshev coefficients 123*1c21e869SJeremy L Thompson if (!data->d_chebyshev_interp_1d) { 124*1c21e869SJeremy L Thompson CeedSize interp_bytes; 125*1c21e869SJeremy L Thompson CeedScalar *chebyshev_interp_1d; 126*1c21e869SJeremy L Thompson 127*1c21e869SJeremy L Thompson interp_bytes = P_1d * Q_1d * sizeof(CeedScalar); 128*1c21e869SJeremy L Thompson CeedCallBackend(CeedCalloc(P_1d * Q_1d, &chebyshev_interp_1d)); 129*1c21e869SJeremy L Thompson CeedCall(CeedBasisGetChebyshevInterp1D(basis, chebyshev_interp_1d)); 130*1c21e869SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_chebyshev_interp_1d, interp_bytes)); 131*1c21e869SJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_chebyshev_interp_1d, chebyshev_interp_1d, interp_bytes, hipMemcpyHostToDevice)); 132*1c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&chebyshev_interp_1d)); 133*1c21e869SJeremy L Thompson } 134*1c21e869SJeremy L Thompson 135*1c21e869SJeremy L Thompson // -- Compile kernels 136*1c21e869SJeremy L Thompson char *basis_kernel_source; 137*1c21e869SJeremy L Thompson const char *basis_kernel_path; 138*1c21e869SJeremy L Thompson CeedInt num_comp; 139*1c21e869SJeremy L Thompson 140*1c21e869SJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 141*1c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 142*1c21e869SJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h", &basis_kernel_path)); 143*1c21e869SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 144*1c21e869SJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 145*1c21e869SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 146*1c21e869SJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 147*1c21e869SJeremy L Thompson num_comp * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 148*1c21e869SJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS", 149*1c21e869SJeremy L Thompson max_num_points, "POINTS_BUFF_LEN", 150*1c21e869SJeremy L Thompson max_num_points * CeedIntPow(Q_1d > max_num_points ? Q_1d : max_num_points, dim - 1))); 151*1c21e869SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints)); 152*1c21e869SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints)); 153*1c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_path)); 154*1c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_source)); 155*1c21e869SJeremy L Thompson } 156*1c21e869SJeremy L Thompson 157*1c21e869SJeremy L Thompson // Get read/write access to u, v 158*1c21e869SJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x)); 159*1c21e869SJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 160*1c21e869SJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 161*1c21e869SJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 162*1c21e869SJeremy L Thompson 163*1c21e869SJeremy L Thompson // Clear v for transpose operation 164*1c21e869SJeremy L Thompson if (is_transpose) { 165*1c21e869SJeremy L Thompson CeedSize length; 166*1c21e869SJeremy L Thompson 167*1c21e869SJeremy L Thompson CeedCallBackend(CeedVectorGetLength(v, &length)); 168*1c21e869SJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 169*1c21e869SJeremy L Thompson } 170*1c21e869SJeremy L Thompson 171*1c21e869SJeremy L Thompson // Basis action 172*1c21e869SJeremy L Thompson switch (eval_mode) { 173*1c21e869SJeremy L Thompson case CEED_EVAL_INTERP: { 174*1c21e869SJeremy L Thompson void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &d_x, &d_u, &d_v}; 175*1c21e869SJeremy L Thompson const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size); 176*1c21e869SJeremy L Thompson 177*1c21e869SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->InterpAtPoints, num_elem, block_size, interp_args)); 178*1c21e869SJeremy L Thompson } break; 179*1c21e869SJeremy L Thompson case CEED_EVAL_GRAD: { 180*1c21e869SJeremy L Thompson void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_chebyshev_interp_1d, &d_x, &d_u, &d_v}; 181*1c21e869SJeremy L Thompson const CeedInt block_size = max_block_size; 182*1c21e869SJeremy L Thompson 183*1c21e869SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->GradAtPoints, num_elem, block_size, grad_args)); 184*1c21e869SJeremy L Thompson } break; 185*1c21e869SJeremy L Thompson case CEED_EVAL_WEIGHT: 186*1c21e869SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 187*1c21e869SJeremy L Thompson break; 188*1c21e869SJeremy L Thompson // LCOV_EXCL_START 189*1c21e869SJeremy L Thompson case CEED_EVAL_DIV: 190*1c21e869SJeremy L Thompson case CEED_EVAL_CURL: 191*1c21e869SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]); 192*1c21e869SJeremy L Thompson // LCOV_EXCL_STOP 193*1c21e869SJeremy L Thompson } 194*1c21e869SJeremy L Thompson 195*1c21e869SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 196*1c21e869SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(x_ref, &d_x)); 197*1c21e869SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 198*1c21e869SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 199*1c21e869SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 200*1c21e869SJeremy L Thompson return CEED_ERROR_SUCCESS; 201*1c21e869SJeremy L Thompson } 202*1c21e869SJeremy L Thompson 203*1c21e869SJeremy L Thompson //------------------------------------------------------------------------------ 2040d0321e0SJeremy L Thompson // Basis apply - non-tensor 2050d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2062b730f8bSJeremy L Thompson int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, 2072b730f8bSJeremy L Thompson CeedVector v) { 2080d0321e0SJeremy L Thompson Ceed ceed; 209437930d1SJeremy L Thompson CeedInt num_nodes, num_qpts; 2107bbbfca3SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE; 211d075f50bSSebastian Grimberg const int elems_per_block = 1; 212d075f50bSSebastian Grimberg const int grid = CeedDivUpInt(num_elem, elems_per_block); 2130d0321e0SJeremy L Thompson const CeedScalar *d_u; 2140d0321e0SJeremy L Thompson CeedScalar *d_v; 215b7453713SJeremy L Thompson CeedBasisNonTensor_Hip *data; 216b7453713SJeremy L Thompson 217b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 218b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 219b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts)); 220b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes)); 221b7453713SJeremy L Thompson 2229ea2cfd9SJeremy L Thompson // Get read/write access to u, v 2239ea2cfd9SJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u)); 2249ea2cfd9SJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode"); 2252b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v)); 2260d0321e0SJeremy L Thompson 2270d0321e0SJeremy L Thompson // Clear v for transpose operation 2287bbbfca3SJeremy L Thompson if (is_transpose) { 2291f9221feSJeremy L Thompson CeedSize length; 230b7453713SJeremy L Thompson 2312b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(v, &length)); 2322b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemset(d_v, 0, length * sizeof(CeedScalar))); 2330d0321e0SJeremy L Thompson } 2340d0321e0SJeremy L Thompson 2350d0321e0SJeremy L Thompson // Apply basis operation 236437930d1SJeremy L Thompson switch (eval_mode) { 2370d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: { 238d075f50bSSebastian Grimberg void *interp_args[] = {(void *)&num_elem, &data->d_interp, &d_u, &d_v}; 2397bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 240b2165e7aSSebastian Grimberg 2417bbbfca3SJeremy L Thompson if (is_transpose) { 242d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per_block, interp_args)); 243d075f50bSSebastian Grimberg } else { 244b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, interp_args)); 245d075f50bSSebastian Grimberg } 2460d0321e0SJeremy L Thompson } break; 2470d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: { 248d075f50bSSebastian Grimberg void *grad_args[] = {(void *)&num_elem, &data->d_grad, &d_u, &d_v}; 2497bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 250b2165e7aSSebastian Grimberg 2517bbbfca3SJeremy L Thompson if (is_transpose) { 252d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, grad_args)); 253d075f50bSSebastian Grimberg } else { 254d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, grad_args)); 255d075f50bSSebastian Grimberg } 256d075f50bSSebastian Grimberg } break; 257d075f50bSSebastian Grimberg case CEED_EVAL_DIV: { 258d075f50bSSebastian Grimberg void *div_args[] = {(void *)&num_elem, &data->d_div, &d_u, &d_v}; 2597bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 260d075f50bSSebastian Grimberg 2617bbbfca3SJeremy L Thompson if (is_transpose) { 262d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, div_args)); 263d075f50bSSebastian Grimberg } else { 264d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, div_args)); 265d075f50bSSebastian Grimberg } 266d075f50bSSebastian Grimberg } break; 267d075f50bSSebastian Grimberg case CEED_EVAL_CURL: { 268d075f50bSSebastian Grimberg void *curl_args[] = {(void *)&num_elem, &data->d_curl, &d_u, &d_v}; 2697bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts; 270d075f50bSSebastian Grimberg 2717bbbfca3SJeremy L Thompson if (is_transpose) { 272d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, curl_args)); 273d075f50bSSebastian Grimberg } else { 274d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, curl_args)); 275d075f50bSSebastian Grimberg } 2760d0321e0SJeremy L Thompson } break; 2770d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: { 278097cc795SJames Wright CeedCheck(data->d_q_weight, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights not set", CeedEvalModes[eval_mode]); 279437930d1SJeremy L Thompson void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight, &d_v}; 280b2165e7aSSebastian Grimberg 281b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args)); 2820d0321e0SJeremy L Thompson } break; 2839ea2cfd9SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */ 2849ea2cfd9SJeremy L Thompson break; 2850d0321e0SJeremy L Thompson } 2860d0321e0SJeremy L Thompson 2879ea2cfd9SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE 2882b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v)); 2899ea2cfd9SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u)); 2909ea2cfd9SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u)); 2910d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 2920d0321e0SJeremy L Thompson } 2930d0321e0SJeremy L Thompson 2940d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2950d0321e0SJeremy L Thompson // Destroy tensor basis 2960d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 2970d0321e0SJeremy L Thompson static int CeedBasisDestroy_Hip(CeedBasis basis) { 2980d0321e0SJeremy L Thompson Ceed ceed; 2990d0321e0SJeremy L Thompson CeedBasis_Hip *data; 300b7453713SJeremy L Thompson 301b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 3022b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 3032b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(data->module)); 304*1c21e869SJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints)); 305097cc795SJames Wright if (data->d_q_weight_1d) CeedCallHip(ceed, hipFree(data->d_q_weight_1d)); 3062b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_interp_1d)); 3072b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_grad_1d)); 308*1c21e869SJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_chebyshev_interp_1d)); 3092b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&data)); 3100d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3110d0321e0SJeremy L Thompson } 3120d0321e0SJeremy L Thompson 3130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3140d0321e0SJeremy L Thompson // Destroy non-tensor basis 3150d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3160d0321e0SJeremy L Thompson static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) { 3170d0321e0SJeremy L Thompson Ceed ceed; 3180d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 319b7453713SJeremy L Thompson 320b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 3212b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data)); 3222b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(data->module)); 323097cc795SJames Wright if (data->d_q_weight) CeedCallHip(ceed, hipFree(data->d_q_weight)); 3242b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_interp)); 3252b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_grad)); 326d075f50bSSebastian Grimberg CeedCallHip(ceed, hipFree(data->d_div)); 327d075f50bSSebastian Grimberg CeedCallHip(ceed, hipFree(data->d_curl)); 3282b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&data)); 3290d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3300d0321e0SJeremy L Thompson } 3310d0321e0SJeremy L Thompson 3320d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3330d0321e0SJeremy L Thompson // Create tensor 3340d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3352b730f8bSJeremy L Thompson int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 3366574a04fSJeremy L Thompson const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) { 3370d0321e0SJeremy L Thompson Ceed ceed; 33822070f95SJeremy L Thompson char *basis_kernel_source; 33922070f95SJeremy L Thompson const char *basis_kernel_path; 340b7453713SJeremy L Thompson CeedInt num_comp; 341b7453713SJeremy L Thompson const CeedInt q_bytes = Q_1d * sizeof(CeedScalar); 342b7453713SJeremy L Thompson const CeedInt interp_bytes = q_bytes * P_1d; 3430d0321e0SJeremy L Thompson CeedBasis_Hip *data; 344b7453713SJeremy L Thompson 345b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 3462b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data)); 3470d0321e0SJeremy L Thompson 3480d0321e0SJeremy L Thompson // Copy data to GPU 349097cc795SJames Wright if (q_weight_1d) { 3502b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight_1d, q_bytes)); 3512b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, hipMemcpyHostToDevice)); 352097cc795SJames Wright } 3532b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_interp_1d, interp_bytes)); 3542b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, hipMemcpyHostToDevice)); 3552b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_grad_1d, interp_bytes)); 3562b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice)); 3570d0321e0SJeremy L Thompson 358ecc88aebSJeremy L Thompson // Compile basis kernels 359b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 3602b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-tensor.h", &basis_kernel_path)); 36123d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 3622b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 36323d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 364eb7e6cafSJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN", 365b7453713SJeremy L Thompson num_comp * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp, 366b7453713SJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim))); 367eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 368eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad)); 369eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 3702b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_path)); 3712b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_source)); 372437930d1SJeremy L Thompson 3732b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, data)); 3740d0321e0SJeremy L Thompson 375d075f50bSSebastian Grimberg // Register backend functions 3762b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Hip)); 377*1c21e869SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip)); 3782b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip)); 3790d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 3800d0321e0SJeremy L Thompson } 3810d0321e0SJeremy L Thompson 3820d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 383d075f50bSSebastian Grimberg // Create non-tensor H^1 3840d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 3852b730f8bSJeremy L Thompson int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad, 38651475c7cSJeremy L Thompson const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 3870d0321e0SJeremy L Thompson Ceed ceed; 38822070f95SJeremy L Thompson char *basis_kernel_source; 38922070f95SJeremy L Thompson const char *basis_kernel_path; 390d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_grad; 391b7453713SJeremy L Thompson const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 3920d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data; 393b7453713SJeremy L Thompson 394b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 3952b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data)); 3960d0321e0SJeremy L Thompson 3970d0321e0SJeremy L Thompson // Copy basis data to GPU 398d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 399d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad)); 400097cc795SJames Wright if (q_weight) { 4012b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 4022b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 403097cc795SJames Wright } 404d075f50bSSebastian Grimberg if (interp) { 405d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 406d075f50bSSebastian Grimberg 4072b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 4082b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 409d075f50bSSebastian Grimberg } 410d075f50bSSebastian Grimberg if (grad) { 411d075f50bSSebastian Grimberg const CeedInt grad_bytes = q_bytes * num_nodes * q_comp_grad; 412d075f50bSSebastian Grimberg 4132b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_grad, grad_bytes)); 4142b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_grad, grad, grad_bytes, hipMemcpyHostToDevice)); 415d075f50bSSebastian Grimberg } 4160d0321e0SJeremy L Thompson 4170d0321e0SJeremy L Thompson // Compile basis kernels 418b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 4192b730f8bSJeremy L Thompson CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 42023d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 4212b730f8bSJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 42223d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 423d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 424d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp)); 425eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 426d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 427d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 428d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 429eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 4302b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_path)); 4312b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&basis_kernel_source)); 432d075f50bSSebastian Grimberg 433d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, data)); 434d075f50bSSebastian Grimberg 435d075f50bSSebastian Grimberg // Register backend functions 436d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 437d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 438d075f50bSSebastian Grimberg return CEED_ERROR_SUCCESS; 439d075f50bSSebastian Grimberg } 440d075f50bSSebastian Grimberg 441d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 442d075f50bSSebastian Grimberg // Create non-tensor H(div) 443d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 444d075f50bSSebastian Grimberg int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div, 445d075f50bSSebastian Grimberg const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 446d075f50bSSebastian Grimberg Ceed ceed; 44722070f95SJeremy L Thompson char *basis_kernel_source; 44822070f95SJeremy L Thompson const char *basis_kernel_path; 449d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_div; 450d075f50bSSebastian Grimberg const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 451d075f50bSSebastian Grimberg CeedBasisNonTensor_Hip *data; 452d075f50bSSebastian Grimberg 453d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 454d075f50bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &data)); 455d075f50bSSebastian Grimberg 456d075f50bSSebastian Grimberg // Copy basis data to GPU 457d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 458d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div)); 459097cc795SJames Wright if (q_weight) { 460d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 461d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 462097cc795SJames Wright } 463d075f50bSSebastian Grimberg if (interp) { 464d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 465d075f50bSSebastian Grimberg 466d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 467d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 468d075f50bSSebastian Grimberg } 469d075f50bSSebastian Grimberg if (div) { 470d075f50bSSebastian Grimberg const CeedInt div_bytes = q_bytes * num_nodes * q_comp_div; 471d075f50bSSebastian Grimberg 472d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_div, div_bytes)); 473d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_div, div, div_bytes, hipMemcpyHostToDevice)); 474d075f50bSSebastian Grimberg } 475d075f50bSSebastian Grimberg 476d075f50bSSebastian Grimberg // Compile basis kernels 477d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 478d075f50bSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 479d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 480d075f50bSSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 481d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 482d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 483d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp)); 484d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 485d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 486d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 487d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 488d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 489d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_path)); 490d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 491d075f50bSSebastian Grimberg 492d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, data)); 493d075f50bSSebastian Grimberg 494d075f50bSSebastian Grimberg // Register backend functions 495d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 496d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 497d075f50bSSebastian Grimberg return CEED_ERROR_SUCCESS; 498d075f50bSSebastian Grimberg } 499d075f50bSSebastian Grimberg 500d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 501d075f50bSSebastian Grimberg // Create non-tensor H(curl) 502d075f50bSSebastian Grimberg //------------------------------------------------------------------------------ 503d075f50bSSebastian Grimberg int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 504d075f50bSSebastian Grimberg const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) { 505d075f50bSSebastian Grimberg Ceed ceed; 50622070f95SJeremy L Thompson char *basis_kernel_source; 50722070f95SJeremy L Thompson const char *basis_kernel_path; 508d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_curl; 509d075f50bSSebastian Grimberg const CeedInt q_bytes = num_qpts * sizeof(CeedScalar); 510d075f50bSSebastian Grimberg CeedBasisNonTensor_Hip *data; 511d075f50bSSebastian Grimberg 512d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed)); 513d075f50bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &data)); 514d075f50bSSebastian Grimberg 515d075f50bSSebastian Grimberg // Copy basis data to GPU 516d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp)); 517d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl)); 518097cc795SJames Wright if (q_weight) { 519d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes)); 520d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice)); 521097cc795SJames Wright } 522d075f50bSSebastian Grimberg if (interp) { 523d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp; 524d075f50bSSebastian Grimberg 525d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes)); 526d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice)); 527d075f50bSSebastian Grimberg } 528d075f50bSSebastian Grimberg if (curl) { 529d075f50bSSebastian Grimberg const CeedInt curl_bytes = q_bytes * num_nodes * q_comp_curl; 530d075f50bSSebastian Grimberg 531d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_curl, curl_bytes)); 532d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_curl, curl, curl_bytes, hipMemcpyHostToDevice)); 533d075f50bSSebastian Grimberg } 534d075f50bSSebastian Grimberg 535d075f50bSSebastian Grimberg // Compile basis kernels 536d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp)); 537d075f50bSSebastian Grimberg CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/hip/hip-ref-basis-nontensor.h", &basis_kernel_path)); 538d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source -----\n"); 539d075f50bSSebastian Grimberg CeedCallBackend(CeedLoadSourceToBuffer(ceed, basis_kernel_path, &basis_kernel_source)); 540d075f50bSSebastian Grimberg CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading Basis Kernel Source Complete! -----\n"); 541d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP", 542d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp)); 543d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp)); 544d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose)); 545d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv)); 546d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose)); 547d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight)); 548d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_path)); 549d075f50bSSebastian Grimberg CeedCallBackend(CeedFree(&basis_kernel_source)); 550d075f50bSSebastian Grimberg 5512b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, data)); 5520d0321e0SJeremy L Thompson 5530d0321e0SJeremy L Thompson // Register backend functions 5542b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip)); 5552b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip)); 5560d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS; 5570d0321e0SJeremy L Thompson } 5582a86cc9dSSebastian Grimberg 5590d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 560