1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
30d0321e0SJeremy L Thompson //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
50d0321e0SJeremy L Thompson //
63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed
70d0321e0SJeremy L Thompson
849aac155SJeremy L Thompson #include <ceed.h>
90d0321e0SJeremy L Thompson #include <ceed/backend.h>
10437930d1SJeremy L Thompson #include <ceed/jit-tools.h>
11111870feSJeremy L Thompson #include <string.h>
120d0321e0SJeremy L Thompson #include <hip/hip_runtime.h>
132b730f8bSJeremy L Thompson
1449aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h"
150d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h"
162b730f8bSJeremy L Thompson #include "ceed-hip-ref.h"
170d0321e0SJeremy L Thompson
180d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
190d0321e0SJeremy L Thompson // Basis apply - tensor
200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisApplyCore_Hip(CeedBasis basis,bool apply_add,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)21db2becc9SJeremy L Thompson static int CeedBasisApplyCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
22db2becc9SJeremy L Thompson CeedVector u, CeedVector v) {
230d0321e0SJeremy L Thompson Ceed ceed;
24b7453713SJeremy L Thompson CeedInt Q_1d, dim;
257bbbfca3SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE;
26437930d1SJeremy L Thompson const int max_block_size = 64;
270d0321e0SJeremy L Thompson const CeedScalar *d_u;
280d0321e0SJeremy L Thompson CeedScalar *d_v;
29b7453713SJeremy L Thompson CeedBasis_Hip *data;
30b7453713SJeremy L Thompson
31b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
32b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data));
33b7453713SJeremy L Thompson
349ea2cfd9SJeremy L Thompson // Get read/write access to u, v
356574a04fSJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
366574a04fSJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
3711ac676fSZach Atkins if (apply_add) {
3811ac676fSZach Atkins CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
3911ac676fSZach Atkins } else {
400d0321e0SJeremy L Thompson // Clear v for transpose operation
4111ac676fSZach Atkins if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
4211ac676fSZach Atkins CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
430d0321e0SJeremy L Thompson }
4411ac676fSZach Atkins
45b2165e7aSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
46b2165e7aSSebastian Grimberg CeedCallBackend(CeedBasisGetDimension(basis, &dim));
470d0321e0SJeremy L Thompson
480d0321e0SJeremy L Thompson // Basis action
49437930d1SJeremy L Thompson switch (eval_mode) {
500d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: {
517bbbfca3SJeremy L Thompson void *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v};
52b2165e7aSSebastian Grimberg const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
530d0321e0SJeremy L Thompson
54eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args));
550d0321e0SJeremy L Thompson } break;
560d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: {
577bbbfca3SJeremy L Thompson void *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v};
58b2165e7aSSebastian Grimberg const CeedInt block_size = max_block_size;
590d0321e0SJeremy L Thompson
60eb7e6cafSJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args));
610d0321e0SJeremy L Thompson } break;
620d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: {
63097cc795SJames Wright CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights_1d not set", CeedEvalModes[eval_mode]);
64437930d1SJeremy L Thompson void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v};
65b2165e7aSSebastian Grimberg const int block_size_x = Q_1d;
66b2165e7aSSebastian Grimberg const int block_size_y = dim >= 2 ? Q_1d : 1;
670d0321e0SJeremy L Thompson
68b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, weight_args));
690d0321e0SJeremy L Thompson } break;
709ea2cfd9SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */
719ea2cfd9SJeremy L Thompson break;
720d0321e0SJeremy L Thompson // LCOV_EXCL_START
730d0321e0SJeremy L Thompson case CEED_EVAL_DIV:
740d0321e0SJeremy L Thompson case CEED_EVAL_CURL:
75bcbe1c99SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]);
760d0321e0SJeremy L Thompson // LCOV_EXCL_STOP
770d0321e0SJeremy L Thompson }
780d0321e0SJeremy L Thompson
799ea2cfd9SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE
802b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
819ea2cfd9SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u));
829ea2cfd9SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
839bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
840d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS;
850d0321e0SJeremy L Thompson }
860d0321e0SJeremy L Thompson
CeedBasisApply_Hip(CeedBasis basis,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)87db2becc9SJeremy L Thompson static int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
88db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v));
89db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS;
90db2becc9SJeremy L Thompson }
91db2becc9SJeremy L Thompson
CeedBasisApplyAdd_Hip(CeedBasis basis,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)92db2becc9SJeremy L Thompson static int CeedBasisApplyAdd_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u,
93db2becc9SJeremy L Thompson CeedVector v) {
94db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v));
95db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS;
96db2becc9SJeremy L Thompson }
97db2becc9SJeremy L Thompson
980d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
991c21e869SJeremy L Thompson // Basis apply - tensor AtPoints
1001c21e869SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisApplyAtPointsCore_Hip(CeedBasis basis,bool apply_add,const CeedInt num_elem,const CeedInt * num_points,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector x_ref,CeedVector u,CeedVector v)101db2becc9SJeremy L Thompson static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points,
102db2becc9SJeremy L Thompson CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
1031c21e869SJeremy L Thompson Ceed ceed;
1041c21e869SJeremy L Thompson CeedInt Q_1d, dim, max_num_points = num_points[0];
1051c21e869SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE;
1061c21e869SJeremy L Thompson const int max_block_size = 32;
1071c21e869SJeremy L Thompson const CeedScalar *d_x, *d_u;
1081c21e869SJeremy L Thompson CeedScalar *d_v;
1091c21e869SJeremy L Thompson CeedBasis_Hip *data;
1101c21e869SJeremy L Thompson
1111c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data));
1121c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
1131c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetDimension(basis, &dim));
1141c21e869SJeremy L Thompson
1151c21e869SJeremy L Thompson // Weight handled separately
1161c21e869SJeremy L Thompson if (eval_mode == CEED_EVAL_WEIGHT) {
1175a5594ffSJeremy L Thompson CeedCallBackend(CeedVectorSetValue(v, 1.0));
1181c21e869SJeremy L Thompson return CEED_ERROR_SUCCESS;
1191c21e869SJeremy L Thompson }
1201c21e869SJeremy L Thompson
1219bc66399SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
1229bc66399SJeremy L Thompson
123111870feSJeremy L Thompson // Check padded to uniform number of points per elem
124111870feSJeremy L Thompson for (CeedInt i = 1; i < num_elem; i++) max_num_points = CeedIntMax(max_num_points, num_points[i]);
125111870feSJeremy L Thompson {
126111870feSJeremy L Thompson CeedInt num_comp, q_comp;
127111870feSJeremy L Thompson CeedSize len, len_required;
128111870feSJeremy L Thompson
129111870feSJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
130111870feSJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
131111870feSJeremy L Thompson CeedCallBackend(CeedVectorGetLength(is_transpose ? u : v, &len));
132111870feSJeremy L Thompson len_required = (CeedSize)num_comp * (CeedSize)q_comp * (CeedSize)num_elem * (CeedSize)max_num_points;
133111870feSJeremy L Thompson CeedCheck(len >= len_required, ceed, CEED_ERROR_BACKEND,
134111870feSJeremy L Thompson "Vector at points must be padded to the same number of points in each element for BasisApplyAtPoints on GPU backends."
135111870feSJeremy L Thompson " Found %" CeedSize_FMT ", Required %" CeedSize_FMT,
136111870feSJeremy L Thompson len, len_required);
137111870feSJeremy L Thompson }
138111870feSJeremy L Thompson
139111870feSJeremy L Thompson // Move num_points array to device
140111870feSJeremy L Thompson if (is_transpose) {
141111870feSJeremy L Thompson const CeedInt num_bytes = num_elem * sizeof(CeedInt);
142111870feSJeremy L Thompson
143111870feSJeremy L Thompson if (num_elem != data->num_elem_at_points) {
144111870feSJeremy L Thompson data->num_elem_at_points = num_elem;
145111870feSJeremy L Thompson
146111870feSJeremy L Thompson if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem));
147111870feSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_points_per_elem, num_bytes));
148111870feSJeremy L Thompson CeedCallBackend(CeedFree(&data->h_points_per_elem));
149111870feSJeremy L Thompson CeedCallBackend(CeedCalloc(num_elem, &data->h_points_per_elem));
150111870feSJeremy L Thompson }
1519e511c80SJeremy L Thompson if (memcmp(data->h_points_per_elem, num_points, num_bytes)) {
152111870feSJeremy L Thompson memcpy(data->h_points_per_elem, num_points, num_bytes);
153111870feSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_points_per_elem, num_points, num_bytes, hipMemcpyHostToDevice));
154111870feSJeremy L Thompson }
155111870feSJeremy L Thompson }
156111870feSJeremy L Thompson
1571c21e869SJeremy L Thompson // Build kernels if needed
1581c21e869SJeremy L Thompson if (data->num_points != max_num_points) {
1591c21e869SJeremy L Thompson CeedInt P_1d;
1601c21e869SJeremy L Thompson
1611c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d));
1621c21e869SJeremy L Thompson data->num_points = max_num_points;
1631c21e869SJeremy L Thompson
1641c21e869SJeremy L Thompson // -- Create interp matrix to Chebyshev coefficients
1651c21e869SJeremy L Thompson if (!data->d_chebyshev_interp_1d) {
1661c21e869SJeremy L Thompson CeedSize interp_bytes;
1671c21e869SJeremy L Thompson CeedScalar *chebyshev_interp_1d;
1681c21e869SJeremy L Thompson
1691c21e869SJeremy L Thompson interp_bytes = P_1d * Q_1d * sizeof(CeedScalar);
1701c21e869SJeremy L Thompson CeedCallBackend(CeedCalloc(P_1d * Q_1d, &chebyshev_interp_1d));
1715a5594ffSJeremy L Thompson CeedCallBackend(CeedBasisGetChebyshevInterp1D(basis, chebyshev_interp_1d));
1721c21e869SJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_chebyshev_interp_1d, interp_bytes));
1731c21e869SJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_chebyshev_interp_1d, chebyshev_interp_1d, interp_bytes, hipMemcpyHostToDevice));
1741c21e869SJeremy L Thompson CeedCallBackend(CeedFree(&chebyshev_interp_1d));
1751c21e869SJeremy L Thompson }
1761c21e869SJeremy L Thompson
1771c21e869SJeremy L Thompson // -- Compile kernels
1789c25dd66SJeremy L Thompson const char basis_kernel_source[] = "// AtPoints basis source\n#include <ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h>\n";
1791c21e869SJeremy L Thompson CeedInt num_comp;
1801c21e869SJeremy L Thompson
1811c21e869SJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints));
1821c21e869SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
1831c21e869SJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
184f7c9815fSJeremy L Thompson Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
1851c21e869SJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
186f7c9815fSJeremy L Thompson max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1)));
1871c21e869SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
18881ae6159SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpTransposeAtPoints", &data->InterpTransposeAtPoints));
1891c21e869SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints));
19081ae6159SJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradTransposeAtPoints", &data->GradTransposeAtPoints));
1911c21e869SJeremy L Thompson }
1921c21e869SJeremy L Thompson
1931c21e869SJeremy L Thompson // Get read/write access to u, v
1941c21e869SJeremy L Thompson CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
1951c21e869SJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
1961c21e869SJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
19711ac676fSZach Atkins if (apply_add) {
19811ac676fSZach Atkins CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
19911ac676fSZach Atkins } else {
2001c21e869SJeremy L Thompson // Clear v for transpose operation
20111ac676fSZach Atkins if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
20211ac676fSZach Atkins CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
2031c21e869SJeremy L Thompson }
2041c21e869SJeremy L Thompson
2051c21e869SJeremy L Thompson // Basis action
2061c21e869SJeremy L Thompson switch (eval_mode) {
2071c21e869SJeremy L Thompson case CEED_EVAL_INTERP: {
20881ae6159SJeremy L Thompson void *interp_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
2091c21e869SJeremy L Thompson const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
2101c21e869SJeremy L Thompson
2111a8516d0SJames Wright CeedCallBackend(CeedRunKernel_Hip(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size,
2121a8516d0SJames Wright interp_args));
2131c21e869SJeremy L Thompson } break;
2141c21e869SJeremy L Thompson case CEED_EVAL_GRAD: {
21581ae6159SJeremy L Thompson void *grad_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
2162d10e82cSJeremy L Thompson const CeedInt block_size = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
2171c21e869SJeremy L Thompson
21881ae6159SJeremy L Thompson CeedCallBackend(CeedRunKernel_Hip(ceed, is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args));
2191c21e869SJeremy L Thompson } break;
2201c21e869SJeremy L Thompson case CEED_EVAL_WEIGHT:
2211c21e869SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */
2221c21e869SJeremy L Thompson break;
2231c21e869SJeremy L Thompson // LCOV_EXCL_START
2241c21e869SJeremy L Thompson case CEED_EVAL_DIV:
2251c21e869SJeremy L Thompson case CEED_EVAL_CURL:
2261c21e869SJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]);
2271c21e869SJeremy L Thompson // LCOV_EXCL_STOP
2281c21e869SJeremy L Thompson }
2291c21e869SJeremy L Thompson
2301c21e869SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE
2311c21e869SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArrayRead(x_ref, &d_x));
2321c21e869SJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
2331c21e869SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u));
2341c21e869SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
2359bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
2361c21e869SJeremy L Thompson return CEED_ERROR_SUCCESS;
2371c21e869SJeremy L Thompson }
2381c21e869SJeremy L Thompson
CeedBasisApplyAtPoints_Hip(CeedBasis basis,const CeedInt num_elem,const CeedInt * num_points,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector x_ref,CeedVector u,CeedVector v)239db2becc9SJeremy L Thompson static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
240db2becc9SJeremy L Thompson CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
241db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
242db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS;
243db2becc9SJeremy L Thompson }
244db2becc9SJeremy L Thompson
CeedBasisApplyAddAtPoints_Hip(CeedBasis basis,const CeedInt num_elem,const CeedInt * num_points,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector x_ref,CeedVector u,CeedVector v)245db2becc9SJeremy L Thompson static int CeedBasisApplyAddAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
246db2becc9SJeremy L Thompson CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
247db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
248db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS;
249db2becc9SJeremy L Thompson }
250db2becc9SJeremy L Thompson
2511c21e869SJeremy L Thompson //------------------------------------------------------------------------------
2520d0321e0SJeremy L Thompson // Basis apply - non-tensor
2530d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisApplyNonTensorCore_Hip(CeedBasis basis,bool apply_add,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)254db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensorCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
255db2becc9SJeremy L Thompson CeedVector u, CeedVector v) {
2560d0321e0SJeremy L Thompson Ceed ceed;
257437930d1SJeremy L Thompson CeedInt num_nodes, num_qpts;
2587bbbfca3SJeremy L Thompson const CeedInt is_transpose = t_mode == CEED_TRANSPOSE;
259d075f50bSSebastian Grimberg const int elems_per_block = 1;
260d075f50bSSebastian Grimberg const int grid = CeedDivUpInt(num_elem, elems_per_block);
2610d0321e0SJeremy L Thompson const CeedScalar *d_u;
2620d0321e0SJeremy L Thompson CeedScalar *d_v;
263b7453713SJeremy L Thompson CeedBasisNonTensor_Hip *data;
264b7453713SJeremy L Thompson
265b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
266b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data));
267b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
268b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
269b7453713SJeremy L Thompson
2709ea2cfd9SJeremy L Thompson // Get read/write access to u, v
2719ea2cfd9SJeremy L Thompson if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
2729ea2cfd9SJeremy L Thompson else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
27311ac676fSZach Atkins if (apply_add) {
27411ac676fSZach Atkins CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
27511ac676fSZach Atkins } else {
2760d0321e0SJeremy L Thompson // Clear v for transpose operation
27711ac676fSZach Atkins if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
27811ac676fSZach Atkins CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
2790d0321e0SJeremy L Thompson }
2800d0321e0SJeremy L Thompson
2810d0321e0SJeremy L Thompson // Apply basis operation
282437930d1SJeremy L Thompson switch (eval_mode) {
2830d0321e0SJeremy L Thompson case CEED_EVAL_INTERP: {
284d075f50bSSebastian Grimberg void *interp_args[] = {(void *)&num_elem, &data->d_interp, &d_u, &d_v};
2857bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts;
286b2165e7aSSebastian Grimberg
2877bbbfca3SJeremy L Thompson if (is_transpose) {
288d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per_block, interp_args));
289d075f50bSSebastian Grimberg } else {
290b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, interp_args));
291d075f50bSSebastian Grimberg }
2920d0321e0SJeremy L Thompson } break;
2930d0321e0SJeremy L Thompson case CEED_EVAL_GRAD: {
294d075f50bSSebastian Grimberg void *grad_args[] = {(void *)&num_elem, &data->d_grad, &d_u, &d_v};
2957bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts;
296b2165e7aSSebastian Grimberg
2977bbbfca3SJeremy L Thompson if (is_transpose) {
298d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, grad_args));
299d075f50bSSebastian Grimberg } else {
300d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, grad_args));
301d075f50bSSebastian Grimberg }
302d075f50bSSebastian Grimberg } break;
303d075f50bSSebastian Grimberg case CEED_EVAL_DIV: {
304d075f50bSSebastian Grimberg void *div_args[] = {(void *)&num_elem, &data->d_div, &d_u, &d_v};
3057bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts;
306d075f50bSSebastian Grimberg
3077bbbfca3SJeremy L Thompson if (is_transpose) {
308d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, div_args));
309d075f50bSSebastian Grimberg } else {
310d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, div_args));
311d075f50bSSebastian Grimberg }
312d075f50bSSebastian Grimberg } break;
313d075f50bSSebastian Grimberg case CEED_EVAL_CURL: {
314d075f50bSSebastian Grimberg void *curl_args[] = {(void *)&num_elem, &data->d_curl, &d_u, &d_v};
3157bbbfca3SJeremy L Thompson const int block_size_x = is_transpose ? num_nodes : num_qpts;
316d075f50bSSebastian Grimberg
3177bbbfca3SJeremy L Thompson if (is_transpose) {
318d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, curl_args));
319d075f50bSSebastian Grimberg } else {
320d075f50bSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, curl_args));
321d075f50bSSebastian Grimberg }
3220d0321e0SJeremy L Thompson } break;
3230d0321e0SJeremy L Thompson case CEED_EVAL_WEIGHT: {
324097cc795SJames Wright CeedCheck(data->d_q_weight, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights not set", CeedEvalModes[eval_mode]);
325437930d1SJeremy L Thompson void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight, &d_v};
326b2165e7aSSebastian Grimberg
327b2165e7aSSebastian Grimberg CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args));
3280d0321e0SJeremy L Thompson } break;
3299ea2cfd9SJeremy L Thompson case CEED_EVAL_NONE: /* handled separately below */
3309ea2cfd9SJeremy L Thompson break;
3310d0321e0SJeremy L Thompson }
3320d0321e0SJeremy L Thompson
3339ea2cfd9SJeremy L Thompson // Restore vectors, cover CEED_EVAL_NONE
3342b730f8bSJeremy L Thompson CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
3359ea2cfd9SJeremy L Thompson if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u));
3369ea2cfd9SJeremy L Thompson if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
3379bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
3380d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS;
3390d0321e0SJeremy L Thompson }
3400d0321e0SJeremy L Thompson
CeedBasisApplyNonTensor_Hip(CeedBasis basis,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)341db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u,
342db2becc9SJeremy L Thompson CeedVector v) {
343db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v));
344db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS;
345db2becc9SJeremy L Thompson }
346db2becc9SJeremy L Thompson
CeedBasisApplyAddNonTensor_Hip(CeedBasis basis,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)347db2becc9SJeremy L Thompson static int CeedBasisApplyAddNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u,
348db2becc9SJeremy L Thompson CeedVector v) {
349db2becc9SJeremy L Thompson CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v));
350db2becc9SJeremy L Thompson return CEED_ERROR_SUCCESS;
351db2becc9SJeremy L Thompson }
352db2becc9SJeremy L Thompson
3530d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3540d0321e0SJeremy L Thompson // Destroy tensor basis
3550d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisDestroy_Hip(CeedBasis basis)3560d0321e0SJeremy L Thompson static int CeedBasisDestroy_Hip(CeedBasis basis) {
3570d0321e0SJeremy L Thompson Ceed ceed;
3580d0321e0SJeremy L Thompson CeedBasis_Hip *data;
359b7453713SJeremy L Thompson
360b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
3612b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data));
3622b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(data->module));
3631c21e869SJeremy L Thompson if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints));
364097cc795SJames Wright if (data->d_q_weight_1d) CeedCallHip(ceed, hipFree(data->d_q_weight_1d));
365111870feSJeremy L Thompson CeedCallBackend(CeedFree(&data->h_points_per_elem));
366111870feSJeremy L Thompson if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem));
3672b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_interp_1d));
3682b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_grad_1d));
3691c21e869SJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_chebyshev_interp_1d));
3702b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&data));
3719bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
3720d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS;
3730d0321e0SJeremy L Thompson }
3740d0321e0SJeremy L Thompson
3750d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3760d0321e0SJeremy L Thompson // Destroy non-tensor basis
3770d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisDestroyNonTensor_Hip(CeedBasis basis)3780d0321e0SJeremy L Thompson static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) {
3790d0321e0SJeremy L Thompson Ceed ceed;
3800d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data;
381b7453713SJeremy L Thompson
382b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
3832b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisGetData(basis, &data));
3842b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleUnload(data->module));
385097cc795SJames Wright if (data->d_q_weight) CeedCallHip(ceed, hipFree(data->d_q_weight));
3862b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_interp));
3872b730f8bSJeremy L Thompson CeedCallHip(ceed, hipFree(data->d_grad));
388d075f50bSSebastian Grimberg CeedCallHip(ceed, hipFree(data->d_div));
389d075f50bSSebastian Grimberg CeedCallHip(ceed, hipFree(data->d_curl));
3902b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&data));
3919bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
3920d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS;
3930d0321e0SJeremy L Thompson }
3940d0321e0SJeremy L Thompson
3950d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3960d0321e0SJeremy L Thompson // Create tensor
3970d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisCreateTensorH1_Hip(CeedInt dim,CeedInt P_1d,CeedInt Q_1d,const CeedScalar * interp_1d,const CeedScalar * grad_1d,const CeedScalar * q_ref_1d,const CeedScalar * q_weight_1d,CeedBasis basis)3982b730f8bSJeremy L Thompson int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
3996574a04fSJeremy L Thompson const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) {
4000d0321e0SJeremy L Thompson Ceed ceed;
401b7453713SJeremy L Thompson CeedInt num_comp;
402b7453713SJeremy L Thompson const CeedInt q_bytes = Q_1d * sizeof(CeedScalar);
403b7453713SJeremy L Thompson const CeedInt interp_bytes = q_bytes * P_1d;
4040d0321e0SJeremy L Thompson CeedBasis_Hip *data;
405b7453713SJeremy L Thompson
406b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
4072b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data));
4080d0321e0SJeremy L Thompson
4090d0321e0SJeremy L Thompson // Copy data to GPU
410097cc795SJames Wright if (q_weight_1d) {
4112b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight_1d, q_bytes));
4122b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, hipMemcpyHostToDevice));
413097cc795SJames Wright }
4142b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_interp_1d, interp_bytes));
4152b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, hipMemcpyHostToDevice));
4162b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_grad_1d, interp_bytes));
4172b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice));
4180d0321e0SJeremy L Thompson
419ecc88aebSJeremy L Thompson // Compile basis kernels
4209c25dd66SJeremy L Thompson const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-tensor.h>\n";
4219c25dd66SJeremy L Thompson
422b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
423eb7e6cafSJeremy L Thompson CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
424f7c9815fSJeremy L Thompson Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
425b7453713SJeremy L Thompson "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim)));
426eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
427eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad));
428eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
429437930d1SJeremy L Thompson
4302b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, data));
4310d0321e0SJeremy L Thompson
432d075f50bSSebastian Grimberg // Register backend functions
4332b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Hip));
434db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Hip));
4351c21e869SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip));
436db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Hip));
4372b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip));
4389bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
4390d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS;
4400d0321e0SJeremy L Thompson }
4410d0321e0SJeremy L Thompson
4420d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
443d075f50bSSebastian Grimberg // Create non-tensor H^1
4440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisCreateH1_Hip(CeedElemTopology topo,CeedInt dim,CeedInt num_nodes,CeedInt num_qpts,const CeedScalar * interp,const CeedScalar * grad,const CeedScalar * q_ref,const CeedScalar * q_weight,CeedBasis basis)4452b730f8bSJeremy L Thompson int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad,
44651475c7cSJeremy L Thompson const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
4470d0321e0SJeremy L Thompson Ceed ceed;
448d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_grad;
449b7453713SJeremy L Thompson const CeedInt q_bytes = num_qpts * sizeof(CeedScalar);
4500d0321e0SJeremy L Thompson CeedBasisNonTensor_Hip *data;
451b7453713SJeremy L Thompson
452b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
4532b730f8bSJeremy L Thompson CeedCallBackend(CeedCalloc(1, &data));
4540d0321e0SJeremy L Thompson
4550d0321e0SJeremy L Thompson // Copy basis data to GPU
456d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
457d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad));
458097cc795SJames Wright if (q_weight) {
4592b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes));
4602b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice));
461097cc795SJames Wright }
462d075f50bSSebastian Grimberg if (interp) {
463d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp;
464d075f50bSSebastian Grimberg
4652b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes));
4662b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice));
467d075f50bSSebastian Grimberg }
468d075f50bSSebastian Grimberg if (grad) {
469d075f50bSSebastian Grimberg const CeedInt grad_bytes = q_bytes * num_nodes * q_comp_grad;
470d075f50bSSebastian Grimberg
4712b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMalloc((void **)&data->d_grad, grad_bytes));
4722b730f8bSJeremy L Thompson CeedCallHip(ceed, hipMemcpy(data->d_grad, grad, grad_bytes, hipMemcpyHostToDevice));
473d075f50bSSebastian Grimberg }
4740d0321e0SJeremy L Thompson
4750d0321e0SJeremy L Thompson // Compile basis kernels
4769c25dd66SJeremy L Thompson const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-nontensor.h>\n";
4779c25dd66SJeremy L Thompson
478b7453713SJeremy L Thompson CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
479d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
480d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp));
481eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
482d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
483d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv));
484d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
485eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
486d075f50bSSebastian Grimberg
487d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, data));
488d075f50bSSebastian Grimberg
489d075f50bSSebastian Grimberg // Register backend functions
490d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip));
491db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip));
492d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip));
4939bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
494d075f50bSSebastian Grimberg return CEED_ERROR_SUCCESS;
495d075f50bSSebastian Grimberg }
496d075f50bSSebastian Grimberg
497d075f50bSSebastian Grimberg //------------------------------------------------------------------------------
498d075f50bSSebastian Grimberg // Create non-tensor H(div)
499d075f50bSSebastian Grimberg //------------------------------------------------------------------------------
CeedBasisCreateHdiv_Hip(CeedElemTopology topo,CeedInt dim,CeedInt num_nodes,CeedInt num_qpts,const CeedScalar * interp,const CeedScalar * div,const CeedScalar * q_ref,const CeedScalar * q_weight,CeedBasis basis)500d075f50bSSebastian Grimberg int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div,
501d075f50bSSebastian Grimberg const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
502d075f50bSSebastian Grimberg Ceed ceed;
503d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_div;
504d075f50bSSebastian Grimberg const CeedInt q_bytes = num_qpts * sizeof(CeedScalar);
505d075f50bSSebastian Grimberg CeedBasisNonTensor_Hip *data;
506d075f50bSSebastian Grimberg
507d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
508d075f50bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &data));
509d075f50bSSebastian Grimberg
510d075f50bSSebastian Grimberg // Copy basis data to GPU
511d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
512d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div));
513097cc795SJames Wright if (q_weight) {
514d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes));
515d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice));
516097cc795SJames Wright }
517d075f50bSSebastian Grimberg if (interp) {
518d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp;
519d075f50bSSebastian Grimberg
520d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes));
521d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice));
522d075f50bSSebastian Grimberg }
523d075f50bSSebastian Grimberg if (div) {
524d075f50bSSebastian Grimberg const CeedInt div_bytes = q_bytes * num_nodes * q_comp_div;
525d075f50bSSebastian Grimberg
526d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_div, div_bytes));
527d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_div, div, div_bytes, hipMemcpyHostToDevice));
528d075f50bSSebastian Grimberg }
529d075f50bSSebastian Grimberg
530d075f50bSSebastian Grimberg // Compile basis kernels
5319c25dd66SJeremy L Thompson const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-nontensor.h>\n";
5329c25dd66SJeremy L Thompson
533d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
534d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
535d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp));
536d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
537d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
538d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv));
539d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
540d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
541d075f50bSSebastian Grimberg
542d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisSetData(basis, data));
543d075f50bSSebastian Grimberg
544d075f50bSSebastian Grimberg // Register backend functions
545d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip));
546db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip));
547d075f50bSSebastian Grimberg CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip));
5489bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
549d075f50bSSebastian Grimberg return CEED_ERROR_SUCCESS;
550d075f50bSSebastian Grimberg }
551d075f50bSSebastian Grimberg
552d075f50bSSebastian Grimberg //------------------------------------------------------------------------------
553d075f50bSSebastian Grimberg // Create non-tensor H(curl)
554d075f50bSSebastian Grimberg //------------------------------------------------------------------------------
CeedBasisCreateHcurl_Hip(CeedElemTopology topo,CeedInt dim,CeedInt num_nodes,CeedInt num_qpts,const CeedScalar * interp,const CeedScalar * curl,const CeedScalar * q_ref,const CeedScalar * q_weight,CeedBasis basis)555d075f50bSSebastian Grimberg int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
556d075f50bSSebastian Grimberg const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
557d075f50bSSebastian Grimberg Ceed ceed;
558d075f50bSSebastian Grimberg CeedInt num_comp, q_comp_interp, q_comp_curl;
559d075f50bSSebastian Grimberg const CeedInt q_bytes = num_qpts * sizeof(CeedScalar);
560d075f50bSSebastian Grimberg CeedBasisNonTensor_Hip *data;
561d075f50bSSebastian Grimberg
562d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
563d075f50bSSebastian Grimberg CeedCallBackend(CeedCalloc(1, &data));
564d075f50bSSebastian Grimberg
565d075f50bSSebastian Grimberg // Copy basis data to GPU
566d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
567d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl));
568097cc795SJames Wright if (q_weight) {
569d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes));
570d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice));
571097cc795SJames Wright }
572d075f50bSSebastian Grimberg if (interp) {
573d075f50bSSebastian Grimberg const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp;
574d075f50bSSebastian Grimberg
575d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes));
576d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice));
577d075f50bSSebastian Grimberg }
578d075f50bSSebastian Grimberg if (curl) {
579d075f50bSSebastian Grimberg const CeedInt curl_bytes = q_bytes * num_nodes * q_comp_curl;
580d075f50bSSebastian Grimberg
581d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMalloc((void **)&data->d_curl, curl_bytes));
582d075f50bSSebastian Grimberg CeedCallHip(ceed, hipMemcpy(data->d_curl, curl, curl_bytes, hipMemcpyHostToDevice));
583d075f50bSSebastian Grimberg }
584d075f50bSSebastian Grimberg
585d075f50bSSebastian Grimberg // Compile basis kernels
5869c25dd66SJeremy L Thompson const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-nontensor.h>\n";
5879c25dd66SJeremy L Thompson
588d075f50bSSebastian Grimberg CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
589d075f50bSSebastian Grimberg CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
590d075f50bSSebastian Grimberg q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp));
591d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
592d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
593d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv));
594d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
595d075f50bSSebastian Grimberg CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
596d075f50bSSebastian Grimberg
5972b730f8bSJeremy L Thompson CeedCallBackend(CeedBasisSetData(basis, data));
5980d0321e0SJeremy L Thompson
5990d0321e0SJeremy L Thompson // Register backend functions
6002b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip));
601db2becc9SJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip));
6022b730f8bSJeremy L Thompson CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip));
6039bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
6040d0321e0SJeremy L Thompson return CEED_ERROR_SUCCESS;
6050d0321e0SJeremy L Thompson }
6062a86cc9dSSebastian Grimberg
6070d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
608