xref: /libCEED/rust/libceed-sys/c-src/backends/hip-ref/ceed-hip-ref-basis.c (revision d4cc18453651bd0f94c1a2e078b2646a92dafdcc)
1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
30d0321e0SJeremy L Thompson //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
50d0321e0SJeremy L Thompson //
63d8e8822SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
70d0321e0SJeremy L Thompson 
849aac155SJeremy L Thompson #include <ceed.h>
90d0321e0SJeremy L Thompson #include <ceed/backend.h>
10437930d1SJeremy L Thompson #include <ceed/jit-tools.h>
11111870feSJeremy L Thompson #include <string.h>
120d0321e0SJeremy L Thompson #include <hip/hip_runtime.h>
132b730f8bSJeremy L Thompson 
1449aac155SJeremy L Thompson #include "../hip/ceed-hip-common.h"
150d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h"
162b730f8bSJeremy L Thompson #include "ceed-hip-ref.h"
170d0321e0SJeremy L Thompson 
180d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
190d0321e0SJeremy L Thompson // Basis apply - tensor
200d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisApplyCore_Hip(CeedBasis basis,bool apply_add,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)21db2becc9SJeremy L Thompson static int CeedBasisApplyCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
22db2becc9SJeremy L Thompson                                   CeedVector u, CeedVector v) {
230d0321e0SJeremy L Thompson   Ceed              ceed;
24b7453713SJeremy L Thompson   CeedInt           Q_1d, dim;
257bbbfca3SJeremy L Thompson   const CeedInt     is_transpose   = t_mode == CEED_TRANSPOSE;
26437930d1SJeremy L Thompson   const int         max_block_size = 64;
270d0321e0SJeremy L Thompson   const CeedScalar *d_u;
280d0321e0SJeremy L Thompson   CeedScalar       *d_v;
29b7453713SJeremy L Thompson   CeedBasis_Hip    *data;
30b7453713SJeremy L Thompson 
31b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
32b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetData(basis, &data));
33b7453713SJeremy L Thompson 
349ea2cfd9SJeremy L Thompson   // Get read/write access to u, v
356574a04fSJeremy L Thompson   if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
366574a04fSJeremy L Thompson   else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
3711ac676fSZach Atkins   if (apply_add) {
3811ac676fSZach Atkins     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
3911ac676fSZach Atkins   } else {
400d0321e0SJeremy L Thompson     // Clear v for transpose operation
4111ac676fSZach Atkins     if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
4211ac676fSZach Atkins     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
430d0321e0SJeremy L Thompson   }
4411ac676fSZach Atkins 
45b2165e7aSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
46b2165e7aSSebastian Grimberg   CeedCallBackend(CeedBasisGetDimension(basis, &dim));
470d0321e0SJeremy L Thompson 
480d0321e0SJeremy L Thompson   // Basis action
49437930d1SJeremy L Thompson   switch (eval_mode) {
500d0321e0SJeremy L Thompson     case CEED_EVAL_INTERP: {
517bbbfca3SJeremy L Thompson       void         *interp_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &d_u, &d_v};
52b2165e7aSSebastian Grimberg       const CeedInt block_size    = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
530d0321e0SJeremy L Thompson 
54eb7e6cafSJeremy L Thompson       CeedCallBackend(CeedRunKernel_Hip(ceed, data->Interp, num_elem, block_size, interp_args));
550d0321e0SJeremy L Thompson     } break;
560d0321e0SJeremy L Thompson     case CEED_EVAL_GRAD: {
577bbbfca3SJeremy L Thompson       void         *grad_args[] = {(void *)&num_elem, (void *)&is_transpose, &data->d_interp_1d, &data->d_grad_1d, &d_u, &d_v};
58b2165e7aSSebastian Grimberg       const CeedInt block_size  = max_block_size;
590d0321e0SJeremy L Thompson 
60eb7e6cafSJeremy L Thompson       CeedCallBackend(CeedRunKernel_Hip(ceed, data->Grad, num_elem, block_size, grad_args));
610d0321e0SJeremy L Thompson     } break;
620d0321e0SJeremy L Thompson     case CEED_EVAL_WEIGHT: {
63097cc795SJames Wright       CeedCheck(data->d_q_weight_1d, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights_1d not set", CeedEvalModes[eval_mode]);
64437930d1SJeremy L Thompson       void     *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight_1d, &d_v};
65b2165e7aSSebastian Grimberg       const int block_size_x  = Q_1d;
66b2165e7aSSebastian Grimberg       const int block_size_y  = dim >= 2 ? Q_1d : 1;
670d0321e0SJeremy L Thompson 
68b2165e7aSSebastian Grimberg       CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, num_elem, block_size_x, block_size_y, 1, weight_args));
690d0321e0SJeremy L Thompson     } break;
709ea2cfd9SJeremy L Thompson     case CEED_EVAL_NONE: /* handled separately below */
719ea2cfd9SJeremy L Thompson       break;
720d0321e0SJeremy L Thompson     // LCOV_EXCL_START
730d0321e0SJeremy L Thompson     case CEED_EVAL_DIV:
740d0321e0SJeremy L Thompson     case CEED_EVAL_CURL:
75bcbe1c99SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]);
760d0321e0SJeremy L Thompson       // LCOV_EXCL_STOP
770d0321e0SJeremy L Thompson   }
780d0321e0SJeremy L Thompson 
799ea2cfd9SJeremy L Thompson   // Restore vectors, cover CEED_EVAL_NONE
802b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
819ea2cfd9SJeremy L Thompson   if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u));
829ea2cfd9SJeremy L Thompson   if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
839bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
840d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
850d0321e0SJeremy L Thompson }
860d0321e0SJeremy L Thompson 
CeedBasisApply_Hip(CeedBasis basis,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)87db2becc9SJeremy L Thompson static int CeedBasisApply_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u, CeedVector v) {
88db2becc9SJeremy L Thompson   CeedCallBackend(CeedBasisApplyCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v));
89db2becc9SJeremy L Thompson   return CEED_ERROR_SUCCESS;
90db2becc9SJeremy L Thompson }
91db2becc9SJeremy L Thompson 
CeedBasisApplyAdd_Hip(CeedBasis basis,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)92db2becc9SJeremy L Thompson static int CeedBasisApplyAdd_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u,
93db2becc9SJeremy L Thompson                                  CeedVector v) {
94db2becc9SJeremy L Thompson   CeedCallBackend(CeedBasisApplyCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v));
95db2becc9SJeremy L Thompson   return CEED_ERROR_SUCCESS;
96db2becc9SJeremy L Thompson }
97db2becc9SJeremy L Thompson 
980d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
991c21e869SJeremy L Thompson // Basis apply - tensor AtPoints
1001c21e869SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisApplyAtPointsCore_Hip(CeedBasis basis,bool apply_add,const CeedInt num_elem,const CeedInt * num_points,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector x_ref,CeedVector u,CeedVector v)101db2becc9SJeremy L Thompson static int CeedBasisApplyAtPointsCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, const CeedInt *num_points,
102db2becc9SJeremy L Thompson                                           CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
1031c21e869SJeremy L Thompson   Ceed              ceed;
1041c21e869SJeremy L Thompson   CeedInt           Q_1d, dim, max_num_points = num_points[0];
1051c21e869SJeremy L Thompson   const CeedInt     is_transpose   = t_mode == CEED_TRANSPOSE;
1061c21e869SJeremy L Thompson   const int         max_block_size = 32;
1071c21e869SJeremy L Thompson   const CeedScalar *d_x, *d_u;
1081c21e869SJeremy L Thompson   CeedScalar       *d_v;
1091c21e869SJeremy L Thompson   CeedBasis_Hip    *data;
1101c21e869SJeremy L Thompson 
1111c21e869SJeremy L Thompson   CeedCallBackend(CeedBasisGetData(basis, &data));
1121c21e869SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumQuadraturePoints1D(basis, &Q_1d));
1131c21e869SJeremy L Thompson   CeedCallBackend(CeedBasisGetDimension(basis, &dim));
1141c21e869SJeremy L Thompson 
1151c21e869SJeremy L Thompson   // Weight handled separately
1161c21e869SJeremy L Thompson   if (eval_mode == CEED_EVAL_WEIGHT) {
1175a5594ffSJeremy L Thompson     CeedCallBackend(CeedVectorSetValue(v, 1.0));
1181c21e869SJeremy L Thompson     return CEED_ERROR_SUCCESS;
1191c21e869SJeremy L Thompson   }
1201c21e869SJeremy L Thompson 
1219bc66399SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
1229bc66399SJeremy L Thompson 
123111870feSJeremy L Thompson   // Check padded to uniform number of points per elem
124111870feSJeremy L Thompson   for (CeedInt i = 1; i < num_elem; i++) max_num_points = CeedIntMax(max_num_points, num_points[i]);
125111870feSJeremy L Thompson   {
126111870feSJeremy L Thompson     CeedInt  num_comp, q_comp;
127111870feSJeremy L Thompson     CeedSize len, len_required;
128111870feSJeremy L Thompson 
129111870feSJeremy L Thompson     CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
130111870feSJeremy L Thompson     CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, eval_mode, &q_comp));
131111870feSJeremy L Thompson     CeedCallBackend(CeedVectorGetLength(is_transpose ? u : v, &len));
132111870feSJeremy L Thompson     len_required = (CeedSize)num_comp * (CeedSize)q_comp * (CeedSize)num_elem * (CeedSize)max_num_points;
133111870feSJeremy L Thompson     CeedCheck(len >= len_required, ceed, CEED_ERROR_BACKEND,
134111870feSJeremy L Thompson               "Vector at points must be padded to the same number of points in each element for BasisApplyAtPoints on GPU backends."
135111870feSJeremy L Thompson               " Found %" CeedSize_FMT ", Required %" CeedSize_FMT,
136111870feSJeremy L Thompson               len, len_required);
137111870feSJeremy L Thompson   }
138111870feSJeremy L Thompson 
139111870feSJeremy L Thompson   // Move num_points array to device
140111870feSJeremy L Thompson   if (is_transpose) {
141111870feSJeremy L Thompson     const CeedInt num_bytes = num_elem * sizeof(CeedInt);
142111870feSJeremy L Thompson 
143111870feSJeremy L Thompson     if (num_elem != data->num_elem_at_points) {
144111870feSJeremy L Thompson       data->num_elem_at_points = num_elem;
145111870feSJeremy L Thompson 
146111870feSJeremy L Thompson       if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem));
147111870feSJeremy L Thompson       CeedCallHip(ceed, hipMalloc((void **)&data->d_points_per_elem, num_bytes));
148111870feSJeremy L Thompson       CeedCallBackend(CeedFree(&data->h_points_per_elem));
149111870feSJeremy L Thompson       CeedCallBackend(CeedCalloc(num_elem, &data->h_points_per_elem));
150111870feSJeremy L Thompson     }
1519e511c80SJeremy L Thompson     if (memcmp(data->h_points_per_elem, num_points, num_bytes)) {
152111870feSJeremy L Thompson       memcpy(data->h_points_per_elem, num_points, num_bytes);
153111870feSJeremy L Thompson       CeedCallHip(ceed, hipMemcpy(data->d_points_per_elem, num_points, num_bytes, hipMemcpyHostToDevice));
154111870feSJeremy L Thompson     }
155111870feSJeremy L Thompson   }
156111870feSJeremy L Thompson 
1571c21e869SJeremy L Thompson   // Build kernels if needed
1581c21e869SJeremy L Thompson   if (data->num_points != max_num_points) {
1591c21e869SJeremy L Thompson     CeedInt P_1d;
1601c21e869SJeremy L Thompson 
1611c21e869SJeremy L Thompson     CeedCallBackend(CeedBasisGetNumNodes1D(basis, &P_1d));
1621c21e869SJeremy L Thompson     data->num_points = max_num_points;
1631c21e869SJeremy L Thompson 
1641c21e869SJeremy L Thompson     // -- Create interp matrix to Chebyshev coefficients
1651c21e869SJeremy L Thompson     if (!data->d_chebyshev_interp_1d) {
1661c21e869SJeremy L Thompson       CeedSize    interp_bytes;
1671c21e869SJeremy L Thompson       CeedScalar *chebyshev_interp_1d;
1681c21e869SJeremy L Thompson 
1691c21e869SJeremy L Thompson       interp_bytes = P_1d * Q_1d * sizeof(CeedScalar);
1701c21e869SJeremy L Thompson       CeedCallBackend(CeedCalloc(P_1d * Q_1d, &chebyshev_interp_1d));
1715a5594ffSJeremy L Thompson       CeedCallBackend(CeedBasisGetChebyshevInterp1D(basis, chebyshev_interp_1d));
1721c21e869SJeremy L Thompson       CeedCallHip(ceed, hipMalloc((void **)&data->d_chebyshev_interp_1d, interp_bytes));
1731c21e869SJeremy L Thompson       CeedCallHip(ceed, hipMemcpy(data->d_chebyshev_interp_1d, chebyshev_interp_1d, interp_bytes, hipMemcpyHostToDevice));
1741c21e869SJeremy L Thompson       CeedCallBackend(CeedFree(&chebyshev_interp_1d));
1751c21e869SJeremy L Thompson     }
1761c21e869SJeremy L Thompson 
1771c21e869SJeremy L Thompson     // -- Compile kernels
1789c25dd66SJeremy L Thompson     const char basis_kernel_source[] = "// AtPoints basis source\n#include <ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h>\n";
1791c21e869SJeremy L Thompson     CeedInt    num_comp;
1801c21e869SJeremy L Thompson 
1811c21e869SJeremy L Thompson     if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints));
1821c21e869SJeremy L Thompson     CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
1831c21e869SJeremy L Thompson     CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->moduleAtPoints, 9, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
184f7c9815fSJeremy L Thompson                                     Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
1851c21e869SJeremy L Thompson                                     "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim), "BASIS_NUM_PTS",
186f7c9815fSJeremy L Thompson                                     max_num_points, "POINTS_BUFF_LEN", CeedIntPow(Q_1d, dim - 1)));
1871c21e869SJeremy L Thompson     CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpAtPoints", &data->InterpAtPoints));
18881ae6159SJeremy L Thompson     CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "InterpTransposeAtPoints", &data->InterpTransposeAtPoints));
1891c21e869SJeremy L Thompson     CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradAtPoints", &data->GradAtPoints));
19081ae6159SJeremy L Thompson     CeedCallBackend(CeedGetKernel_Hip(ceed, data->moduleAtPoints, "GradTransposeAtPoints", &data->GradTransposeAtPoints));
1911c21e869SJeremy L Thompson   }
1921c21e869SJeremy L Thompson 
1931c21e869SJeremy L Thompson   // Get read/write access to u, v
1941c21e869SJeremy L Thompson   CeedCallBackend(CeedVectorGetArrayRead(x_ref, CEED_MEM_DEVICE, &d_x));
1951c21e869SJeremy L Thompson   if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
1961c21e869SJeremy L Thompson   else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
19711ac676fSZach Atkins   if (apply_add) {
19811ac676fSZach Atkins     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
19911ac676fSZach Atkins   } else {
2001c21e869SJeremy L Thompson     // Clear v for transpose operation
20111ac676fSZach Atkins     if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
20211ac676fSZach Atkins     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
2031c21e869SJeremy L Thompson   }
2041c21e869SJeremy L Thompson 
2051c21e869SJeremy L Thompson   // Basis action
2061c21e869SJeremy L Thompson   switch (eval_mode) {
2071c21e869SJeremy L Thompson     case CEED_EVAL_INTERP: {
20881ae6159SJeremy L Thompson       void         *interp_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
2091c21e869SJeremy L Thompson       const CeedInt block_size    = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
2101c21e869SJeremy L Thompson 
2111a8516d0SJames Wright       CeedCallBackend(CeedRunKernel_Hip(ceed, is_transpose ? data->InterpTransposeAtPoints : data->InterpAtPoints, num_elem, block_size,
2121a8516d0SJames Wright                                         interp_args));
2131c21e869SJeremy L Thompson     } break;
2141c21e869SJeremy L Thompson     case CEED_EVAL_GRAD: {
21581ae6159SJeremy L Thompson       void         *grad_args[] = {(void *)&num_elem, &data->d_chebyshev_interp_1d, &data->d_points_per_elem, &d_x, &d_u, &d_v};
2162d10e82cSJeremy L Thompson       const CeedInt block_size  = CeedIntMin(CeedIntPow(Q_1d, dim), max_block_size);
2171c21e869SJeremy L Thompson 
21881ae6159SJeremy L Thompson       CeedCallBackend(CeedRunKernel_Hip(ceed, is_transpose ? data->GradTransposeAtPoints : data->GradAtPoints, num_elem, block_size, grad_args));
2191c21e869SJeremy L Thompson     } break;
2201c21e869SJeremy L Thompson     case CEED_EVAL_WEIGHT:
2211c21e869SJeremy L Thompson     case CEED_EVAL_NONE: /* handled separately below */
2221c21e869SJeremy L Thompson       break;
2231c21e869SJeremy L Thompson     // LCOV_EXCL_START
2241c21e869SJeremy L Thompson     case CEED_EVAL_DIV:
2251c21e869SJeremy L Thompson     case CEED_EVAL_CURL:
2261c21e869SJeremy L Thompson       return CeedError(ceed, CEED_ERROR_BACKEND, "%s not supported", CeedEvalModes[eval_mode]);
2271c21e869SJeremy L Thompson       // LCOV_EXCL_STOP
2281c21e869SJeremy L Thompson   }
2291c21e869SJeremy L Thompson 
2301c21e869SJeremy L Thompson   // Restore vectors, cover CEED_EVAL_NONE
2311c21e869SJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArrayRead(x_ref, &d_x));
2321c21e869SJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
2331c21e869SJeremy L Thompson   if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u));
2341c21e869SJeremy L Thompson   if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
2359bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
2361c21e869SJeremy L Thompson   return CEED_ERROR_SUCCESS;
2371c21e869SJeremy L Thompson }
2381c21e869SJeremy L Thompson 
CeedBasisApplyAtPoints_Hip(CeedBasis basis,const CeedInt num_elem,const CeedInt * num_points,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector x_ref,CeedVector u,CeedVector v)239db2becc9SJeremy L Thompson static int CeedBasisApplyAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
240db2becc9SJeremy L Thompson                                       CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
241db2becc9SJeremy L Thompson   CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, false, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
242db2becc9SJeremy L Thompson   return CEED_ERROR_SUCCESS;
243db2becc9SJeremy L Thompson }
244db2becc9SJeremy L Thompson 
CeedBasisApplyAddAtPoints_Hip(CeedBasis basis,const CeedInt num_elem,const CeedInt * num_points,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector x_ref,CeedVector u,CeedVector v)245db2becc9SJeremy L Thompson static int CeedBasisApplyAddAtPoints_Hip(CeedBasis basis, const CeedInt num_elem, const CeedInt *num_points, CeedTransposeMode t_mode,
246db2becc9SJeremy L Thompson                                          CeedEvalMode eval_mode, CeedVector x_ref, CeedVector u, CeedVector v) {
247db2becc9SJeremy L Thompson   CeedCallBackend(CeedBasisApplyAtPointsCore_Hip(basis, true, num_elem, num_points, t_mode, eval_mode, x_ref, u, v));
248db2becc9SJeremy L Thompson   return CEED_ERROR_SUCCESS;
249db2becc9SJeremy L Thompson }
250db2becc9SJeremy L Thompson 
2511c21e869SJeremy L Thompson //------------------------------------------------------------------------------
2520d0321e0SJeremy L Thompson // Basis apply - non-tensor
2530d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisApplyNonTensorCore_Hip(CeedBasis basis,bool apply_add,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)254db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensorCore_Hip(CeedBasis basis, bool apply_add, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode,
255db2becc9SJeremy L Thompson                                            CeedVector u, CeedVector v) {
2560d0321e0SJeremy L Thompson   Ceed                    ceed;
257437930d1SJeremy L Thompson   CeedInt                 num_nodes, num_qpts;
2587bbbfca3SJeremy L Thompson   const CeedInt           is_transpose    = t_mode == CEED_TRANSPOSE;
259d075f50bSSebastian Grimberg   const int               elems_per_block = 1;
260d075f50bSSebastian Grimberg   const int               grid            = CeedDivUpInt(num_elem, elems_per_block);
2610d0321e0SJeremy L Thompson   const CeedScalar       *d_u;
2620d0321e0SJeremy L Thompson   CeedScalar             *d_v;
263b7453713SJeremy L Thompson   CeedBasisNonTensor_Hip *data;
264b7453713SJeremy L Thompson 
265b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
266b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetData(basis, &data));
267b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumQuadraturePoints(basis, &num_qpts));
268b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumNodes(basis, &num_nodes));
269b7453713SJeremy L Thompson 
2709ea2cfd9SJeremy L Thompson   // Get read/write access to u, v
2719ea2cfd9SJeremy L Thompson   if (u != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(u, CEED_MEM_DEVICE, &d_u));
2729ea2cfd9SJeremy L Thompson   else CeedCheck(eval_mode == CEED_EVAL_WEIGHT, ceed, CEED_ERROR_BACKEND, "An input vector is required for this CeedEvalMode");
27311ac676fSZach Atkins   if (apply_add) {
27411ac676fSZach Atkins     CeedCallBackend(CeedVectorGetArray(v, CEED_MEM_DEVICE, &d_v));
27511ac676fSZach Atkins   } else {
2760d0321e0SJeremy L Thompson     // Clear v for transpose operation
27711ac676fSZach Atkins     if (is_transpose) CeedCallBackend(CeedVectorSetValue(v, 0.0));
27811ac676fSZach Atkins     CeedCallBackend(CeedVectorGetArrayWrite(v, CEED_MEM_DEVICE, &d_v));
2790d0321e0SJeremy L Thompson   }
2800d0321e0SJeremy L Thompson 
2810d0321e0SJeremy L Thompson   // Apply basis operation
282437930d1SJeremy L Thompson   switch (eval_mode) {
2830d0321e0SJeremy L Thompson     case CEED_EVAL_INTERP: {
284d075f50bSSebastian Grimberg       void     *interp_args[] = {(void *)&num_elem, &data->d_interp, &d_u, &d_v};
2857bbbfca3SJeremy L Thompson       const int block_size_x  = is_transpose ? num_nodes : num_qpts;
286b2165e7aSSebastian Grimberg 
2877bbbfca3SJeremy L Thompson       if (is_transpose) {
288d075f50bSSebastian Grimberg         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->InterpTranspose, grid, block_size_x, 1, elems_per_block, interp_args));
289d075f50bSSebastian Grimberg       } else {
290b2165e7aSSebastian Grimberg         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Interp, grid, block_size_x, 1, elems_per_block, interp_args));
291d075f50bSSebastian Grimberg       }
2920d0321e0SJeremy L Thompson     } break;
2930d0321e0SJeremy L Thompson     case CEED_EVAL_GRAD: {
294d075f50bSSebastian Grimberg       void     *grad_args[]  = {(void *)&num_elem, &data->d_grad, &d_u, &d_v};
2957bbbfca3SJeremy L Thompson       const int block_size_x = is_transpose ? num_nodes : num_qpts;
296b2165e7aSSebastian Grimberg 
2977bbbfca3SJeremy L Thompson       if (is_transpose) {
298d075f50bSSebastian Grimberg         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, grad_args));
299d075f50bSSebastian Grimberg       } else {
300d075f50bSSebastian Grimberg         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, grad_args));
301d075f50bSSebastian Grimberg       }
302d075f50bSSebastian Grimberg     } break;
303d075f50bSSebastian Grimberg     case CEED_EVAL_DIV: {
304d075f50bSSebastian Grimberg       void     *div_args[]   = {(void *)&num_elem, &data->d_div, &d_u, &d_v};
3057bbbfca3SJeremy L Thompson       const int block_size_x = is_transpose ? num_nodes : num_qpts;
306d075f50bSSebastian Grimberg 
3077bbbfca3SJeremy L Thompson       if (is_transpose) {
308d075f50bSSebastian Grimberg         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, div_args));
309d075f50bSSebastian Grimberg       } else {
310d075f50bSSebastian Grimberg         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, div_args));
311d075f50bSSebastian Grimberg       }
312d075f50bSSebastian Grimberg     } break;
313d075f50bSSebastian Grimberg     case CEED_EVAL_CURL: {
314d075f50bSSebastian Grimberg       void     *curl_args[]  = {(void *)&num_elem, &data->d_curl, &d_u, &d_v};
3157bbbfca3SJeremy L Thompson       const int block_size_x = is_transpose ? num_nodes : num_qpts;
316d075f50bSSebastian Grimberg 
3177bbbfca3SJeremy L Thompson       if (is_transpose) {
318d075f50bSSebastian Grimberg         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->DerivTranspose, grid, block_size_x, 1, elems_per_block, curl_args));
319d075f50bSSebastian Grimberg       } else {
320d075f50bSSebastian Grimberg         CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Deriv, grid, block_size_x, 1, elems_per_block, curl_args));
321d075f50bSSebastian Grimberg       }
3220d0321e0SJeremy L Thompson     } break;
3230d0321e0SJeremy L Thompson     case CEED_EVAL_WEIGHT: {
324097cc795SJames Wright       CeedCheck(data->d_q_weight, ceed, CEED_ERROR_BACKEND, "%s not supported; q_weights not set", CeedEvalModes[eval_mode]);
325437930d1SJeremy L Thompson       void *weight_args[] = {(void *)&num_elem, (void *)&data->d_q_weight, &d_v};
326b2165e7aSSebastian Grimberg 
327b2165e7aSSebastian Grimberg       CeedCallBackend(CeedRunKernelDim_Hip(ceed, data->Weight, grid, num_qpts, 1, elems_per_block, weight_args));
3280d0321e0SJeremy L Thompson     } break;
3299ea2cfd9SJeremy L Thompson     case CEED_EVAL_NONE: /* handled separately below */
3309ea2cfd9SJeremy L Thompson       break;
3310d0321e0SJeremy L Thompson   }
3320d0321e0SJeremy L Thompson 
3339ea2cfd9SJeremy L Thompson   // Restore vectors, cover CEED_EVAL_NONE
3342b730f8bSJeremy L Thompson   CeedCallBackend(CeedVectorRestoreArray(v, &d_v));
3359ea2cfd9SJeremy L Thompson   if (eval_mode == CEED_EVAL_NONE) CeedCallBackend(CeedVectorSetArray(v, CEED_MEM_DEVICE, CEED_COPY_VALUES, (CeedScalar *)d_u));
3369ea2cfd9SJeremy L Thompson   if (eval_mode != CEED_EVAL_WEIGHT) CeedCallBackend(CeedVectorRestoreArrayRead(u, &d_u));
3379bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
3380d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3390d0321e0SJeremy L Thompson }
3400d0321e0SJeremy L Thompson 
CeedBasisApplyNonTensor_Hip(CeedBasis basis,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)341db2becc9SJeremy L Thompson static int CeedBasisApplyNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u,
342db2becc9SJeremy L Thompson                                        CeedVector v) {
343db2becc9SJeremy L Thompson   CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, false, num_elem, t_mode, eval_mode, u, v));
344db2becc9SJeremy L Thompson   return CEED_ERROR_SUCCESS;
345db2becc9SJeremy L Thompson }
346db2becc9SJeremy L Thompson 
CeedBasisApplyAddNonTensor_Hip(CeedBasis basis,const CeedInt num_elem,CeedTransposeMode t_mode,CeedEvalMode eval_mode,CeedVector u,CeedVector v)347db2becc9SJeremy L Thompson static int CeedBasisApplyAddNonTensor_Hip(CeedBasis basis, const CeedInt num_elem, CeedTransposeMode t_mode, CeedEvalMode eval_mode, CeedVector u,
348db2becc9SJeremy L Thompson                                           CeedVector v) {
349db2becc9SJeremy L Thompson   CeedCallBackend(CeedBasisApplyNonTensorCore_Hip(basis, true, num_elem, t_mode, eval_mode, u, v));
350db2becc9SJeremy L Thompson   return CEED_ERROR_SUCCESS;
351db2becc9SJeremy L Thompson }
352db2becc9SJeremy L Thompson 
3530d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3540d0321e0SJeremy L Thompson // Destroy tensor basis
3550d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisDestroy_Hip(CeedBasis basis)3560d0321e0SJeremy L Thompson static int CeedBasisDestroy_Hip(CeedBasis basis) {
3570d0321e0SJeremy L Thompson   Ceed           ceed;
3580d0321e0SJeremy L Thompson   CeedBasis_Hip *data;
359b7453713SJeremy L Thompson 
360b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
3612b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisGetData(basis, &data));
3622b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipModuleUnload(data->module));
3631c21e869SJeremy L Thompson   if (data->moduleAtPoints) CeedCallHip(ceed, hipModuleUnload(data->moduleAtPoints));
364097cc795SJames Wright   if (data->d_q_weight_1d) CeedCallHip(ceed, hipFree(data->d_q_weight_1d));
365111870feSJeremy L Thompson   CeedCallBackend(CeedFree(&data->h_points_per_elem));
366111870feSJeremy L Thompson   if (data->d_points_per_elem) CeedCallHip(ceed, hipFree(data->d_points_per_elem));
3672b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipFree(data->d_interp_1d));
3682b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipFree(data->d_grad_1d));
3691c21e869SJeremy L Thompson   CeedCallHip(ceed, hipFree(data->d_chebyshev_interp_1d));
3702b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&data));
3719bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
3720d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3730d0321e0SJeremy L Thompson }
3740d0321e0SJeremy L Thompson 
3750d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3760d0321e0SJeremy L Thompson // Destroy non-tensor basis
3770d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisDestroyNonTensor_Hip(CeedBasis basis)3780d0321e0SJeremy L Thompson static int CeedBasisDestroyNonTensor_Hip(CeedBasis basis) {
3790d0321e0SJeremy L Thompson   Ceed                    ceed;
3800d0321e0SJeremy L Thompson   CeedBasisNonTensor_Hip *data;
381b7453713SJeremy L Thompson 
382b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
3832b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisGetData(basis, &data));
3842b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipModuleUnload(data->module));
385097cc795SJames Wright   if (data->d_q_weight) CeedCallHip(ceed, hipFree(data->d_q_weight));
3862b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipFree(data->d_interp));
3872b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipFree(data->d_grad));
388d075f50bSSebastian Grimberg   CeedCallHip(ceed, hipFree(data->d_div));
389d075f50bSSebastian Grimberg   CeedCallHip(ceed, hipFree(data->d_curl));
3902b730f8bSJeremy L Thompson   CeedCallBackend(CeedFree(&data));
3919bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
3920d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
3930d0321e0SJeremy L Thompson }
3940d0321e0SJeremy L Thompson 
3950d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
3960d0321e0SJeremy L Thompson // Create tensor
3970d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisCreateTensorH1_Hip(CeedInt dim,CeedInt P_1d,CeedInt Q_1d,const CeedScalar * interp_1d,const CeedScalar * grad_1d,const CeedScalar * q_ref_1d,const CeedScalar * q_weight_1d,CeedBasis basis)3982b730f8bSJeremy L Thompson int CeedBasisCreateTensorH1_Hip(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d,
3996574a04fSJeremy L Thompson                                 const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis) {
4000d0321e0SJeremy L Thompson   Ceed           ceed;
401b7453713SJeremy L Thompson   CeedInt        num_comp;
402b7453713SJeremy L Thompson   const CeedInt  q_bytes      = Q_1d * sizeof(CeedScalar);
403b7453713SJeremy L Thompson   const CeedInt  interp_bytes = q_bytes * P_1d;
4040d0321e0SJeremy L Thompson   CeedBasis_Hip *data;
405b7453713SJeremy L Thompson 
406b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
4072b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &data));
4080d0321e0SJeremy L Thompson 
4090d0321e0SJeremy L Thompson   // Copy data to GPU
410097cc795SJames Wright   if (q_weight_1d) {
4112b730f8bSJeremy L Thompson     CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight_1d, q_bytes));
4122b730f8bSJeremy L Thompson     CeedCallHip(ceed, hipMemcpy(data->d_q_weight_1d, q_weight_1d, q_bytes, hipMemcpyHostToDevice));
413097cc795SJames Wright   }
4142b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMalloc((void **)&data->d_interp_1d, interp_bytes));
4152b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMemcpy(data->d_interp_1d, interp_1d, interp_bytes, hipMemcpyHostToDevice));
4162b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMalloc((void **)&data->d_grad_1d, interp_bytes));
4172b730f8bSJeremy L Thompson   CeedCallHip(ceed, hipMemcpy(data->d_grad_1d, grad_1d, interp_bytes, hipMemcpyHostToDevice));
4180d0321e0SJeremy L Thompson 
419ecc88aebSJeremy L Thompson   // Compile basis kernels
4209c25dd66SJeremy L Thompson   const char basis_kernel_source[] = "// Tensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-tensor.h>\n";
4219c25dd66SJeremy L Thompson 
422b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
423eb7e6cafSJeremy L Thompson   CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 7, "BASIS_Q_1D", Q_1d, "BASIS_P_1D", P_1d, "BASIS_BUF_LEN",
424f7c9815fSJeremy L Thompson                                   Q_1d * CeedIntPow(Q_1d > P_1d ? Q_1d : P_1d, dim - 1), "BASIS_DIM", dim, "BASIS_NUM_COMP", num_comp,
425b7453713SJeremy L Thompson                                   "BASIS_NUM_NODES", CeedIntPow(P_1d, dim), "BASIS_NUM_QPTS", CeedIntPow(Q_1d, dim)));
426eb7e6cafSJeremy L Thompson   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
427eb7e6cafSJeremy L Thompson   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Grad", &data->Grad));
428eb7e6cafSJeremy L Thompson   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
429437930d1SJeremy L Thompson 
4302b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisSetData(basis, data));
4310d0321e0SJeremy L Thompson 
432d075f50bSSebastian Grimberg   // Register backend functions
4332b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApply_Hip));
434db2becc9SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAdd_Hip));
4351c21e869SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAtPoints", CeedBasisApplyAtPoints_Hip));
436db2becc9SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAddAtPoints", CeedBasisApplyAddAtPoints_Hip));
4372b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroy_Hip));
4389bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
4390d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
4400d0321e0SJeremy L Thompson }
4410d0321e0SJeremy L Thompson 
4420d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
443d075f50bSSebastian Grimberg // Create non-tensor H^1
4440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
CeedBasisCreateH1_Hip(CeedElemTopology topo,CeedInt dim,CeedInt num_nodes,CeedInt num_qpts,const CeedScalar * interp,const CeedScalar * grad,const CeedScalar * q_ref,const CeedScalar * q_weight,CeedBasis basis)4452b730f8bSJeremy L Thompson int CeedBasisCreateH1_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *grad,
44651475c7cSJeremy L Thompson                           const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
4470d0321e0SJeremy L Thompson   Ceed                    ceed;
448d075f50bSSebastian Grimberg   CeedInt                 num_comp, q_comp_interp, q_comp_grad;
449b7453713SJeremy L Thompson   const CeedInt           q_bytes = num_qpts * sizeof(CeedScalar);
4500d0321e0SJeremy L Thompson   CeedBasisNonTensor_Hip *data;
451b7453713SJeremy L Thompson 
452b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
4532b730f8bSJeremy L Thompson   CeedCallBackend(CeedCalloc(1, &data));
4540d0321e0SJeremy L Thompson 
4550d0321e0SJeremy L Thompson   // Copy basis data to GPU
456d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
457d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_GRAD, &q_comp_grad));
458097cc795SJames Wright   if (q_weight) {
4592b730f8bSJeremy L Thompson     CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes));
4602b730f8bSJeremy L Thompson     CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice));
461097cc795SJames Wright   }
462d075f50bSSebastian Grimberg   if (interp) {
463d075f50bSSebastian Grimberg     const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp;
464d075f50bSSebastian Grimberg 
4652b730f8bSJeremy L Thompson     CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes));
4662b730f8bSJeremy L Thompson     CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice));
467d075f50bSSebastian Grimberg   }
468d075f50bSSebastian Grimberg   if (grad) {
469d075f50bSSebastian Grimberg     const CeedInt grad_bytes = q_bytes * num_nodes * q_comp_grad;
470d075f50bSSebastian Grimberg 
4712b730f8bSJeremy L Thompson     CeedCallHip(ceed, hipMalloc((void **)&data->d_grad, grad_bytes));
4722b730f8bSJeremy L Thompson     CeedCallHip(ceed, hipMemcpy(data->d_grad, grad, grad_bytes, hipMemcpyHostToDevice));
473d075f50bSSebastian Grimberg   }
4740d0321e0SJeremy L Thompson 
4750d0321e0SJeremy L Thompson   // Compile basis kernels
4769c25dd66SJeremy L Thompson   const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-nontensor.h>\n";
4779c25dd66SJeremy L Thompson 
478b7453713SJeremy L Thompson   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
479d075f50bSSebastian Grimberg   CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
480d075f50bSSebastian Grimberg                                   q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_grad, "BASIS_NUM_COMP", num_comp));
481eb7e6cafSJeremy L Thompson   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
482d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
483d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv));
484d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
485eb7e6cafSJeremy L Thompson   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
486d075f50bSSebastian Grimberg 
487d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisSetData(basis, data));
488d075f50bSSebastian Grimberg 
489d075f50bSSebastian Grimberg   // Register backend functions
490d075f50bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip));
491db2becc9SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip));
492d075f50bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip));
4939bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
494d075f50bSSebastian Grimberg   return CEED_ERROR_SUCCESS;
495d075f50bSSebastian Grimberg }
496d075f50bSSebastian Grimberg 
497d075f50bSSebastian Grimberg //------------------------------------------------------------------------------
498d075f50bSSebastian Grimberg // Create non-tensor H(div)
499d075f50bSSebastian Grimberg //------------------------------------------------------------------------------
CeedBasisCreateHdiv_Hip(CeedElemTopology topo,CeedInt dim,CeedInt num_nodes,CeedInt num_qpts,const CeedScalar * interp,const CeedScalar * div,const CeedScalar * q_ref,const CeedScalar * q_weight,CeedBasis basis)500d075f50bSSebastian Grimberg int CeedBasisCreateHdiv_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, const CeedScalar *div,
501d075f50bSSebastian Grimberg                             const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
502d075f50bSSebastian Grimberg   Ceed                    ceed;
503d075f50bSSebastian Grimberg   CeedInt                 num_comp, q_comp_interp, q_comp_div;
504d075f50bSSebastian Grimberg   const CeedInt           q_bytes = num_qpts * sizeof(CeedScalar);
505d075f50bSSebastian Grimberg   CeedBasisNonTensor_Hip *data;
506d075f50bSSebastian Grimberg 
507d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
508d075f50bSSebastian Grimberg   CeedCallBackend(CeedCalloc(1, &data));
509d075f50bSSebastian Grimberg 
510d075f50bSSebastian Grimberg   // Copy basis data to GPU
511d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
512d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_DIV, &q_comp_div));
513097cc795SJames Wright   if (q_weight) {
514d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes));
515d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice));
516097cc795SJames Wright   }
517d075f50bSSebastian Grimberg   if (interp) {
518d075f50bSSebastian Grimberg     const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp;
519d075f50bSSebastian Grimberg 
520d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes));
521d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice));
522d075f50bSSebastian Grimberg   }
523d075f50bSSebastian Grimberg   if (div) {
524d075f50bSSebastian Grimberg     const CeedInt div_bytes = q_bytes * num_nodes * q_comp_div;
525d075f50bSSebastian Grimberg 
526d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMalloc((void **)&data->d_div, div_bytes));
527d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMemcpy(data->d_div, div, div_bytes, hipMemcpyHostToDevice));
528d075f50bSSebastian Grimberg   }
529d075f50bSSebastian Grimberg 
530d075f50bSSebastian Grimberg   // Compile basis kernels
5319c25dd66SJeremy L Thompson   const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-nontensor.h>\n";
5329c25dd66SJeremy L Thompson 
533d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
534d075f50bSSebastian Grimberg   CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
535d075f50bSSebastian Grimberg                                   q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_div, "BASIS_NUM_COMP", num_comp));
536d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
537d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
538d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv));
539d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
540d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
541d075f50bSSebastian Grimberg 
542d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisSetData(basis, data));
543d075f50bSSebastian Grimberg 
544d075f50bSSebastian Grimberg   // Register backend functions
545d075f50bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip));
546db2becc9SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip));
547d075f50bSSebastian Grimberg   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip));
5489bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
549d075f50bSSebastian Grimberg   return CEED_ERROR_SUCCESS;
550d075f50bSSebastian Grimberg }
551d075f50bSSebastian Grimberg 
552d075f50bSSebastian Grimberg //------------------------------------------------------------------------------
553d075f50bSSebastian Grimberg // Create non-tensor H(curl)
554d075f50bSSebastian Grimberg //------------------------------------------------------------------------------
CeedBasisCreateHcurl_Hip(CeedElemTopology topo,CeedInt dim,CeedInt num_nodes,CeedInt num_qpts,const CeedScalar * interp,const CeedScalar * curl,const CeedScalar * q_ref,const CeedScalar * q_weight,CeedBasis basis)555d075f50bSSebastian Grimberg int CeedBasisCreateHcurl_Hip(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp,
556d075f50bSSebastian Grimberg                              const CeedScalar *curl, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis) {
557d075f50bSSebastian Grimberg   Ceed                    ceed;
558d075f50bSSebastian Grimberg   CeedInt                 num_comp, q_comp_interp, q_comp_curl;
559d075f50bSSebastian Grimberg   const CeedInt           q_bytes = num_qpts * sizeof(CeedScalar);
560d075f50bSSebastian Grimberg   CeedBasisNonTensor_Hip *data;
561d075f50bSSebastian Grimberg 
562d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetCeed(basis, &ceed));
563d075f50bSSebastian Grimberg   CeedCallBackend(CeedCalloc(1, &data));
564d075f50bSSebastian Grimberg 
565d075f50bSSebastian Grimberg   // Copy basis data to GPU
566d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_INTERP, &q_comp_interp));
567d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumQuadratureComponents(basis, CEED_EVAL_CURL, &q_comp_curl));
568097cc795SJames Wright   if (q_weight) {
569d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMalloc((void **)&data->d_q_weight, q_bytes));
570d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMemcpy(data->d_q_weight, q_weight, q_bytes, hipMemcpyHostToDevice));
571097cc795SJames Wright   }
572d075f50bSSebastian Grimberg   if (interp) {
573d075f50bSSebastian Grimberg     const CeedInt interp_bytes = q_bytes * num_nodes * q_comp_interp;
574d075f50bSSebastian Grimberg 
575d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMalloc((void **)&data->d_interp, interp_bytes));
576d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMemcpy(data->d_interp, interp, interp_bytes, hipMemcpyHostToDevice));
577d075f50bSSebastian Grimberg   }
578d075f50bSSebastian Grimberg   if (curl) {
579d075f50bSSebastian Grimberg     const CeedInt curl_bytes = q_bytes * num_nodes * q_comp_curl;
580d075f50bSSebastian Grimberg 
581d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMalloc((void **)&data->d_curl, curl_bytes));
582d075f50bSSebastian Grimberg     CeedCallHip(ceed, hipMemcpy(data->d_curl, curl, curl_bytes, hipMemcpyHostToDevice));
583d075f50bSSebastian Grimberg   }
584d075f50bSSebastian Grimberg 
585d075f50bSSebastian Grimberg   // Compile basis kernels
5869c25dd66SJeremy L Thompson   const char basis_kernel_source[] = "// Nontensor basis source\n#include <ceed/jit-source/hip/hip-ref-basis-nontensor.h>\n";
5879c25dd66SJeremy L Thompson 
588d075f50bSSebastian Grimberg   CeedCallBackend(CeedBasisGetNumComponents(basis, &num_comp));
589d075f50bSSebastian Grimberg   CeedCallBackend(CeedCompile_Hip(ceed, basis_kernel_source, &data->module, 5, "BASIS_Q", num_qpts, "BASIS_P", num_nodes, "BASIS_Q_COMP_INTERP",
590d075f50bSSebastian Grimberg                                   q_comp_interp, "BASIS_Q_COMP_DERIV", q_comp_curl, "BASIS_NUM_COMP", num_comp));
591d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Interp", &data->Interp));
592d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "InterpTranspose", &data->InterpTranspose));
593d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Deriv", &data->Deriv));
594d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "DerivTranspose", &data->DerivTranspose));
595d075f50bSSebastian Grimberg   CeedCallBackend(CeedGetKernel_Hip(ceed, data->module, "Weight", &data->Weight));
596d075f50bSSebastian Grimberg 
5972b730f8bSJeremy L Thompson   CeedCallBackend(CeedBasisSetData(basis, data));
5980d0321e0SJeremy L Thompson 
5990d0321e0SJeremy L Thompson   // Register backend functions
6002b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Apply", CeedBasisApplyNonTensor_Hip));
601db2becc9SJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "ApplyAdd", CeedBasisApplyAddNonTensor_Hip));
6022b730f8bSJeremy L Thompson   CeedCallBackend(CeedSetBackendFunction(ceed, "Basis", basis, "Destroy", CeedBasisDestroyNonTensor_Hip));
6039bc66399SJeremy L Thompson   CeedCallBackend(CeedDestroy(&ceed));
6040d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
6050d0321e0SJeremy L Thompson }
6062a86cc9dSSebastian Grimberg 
6070d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
608