15aed82e4SJeremy L Thompson // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 37d8d0e25Snbeams // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 57d8d0e25Snbeams // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 7509d4af6SJeremy L Thompson #pragma once 83d576824SJeremy L Thompson 949aac155SJeremy L Thompson #include <ceed.h> 10ec3da8bcSJed Brown #include <ceed/backend.h> 113d576824SJeremy L Thompson #include <hip/hip_runtime.h> 122b730f8bSJeremy L Thompson 137d8d0e25Snbeams typedef struct { 147d8d0e25Snbeams hipModule_t module; 15437930d1SJeremy L Thompson hipFunction_t Interp; 169e201c85SYohann hipFunction_t InterpTranspose; 17db2becc9SJeremy L Thompson hipFunction_t InterpTransposeAdd; 18437930d1SJeremy L Thompson hipFunction_t Grad; 199e201c85SYohann hipFunction_t GradTranspose; 20db2becc9SJeremy L Thompson hipFunction_t GradTransposeAdd; 21437930d1SJeremy L Thompson hipFunction_t Weight; 221dda9c1aSJeremy L Thompson hipModule_t moduleAtPoints; 231dda9c1aSJeremy L Thompson CeedInt num_points; 241dda9c1aSJeremy L Thompson hipFunction_t InterpAtPoints; 2581ae6159SJeremy L Thompson hipFunction_t InterpTransposeAtPoints; 26*af0e6e89SJeremy L Thompson hipFunction_t InterpTransposeAddAtPoints; 271dda9c1aSJeremy L Thompson hipFunction_t GradAtPoints; 2881ae6159SJeremy L Thompson hipFunction_t GradTransposeAtPoints; 29*af0e6e89SJeremy L Thompson hipFunction_t GradTransposeAddAtPoints; 30437930d1SJeremy L Thompson CeedInt block_sizes[3]; // interp, grad, weight thread block sizes 31437930d1SJeremy L Thompson CeedScalar *d_interp_1d; 32437930d1SJeremy L Thompson CeedScalar *d_grad_1d; 33437930d1SJeremy L Thompson CeedScalar *d_collo_grad_1d; 34437930d1SJeremy L Thompson CeedScalar *d_q_weight_1d; 351dda9c1aSJeremy L Thompson CeedScalar *d_chebyshev_interp_1d; 36111870feSJeremy L Thompson CeedInt num_elem_at_points; 37111870feSJeremy L Thompson CeedInt *h_points_per_elem; 38111870feSJeremy L Thompson CeedInt *d_points_per_elem; 397d8d0e25Snbeams } CeedBasis_Hip_shared; 407d8d0e25Snbeams 416574a04fSJeremy L Thompson CEED_INTERN int CeedBasisCreateTensorH1_Hip_shared(CeedInt dim, CeedInt P_1d, CeedInt Q_1d, const CeedScalar *interp_1d, const CeedScalar *grad_1d, 426574a04fSJeremy L Thompson const CeedScalar *q_ref_1d, const CeedScalar *q_weight_1d, CeedBasis basis); 436c13bbcbSJeremy L Thompson 446c13bbcbSJeremy L Thompson CEED_INTERN int CeedBasisCreateH1_Hip_shared(CeedElemTopology topo, CeedInt dim, CeedInt num_nodes, CeedInt num_qpts, const CeedScalar *interp, 456c13bbcbSJeremy L Thompson const CeedScalar *grad, const CeedScalar *q_ref, const CeedScalar *q_weight, CeedBasis basis); 46