1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors. 29e201c85SYohann // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 39e201c85SYohann // 49e201c85SYohann // SPDX-License-Identifier: BSD-2-Clause 59e201c85SYohann // 69e201c85SYohann // This file is part of CEED: http://github.com/ceed 79e201c85SYohann 89e201c85SYohann /// @file 99e201c85SYohann /// Internal header for HIP shared memory tensor product basis templates 10c0b5abf0SJeremy L Thompson #include <ceed/types.h> 119e201c85SYohann 129e201c85SYohann //------------------------------------------------------------------------------ 139e201c85SYohann // 1D 149e201c85SYohann //------------------------------------------------------------------------------ 159e201c85SYohann 169e201c85SYohann //------------------------------------------------------------------------------ 179e201c85SYohann // 1D tensor contraction x 189e201c85SYohann //------------------------------------------------------------------------------ 199e201c85SYohann template <int NUM_COMP, int P_1D, int Q_1D> 209e201c85SYohann inline __device__ void ContractX1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 21d6c19ee8SJeremy L Thompson __syncthreads(); 229e201c85SYohann data.slice[data.t_id_x] = *U; 239e201c85SYohann __syncthreads(); 249e201c85SYohann *V = 0.0; 259e201c85SYohann if (data.t_id_x < Q_1D) { 269e201c85SYohann for (CeedInt i = 0; i < P_1D; i++) { 279e201c85SYohann *V += B[i + data.t_id_x * P_1D] * data.slice[i]; // Contract x direction 289e201c85SYohann } 299e201c85SYohann } 309e201c85SYohann } 319e201c85SYohann 329e201c85SYohann //------------------------------------------------------------------------------ 339e201c85SYohann // 1D transpose tensor contraction x 349e201c85SYohann //------------------------------------------------------------------------------ 359e201c85SYohann template <int NUM_COMP, int P_1D, int Q_1D> 369e201c85SYohann inline __device__ void ContractTransposeX1d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 37d6c19ee8SJeremy L Thompson __syncthreads(); 389e201c85SYohann data.slice[data.t_id_x] = *U; 399e201c85SYohann __syncthreads(); 409e201c85SYohann *V = 0.0; 419e201c85SYohann if (data.t_id_x < P_1D) { 429e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 439e201c85SYohann *V += B[data.t_id_x + i * P_1D] * data.slice[i]; // Contract x direction 449e201c85SYohann } 459e201c85SYohann } 469e201c85SYohann } 479e201c85SYohann 489e201c85SYohann //------------------------------------------------------------------------------ 499e201c85SYohann // 1D interpolate to quadrature points 509e201c85SYohann //------------------------------------------------------------------------------ 516b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 529e201c85SYohann inline __device__ void Interp1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 539e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 54db2becc9SJeremy L Thompson ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); 559e201c85SYohann } 569e201c85SYohann } 579e201c85SYohann 589e201c85SYohann //------------------------------------------------------------------------------ 599e201c85SYohann // 1D interpolate transpose 609e201c85SYohann //------------------------------------------------------------------------------ 616b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 622b730f8bSJeremy L Thompson inline __device__ void InterpTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 632b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 649e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 65db2becc9SJeremy L Thompson ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); 669e201c85SYohann } 679e201c85SYohann } 689e201c85SYohann 699e201c85SYohann //------------------------------------------------------------------------------ 700ccda8ebSJeremy L Thompson // 1D interpolate to quadrature points, nodes and quadrature points collocated 710ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 720ccda8ebSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 730ccda8ebSJeremy L Thompson inline __device__ void InterpCollocatedNodes1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 740ccda8ebSJeremy L Thompson CeedScalar *__restrict__ r_V) { 750ccda8ebSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 760ccda8ebSJeremy L Thompson r_V[comp] = r_U[comp]; 770ccda8ebSJeremy L Thompson } 780ccda8ebSJeremy L Thompson } 790ccda8ebSJeremy L Thompson 800ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 810ccda8ebSJeremy L Thompson // 1D interpolate transpose, nodes and quadrature points collocated 820ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 830ccda8ebSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 840ccda8ebSJeremy L Thompson inline __device__ void InterpTransposeCollocatedNodes1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 850ccda8ebSJeremy L Thompson CeedScalar *__restrict__ r_V) { 860ccda8ebSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 870ccda8ebSJeremy L Thompson r_V[comp] = r_U[comp]; 880ccda8ebSJeremy L Thompson } 890ccda8ebSJeremy L Thompson } 900ccda8ebSJeremy L Thompson 910ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 929e201c85SYohann // 1D derivatives at quadrature points 939e201c85SYohann //------------------------------------------------------------------------------ 946b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 952b730f8bSJeremy L Thompson inline __device__ void Grad1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 962b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 979e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 98db2becc9SJeremy L Thompson ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_G, &r_V[comp]); 999e201c85SYohann } 1009e201c85SYohann } 1019e201c85SYohann 1029e201c85SYohann //------------------------------------------------------------------------------ 1039e201c85SYohann // 1D derivatives transpose 1049e201c85SYohann //------------------------------------------------------------------------------ 1056b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 1062b730f8bSJeremy L Thompson inline __device__ void GradTranspose1d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 1072b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 1089e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 109db2becc9SJeremy L Thompson ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_G, &r_V[comp]); 1109e201c85SYohann } 1119e201c85SYohann } 1129e201c85SYohann 1139e201c85SYohann //------------------------------------------------------------------------------ 1149e201c85SYohann // 1D quadrature weights 1159e201c85SYohann //------------------------------------------------------------------------------ 1169b91271bSJeremy L Thompson template <int P_1D, int Q_1D> 1179e201c85SYohann inline __device__ void Weight1d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 1189e201c85SYohann *w = (data.t_id_x < Q_1D) ? q_weight_1d[data.t_id_x] : 0.0; 1199e201c85SYohann } 1209e201c85SYohann 1219e201c85SYohann //------------------------------------------------------------------------------ 1229e201c85SYohann // 2D 1239e201c85SYohann //------------------------------------------------------------------------------ 1249e201c85SYohann 1259e201c85SYohann //------------------------------------------------------------------------------ 1269e201c85SYohann // 2D tensor contraction x 1279e201c85SYohann //------------------------------------------------------------------------------ 1286b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 1299e201c85SYohann inline __device__ void ContractX2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 130d6c19ee8SJeremy L Thompson __syncthreads(); 1319e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 1329e201c85SYohann __syncthreads(); 1339e201c85SYohann *V = 0.0; 1349e201c85SYohann if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 1359e201c85SYohann for (CeedInt i = 0; i < P_1D; i++) { 1369e201c85SYohann *V += B[i + data.t_id_x * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 1379e201c85SYohann } 1389e201c85SYohann } 1399e201c85SYohann } 1409e201c85SYohann 1419e201c85SYohann //------------------------------------------------------------------------------ 1429e201c85SYohann // 2D tensor contract y 1439e201c85SYohann //------------------------------------------------------------------------------ 1446b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 1459e201c85SYohann inline __device__ void ContractY2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 146d6c19ee8SJeremy L Thompson __syncthreads(); 1479e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 1489e201c85SYohann __syncthreads(); 1499e201c85SYohann *V = 0.0; 1509e201c85SYohann if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 1519e201c85SYohann for (CeedInt i = 0; i < P_1D; i++) { 1529e201c85SYohann *V += B[i + data.t_id_y * P_1D] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 1539e201c85SYohann } 1549e201c85SYohann } 1559e201c85SYohann } 1569e201c85SYohann 1579e201c85SYohann //------------------------------------------------------------------------------ 1589e201c85SYohann // 2D transpose tensor contract y 1599e201c85SYohann //------------------------------------------------------------------------------ 1606b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 1619e201c85SYohann inline __device__ void ContractTransposeY2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 162d6c19ee8SJeremy L Thompson __syncthreads(); 1639e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 1649e201c85SYohann __syncthreads(); 1659e201c85SYohann *V = 0.0; 1669e201c85SYohann if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 1679e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 1689e201c85SYohann *V += B[data.t_id_y + i * P_1D] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 1699e201c85SYohann } 1709e201c85SYohann } 1719e201c85SYohann } 1729e201c85SYohann 1739e201c85SYohann //------------------------------------------------------------------------------ 1749e201c85SYohann // 2D transpose tensor contract x 1759e201c85SYohann //------------------------------------------------------------------------------ 1766b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 1779e201c85SYohann inline __device__ void ContractTransposeX2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 178d6c19ee8SJeremy L Thompson __syncthreads(); 1799e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 1809e201c85SYohann __syncthreads(); 1819e201c85SYohann *V = 0.0; 1829e201c85SYohann if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 1839e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 1849e201c85SYohann *V += B[data.t_id_x + i * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 1859e201c85SYohann } 1869e201c85SYohann } 1879e201c85SYohann } 1889e201c85SYohann 1899e201c85SYohann //------------------------------------------------------------------------------ 1909e201c85SYohann // 2D transpose tensor contract and add x 1919e201c85SYohann //------------------------------------------------------------------------------ 1926b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 1939e201c85SYohann inline __device__ void ContractTransposeAddX2d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 194d6c19ee8SJeremy L Thompson __syncthreads(); 1959e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; 1969e201c85SYohann __syncthreads(); 1979e201c85SYohann if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 1989e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 1999e201c85SYohann *V += B[data.t_id_x + i * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 2009e201c85SYohann } 2019e201c85SYohann } 2029e201c85SYohann } 2039e201c85SYohann 2049e201c85SYohann //------------------------------------------------------------------------------ 2059e201c85SYohann // 2D interpolate to quadrature points 2069e201c85SYohann //------------------------------------------------------------------------------ 2076b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 2089e201c85SYohann inline __device__ void InterpTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 2099e201c85SYohann CeedScalar r_t[1]; 2109e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 2116b92dc4bSJeremy L Thompson ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_B, r_t); 2126b92dc4bSJeremy L Thompson ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_B, &r_V[comp]); 2139e201c85SYohann } 2149e201c85SYohann } 2159e201c85SYohann 2169e201c85SYohann //------------------------------------------------------------------------------ 2179e201c85SYohann // 2D interpolate transpose 2189e201c85SYohann //------------------------------------------------------------------------------ 2196b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 2202b730f8bSJeremy L Thompson inline __device__ void InterpTransposeTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 2212b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 2229e201c85SYohann CeedScalar r_t[1]; 2239e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 2246b92dc4bSJeremy L Thompson ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_B, r_t); 2256b92dc4bSJeremy L Thompson ContractTransposeX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_B, &r_V[comp]); 2269e201c85SYohann } 2279e201c85SYohann } 2289e201c85SYohann 2299e201c85SYohann //------------------------------------------------------------------------------ 2300ccda8ebSJeremy L Thompson // 2D interpolate to quadrature points, nodes and quadrature points collocated 2310ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 2320ccda8ebSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 2330ccda8ebSJeremy L Thompson inline __device__ void InterpTensorCollocatedNodes2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 2340ccda8ebSJeremy L Thompson CeedScalar *__restrict__ r_V) { 2350ccda8ebSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 2360ccda8ebSJeremy L Thompson r_V[comp] = r_U[comp]; 2370ccda8ebSJeremy L Thompson } 2380ccda8ebSJeremy L Thompson } 2390ccda8ebSJeremy L Thompson 2400ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 2410ccda8ebSJeremy L Thompson // 2D interpolate transpose, nodes and quadrature points collocated 2420ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 2430ccda8ebSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 2440ccda8ebSJeremy L Thompson inline __device__ void InterpTransposeTensorCollocatedNodes2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 2450ccda8ebSJeremy L Thompson CeedScalar *__restrict__ r_V) { 2460ccda8ebSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 2470ccda8ebSJeremy L Thompson r_V[comp] = r_U[comp]; 2480ccda8ebSJeremy L Thompson } 2490ccda8ebSJeremy L Thompson } 2500ccda8ebSJeremy L Thompson 2510ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 2529e201c85SYohann // 2D derivatives at quadrature points 2539e201c85SYohann //------------------------------------------------------------------------------ 2546b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 2552b730f8bSJeremy L Thompson inline __device__ void GradTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 2562b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 2579e201c85SYohann CeedScalar r_t[1]; 2589e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 2596b92dc4bSJeremy L Thompson ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, r_t); 2606b92dc4bSJeremy L Thompson ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_B, &r_V[comp + 0 * NUM_COMP]); 2616b92dc4bSJeremy L Thompson ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_B, r_t); 2626b92dc4bSJeremy L Thompson ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_G, &r_V[comp + 1 * NUM_COMP]); 2639e201c85SYohann } 2649e201c85SYohann } 2659e201c85SYohann 2669e201c85SYohann //------------------------------------------------------------------------------ 2679e201c85SYohann // 2D derivatives transpose 2689e201c85SYohann //------------------------------------------------------------------------------ 2696b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 2702b730f8bSJeremy L Thompson inline __device__ void GradTransposeTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 2712b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 2729e201c85SYohann CeedScalar r_t[1]; 2739e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 2746b92dc4bSJeremy L Thompson ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 0 * NUM_COMP], c_B, r_t); 2756b92dc4bSJeremy L Thompson ContractTransposeX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_G, &r_V[comp]); 2766b92dc4bSJeremy L Thompson ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 1 * NUM_COMP], c_G, r_t); 2776b92dc4bSJeremy L Thompson ContractTransposeAddX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_B, &r_V[comp]); 2789e201c85SYohann } 2799e201c85SYohann } 2809e201c85SYohann 2819e201c85SYohann //------------------------------------------------------------------------------ 28202219a08SJeremy L Thompson // 2D derivatives at quadrature points, nodes and quadrature points collocated 28302219a08SJeremy L Thompson //------------------------------------------------------------------------------ 28402219a08SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 2850ccda8ebSJeremy L Thompson inline __device__ void GradTensorCollocatedNodes2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 2860ccda8ebSJeremy L Thompson const CeedScalar *c_G, CeedScalar *__restrict__ r_V) { 28702219a08SJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 28802219a08SJeremy L Thompson ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, &r_V[comp + 0 * NUM_COMP]); 28902219a08SJeremy L Thompson ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, &r_V[comp + 1 * NUM_COMP]); 29002219a08SJeremy L Thompson } 29102219a08SJeremy L Thompson } 29202219a08SJeremy L Thompson 29302219a08SJeremy L Thompson //------------------------------------------------------------------------------ 29402219a08SJeremy L Thompson // 2D derivatives transpose, nodes and quadrature points collocated 29502219a08SJeremy L Thompson //------------------------------------------------------------------------------ 29602219a08SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 2970ccda8ebSJeremy L Thompson inline __device__ void GradTransposeTensorCollocatedNodes2d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 2980ccda8ebSJeremy L Thompson const CeedScalar *c_G, CeedScalar *__restrict__ r_V) { 29902219a08SJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 30002219a08SJeremy L Thompson ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 1 * NUM_COMP], c_G, &r_V[comp]); 30102219a08SJeremy L Thompson ContractTransposeAddX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 0 * NUM_COMP], c_G, &r_V[comp]); 30202219a08SJeremy L Thompson } 30302219a08SJeremy L Thompson } 30402219a08SJeremy L Thompson 30502219a08SJeremy L Thompson //------------------------------------------------------------------------------ 3069e201c85SYohann // 2D quadrature weights 3079e201c85SYohann //------------------------------------------------------------------------------ 308ca595be6SJeremy L Thompson template <int P_1D, int Q_1D> 3099e201c85SYohann inline __device__ void WeightTensor2d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 3102b730f8bSJeremy L Thompson *w = (data.t_id_x < Q_1D && data.t_id_y < Q_1D) ? q_weight_1d[data.t_id_x] * q_weight_1d[data.t_id_y] : 0.0; 3119e201c85SYohann } 3129e201c85SYohann 3139e201c85SYohann //------------------------------------------------------------------------------ 3149e201c85SYohann // 3D 3159e201c85SYohann //------------------------------------------------------------------------------ 3169e201c85SYohann 3179e201c85SYohann //------------------------------------------------------------------------------ 3189e201c85SYohann // 3D tensor contract x 3199e201c85SYohann //------------------------------------------------------------------------------ 3206b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 3219e201c85SYohann inline __device__ void ContractX3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 3229e201c85SYohann CeedScalar r_B[P_1D]; 3239e201c85SYohann for (CeedInt i = 0; i < P_1D; i++) { 3249e201c85SYohann r_B[i] = B[i + data.t_id_x * P_1D]; 3259e201c85SYohann } 3269e201c85SYohann 3279e201c85SYohann for (CeedInt k = 0; k < P_1D; k++) { 328d6c19ee8SJeremy L Thompson __syncthreads(); 3299e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 3309e201c85SYohann __syncthreads(); 3319e201c85SYohann V[k] = 0.0; 3329e201c85SYohann if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 3339e201c85SYohann for (CeedInt i = 0; i < P_1D; i++) { 3349e201c85SYohann V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 3359e201c85SYohann } 3369e201c85SYohann } 3379e201c85SYohann } 3389e201c85SYohann } 3399e201c85SYohann 3409e201c85SYohann //------------------------------------------------------------------------------ 3419e201c85SYohann // 3D tensor contract y 3429e201c85SYohann //------------------------------------------------------------------------------ 3436b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 3449e201c85SYohann inline __device__ void ContractY3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 3459e201c85SYohann CeedScalar r_B[P_1D]; 3469e201c85SYohann for (CeedInt i = 0; i < P_1D; i++) { 3479e201c85SYohann r_B[i] = B[i + data.t_id_y * P_1D]; 3489e201c85SYohann } 3499e201c85SYohann 3509e201c85SYohann for (CeedInt k = 0; k < P_1D; k++) { 351d6c19ee8SJeremy L Thompson __syncthreads(); 3529e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 3539e201c85SYohann __syncthreads(); 3549e201c85SYohann V[k] = 0.0; 3559e201c85SYohann if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 3569e201c85SYohann for (CeedInt i = 0; i < P_1D; i++) { 3579e201c85SYohann V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 3589e201c85SYohann } 3599e201c85SYohann } 3609e201c85SYohann } 3619e201c85SYohann } 3629e201c85SYohann 3639e201c85SYohann //------------------------------------------------------------------------------ 3649e201c85SYohann // 3D tensor contract z 3659e201c85SYohann //------------------------------------------------------------------------------ 3666b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 3679e201c85SYohann inline __device__ void ContractZ3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 3689e201c85SYohann for (CeedInt k = 0; k < Q_1D; k++) { 3699e201c85SYohann V[k] = 0.0; 3709e201c85SYohann if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 3719e201c85SYohann for (CeedInt i = 0; i < P_1D; i++) { 3729e201c85SYohann V[k] += B[i + k * P_1D] * U[i]; // Contract z direction 3739e201c85SYohann } 3749e201c85SYohann } 3759e201c85SYohann } 3769e201c85SYohann } 3779e201c85SYohann 3789e201c85SYohann //------------------------------------------------------------------------------ 3799e201c85SYohann // 3D transpose tensor contract z 3809e201c85SYohann //------------------------------------------------------------------------------ 3816b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 3829e201c85SYohann inline __device__ void ContractTransposeZ3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 3839e201c85SYohann for (CeedInt k = 0; k < P_1D; k++) { 3849e201c85SYohann V[k] = 0.0; 3859e201c85SYohann if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { 3869e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 3879e201c85SYohann V[k] += B[k + i * P_1D] * U[i]; // Contract z direction 3889e201c85SYohann } 3899e201c85SYohann } 3909e201c85SYohann } 3919e201c85SYohann } 3929e201c85SYohann 3939e201c85SYohann //------------------------------------------------------------------------------ 3949e201c85SYohann // 3D transpose tensor contract y 3959e201c85SYohann //------------------------------------------------------------------------------ 3966b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 3979e201c85SYohann inline __device__ void ContractTransposeY3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 3989e201c85SYohann CeedScalar r_B[Q_1D]; 3999e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 4009e201c85SYohann r_B[i] = B[data.t_id_y + i * P_1D]; 4019e201c85SYohann } 4029e201c85SYohann 4039e201c85SYohann for (CeedInt k = 0; k < P_1D; k++) { 404d6c19ee8SJeremy L Thompson __syncthreads(); 4059e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 4069e201c85SYohann __syncthreads(); 4079e201c85SYohann V[k] = 0.0; 4089e201c85SYohann if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 4099e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 4109e201c85SYohann V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 4119e201c85SYohann } 4129e201c85SYohann } 4139e201c85SYohann } 4149e201c85SYohann } 4159e201c85SYohann 4169e201c85SYohann //------------------------------------------------------------------------------ 4179e201c85SYohann // 3D transpose tensor contract y 4189e201c85SYohann //------------------------------------------------------------------------------ 4196b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 4209e201c85SYohann inline __device__ void ContractTransposeAddY3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 4219e201c85SYohann CeedScalar r_B[Q_1D]; 4229e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 4239e201c85SYohann r_B[i] = B[data.t_id_y + i * P_1D]; 4249e201c85SYohann } 4259e201c85SYohann 4269e201c85SYohann for (CeedInt k = 0; k < P_1D; k++) { 427d6c19ee8SJeremy L Thompson __syncthreads(); 4289e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 4299e201c85SYohann __syncthreads(); 4309e201c85SYohann if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { 4319e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 4329e201c85SYohann V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction 4339e201c85SYohann } 4349e201c85SYohann } 4359e201c85SYohann } 4369e201c85SYohann } 4379e201c85SYohann 4389e201c85SYohann //------------------------------------------------------------------------------ 4399e201c85SYohann // 3D transpose tensor contract x 4409e201c85SYohann //------------------------------------------------------------------------------ 4416b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 4429e201c85SYohann inline __device__ void ContractTransposeX3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 4439e201c85SYohann CeedScalar r_B[Q_1D]; 4449e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 4459e201c85SYohann r_B[i] = B[data.t_id_x + i * P_1D]; 4469e201c85SYohann } 4479e201c85SYohann 4489e201c85SYohann for (CeedInt k = 0; k < P_1D; k++) { 449d6c19ee8SJeremy L Thompson __syncthreads(); 4509e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 4519e201c85SYohann __syncthreads(); 4529e201c85SYohann V[k] = 0.0; 4539e201c85SYohann if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 4549e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 4559e201c85SYohann V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 4569e201c85SYohann } 4579e201c85SYohann } 4589e201c85SYohann } 4599e201c85SYohann } 4609e201c85SYohann 4619e201c85SYohann //------------------------------------------------------------------------------ 4629e201c85SYohann // 3D transpose tensor contract add x 4639e201c85SYohann //------------------------------------------------------------------------------ 4646b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 4659e201c85SYohann inline __device__ void ContractTransposeAddX3d(SharedData_Hip &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) { 4669e201c85SYohann CeedScalar r_B[Q_1D]; 4679e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 4689e201c85SYohann r_B[i] = B[data.t_id_x + i * P_1D]; 4699e201c85SYohann } 4709e201c85SYohann 4719e201c85SYohann for (CeedInt k = 0; k < P_1D; k++) { 472d6c19ee8SJeremy L Thompson __syncthreads(); 4739e201c85SYohann data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; 4749e201c85SYohann __syncthreads(); 4759e201c85SYohann if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 4769e201c85SYohann for (CeedInt i = 0; i < Q_1D; i++) { 4779e201c85SYohann V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction 4789e201c85SYohann } 4799e201c85SYohann } 4809e201c85SYohann } 4819e201c85SYohann } 4829e201c85SYohann 4839e201c85SYohann //------------------------------------------------------------------------------ 4849e201c85SYohann // 3D interpolate to quadrature points 4859e201c85SYohann //------------------------------------------------------------------------------ 4866b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 4879e201c85SYohann inline __device__ void InterpTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, CeedScalar *__restrict__ r_V) { 4889e201c85SYohann CeedScalar r_t1[T_1D]; 4899e201c85SYohann CeedScalar r_t2[T_1D]; 4909e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 4916b92dc4bSJeremy L Thompson ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_B, r_t1); 4926b92dc4bSJeremy L Thompson ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); 4936b92dc4bSJeremy L Thompson ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * Q_1D]); 4949e201c85SYohann } 4959e201c85SYohann } 4969e201c85SYohann 4979e201c85SYohann //------------------------------------------------------------------------------ 4989e201c85SYohann // 3D interpolate transpose 4999e201c85SYohann //------------------------------------------------------------------------------ 5006b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 5012b730f8bSJeremy L Thompson inline __device__ void InterpTransposeTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 5022b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 5039e201c85SYohann CeedScalar r_t1[T_1D]; 5049e201c85SYohann CeedScalar r_t2[T_1D]; 5059e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 5066b92dc4bSJeremy L Thompson ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D], c_B, r_t1); 5076b92dc4bSJeremy L Thompson ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); 5086b92dc4bSJeremy L Thompson ContractTransposeX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); 5099e201c85SYohann } 5109e201c85SYohann } 5119e201c85SYohann 5129e201c85SYohann //------------------------------------------------------------------------------ 5130ccda8ebSJeremy L Thompson // 3D interpolate to quadrature points, nodes and quadrature points collocated 5140ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 5150ccda8ebSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 5160ccda8ebSJeremy L Thompson inline __device__ void InterpTensorCollocatedNodes3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 5170ccda8ebSJeremy L Thompson CeedScalar *__restrict__ r_V) { 5180ccda8ebSJeremy L Thompson for (CeedInt i = 0; i < Q_1D; i++) { 5190ccda8ebSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 5200ccda8ebSJeremy L Thompson r_V[i + comp * Q_1D] = r_U[i + comp * P_1D]; 5210ccda8ebSJeremy L Thompson } 5220ccda8ebSJeremy L Thompson } 5230ccda8ebSJeremy L Thompson } 5240ccda8ebSJeremy L Thompson 5250ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 5260ccda8ebSJeremy L Thompson // 3D interpolate transpose, nodes and quadrature points collocated 5270ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 5280ccda8ebSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 5290ccda8ebSJeremy L Thompson inline __device__ void InterpTransposeTensorCollocatedNodes3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 5300ccda8ebSJeremy L Thompson CeedScalar *__restrict__ r_V) { 5310ccda8ebSJeremy L Thompson for (CeedInt i = 0; i < Q_1D; i++) { 5320ccda8ebSJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 5330ccda8ebSJeremy L Thompson r_V[i + comp * P_1D] = r_U[i + comp * Q_1D]; 5340ccda8ebSJeremy L Thompson } 5350ccda8ebSJeremy L Thompson } 5360ccda8ebSJeremy L Thompson } 5370ccda8ebSJeremy L Thompson 5380ccda8ebSJeremy L Thompson //------------------------------------------------------------------------------ 5399e201c85SYohann // 3D derivatives at quadrature points 5409e201c85SYohann //------------------------------------------------------------------------------ 5416b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 5422b730f8bSJeremy L Thompson inline __device__ void GradTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 5432b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 5449e201c85SYohann CeedScalar r_t1[T_1D]; 5459e201c85SYohann CeedScalar r_t2[T_1D]; 5469e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 5476b92dc4bSJeremy L Thompson ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, r_t1); 5486b92dc4bSJeremy L Thompson ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); 5496b92dc4bSJeremy L Thompson ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * Q_1D + 0 * NUM_COMP * Q_1D]); 5506b92dc4bSJeremy L Thompson ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_B, r_t1); 5516b92dc4bSJeremy L Thompson ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_G, r_t2); 5526b92dc4bSJeremy L Thompson ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * Q_1D + 1 * NUM_COMP * Q_1D]); 5536b92dc4bSJeremy L Thompson ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_B, r_t1); 5546b92dc4bSJeremy L Thompson ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); 5556b92dc4bSJeremy L Thompson ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_G, &r_V[comp * Q_1D + 2 * NUM_COMP * Q_1D]); 5569e201c85SYohann } 5579e201c85SYohann } 5589e201c85SYohann 5599e201c85SYohann //------------------------------------------------------------------------------ 5609e201c85SYohann // 3D derivatives transpose 5619e201c85SYohann //------------------------------------------------------------------------------ 5626b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 5632b730f8bSJeremy L Thompson inline __device__ void GradTransposeTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 5642b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 5659e201c85SYohann CeedScalar r_t1[T_1D]; 5669e201c85SYohann CeedScalar r_t2[T_1D]; 5679e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 5686b92dc4bSJeremy L Thompson ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D], c_B, r_t1); 5696b92dc4bSJeremy L Thompson ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); 5706b92dc4bSJeremy L Thompson ContractTransposeX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_G, &r_V[comp * P_1D]); 5716b92dc4bSJeremy L Thompson ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D], c_B, r_t1); 5726b92dc4bSJeremy L Thompson ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_G, r_t2); 5736b92dc4bSJeremy L Thompson ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); 5746b92dc4bSJeremy L Thompson ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_G, r_t1); 5756b92dc4bSJeremy L Thompson ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); 5766b92dc4bSJeremy L Thompson ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); 5779e201c85SYohann } 5789e201c85SYohann } 5799e201c85SYohann 5809e201c85SYohann //------------------------------------------------------------------------------ 5819e201c85SYohann // 3D derivatives at quadrature points 5829e201c85SYohann //------------------------------------------------------------------------------ 5836b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 5842b730f8bSJeremy L Thompson inline __device__ void GradTensorCollocated3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, const CeedScalar *c_G, 5852b730f8bSJeremy L Thompson CeedScalar *__restrict__ r_V) { 5869e201c85SYohann CeedScalar r_t1[T_1D]; 5879e201c85SYohann CeedScalar r_t2[T_1D]; 5889e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 5896b92dc4bSJeremy L Thompson ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_B, r_t1); 5906b92dc4bSJeremy L Thompson ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); 5916b92dc4bSJeremy L Thompson ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, r_t1); 5926b92dc4bSJeremy L Thompson ContractX3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, r_t1, c_G, &r_V[comp * Q_1D + 0 * NUM_COMP * Q_1D]); 5936b92dc4bSJeremy L Thompson ContractY3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, r_t1, c_G, &r_V[comp * Q_1D + 1 * NUM_COMP * Q_1D]); 5946b92dc4bSJeremy L Thompson ContractZ3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, r_t1, c_G, &r_V[comp * Q_1D + 2 * NUM_COMP * Q_1D]); 5959e201c85SYohann } 5969e201c85SYohann } 5979e201c85SYohann 5989e201c85SYohann //------------------------------------------------------------------------------ 5999e201c85SYohann // 3D derivatives transpose 6009e201c85SYohann //------------------------------------------------------------------------------ 6016b92dc4bSJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 6022b730f8bSJeremy L Thompson inline __device__ void GradTransposeTensorCollocated3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 6032b730f8bSJeremy L Thompson const CeedScalar *c_G, CeedScalar *__restrict__ r_V) { 6049e201c85SYohann CeedScalar r_t1[T_1D]; 6059e201c85SYohann CeedScalar r_t2[T_1D]; 6069e201c85SYohann for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 6076b92dc4bSJeremy L Thompson ContractTransposeZ3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_G, r_t2); 6086b92dc4bSJeremy L Thompson ContractTransposeAddY3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D], c_G, r_t2); 6096b92dc4bSJeremy L Thompson ContractTransposeAddX3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D], c_G, r_t2); 6106b92dc4bSJeremy L Thompson ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, r_t1); 6116b92dc4bSJeremy L Thompson ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); 6126b92dc4bSJeremy L Thompson ContractTransposeX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); 6139e201c85SYohann } 6149e201c85SYohann } 6159e201c85SYohann 6169e201c85SYohann //------------------------------------------------------------------------------ 61702219a08SJeremy L Thompson // 3D derivatives at quadrature points, nodes and quadrature points collocated 61802219a08SJeremy L Thompson //------------------------------------------------------------------------------ 61902219a08SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 6200ccda8ebSJeremy L Thompson inline __device__ void GradTensorCollocatedNodes3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 6210ccda8ebSJeremy L Thompson const CeedScalar *c_G, CeedScalar *__restrict__ r_V) { 62202219a08SJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 62302219a08SJeremy L Thompson ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 0 * NUM_COMP * Q_1D]); 62402219a08SJeremy L Thompson ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 1 * NUM_COMP * Q_1D]); 62502219a08SJeremy L Thompson ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 2 * NUM_COMP * Q_1D]); 62602219a08SJeremy L Thompson } 62702219a08SJeremy L Thompson } 62802219a08SJeremy L Thompson 62902219a08SJeremy L Thompson //------------------------------------------------------------------------------ 63002219a08SJeremy L Thompson // 3D derivatives transpose, nodes and quadrature points collocated 63102219a08SJeremy L Thompson //------------------------------------------------------------------------------ 63202219a08SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D, int T_1D> 6330ccda8ebSJeremy L Thompson inline __device__ void GradTransposeTensorCollocatedNodes3d(SharedData_Hip &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B, 6340ccda8ebSJeremy L Thompson const CeedScalar *c_G, CeedScalar *__restrict__ r_V) { 63502219a08SJeremy L Thompson for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 63602219a08SJeremy L Thompson ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_G, &r_V[comp * P_1D]); 63702219a08SJeremy L Thompson ContractTransposeAddY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D], c_G, &r_V[comp * P_1D]); 63802219a08SJeremy L Thompson ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D], c_G, &r_V[comp * P_1D]); 63902219a08SJeremy L Thompson } 64002219a08SJeremy L Thompson } 64102219a08SJeremy L Thompson 64202219a08SJeremy L Thompson //------------------------------------------------------------------------------ 6439e201c85SYohann // 3D quadrature weights 6449e201c85SYohann //------------------------------------------------------------------------------ 6459b91271bSJeremy L Thompson template <int P_1D, int Q_1D> 6469e201c85SYohann inline __device__ void WeightTensor3d(SharedData_Hip &data, const CeedScalar *__restrict__ q_weight_1d, CeedScalar *w) { 6479e201c85SYohann const bool quad = (data.t_id_x < Q_1D && data.t_id_y < Q_1D); 6489e201c85SYohann const CeedScalar pw = quad ? q_weight_1d[data.t_id_x] * q_weight_1d[data.t_id_y] : 0.0; 6499e201c85SYohann for (CeedInt q = 0; q < Q_1D; q++) { 6509e201c85SYohann w[q] = quad ? pw * q_weight_1d[q] : 0.0; 6519e201c85SYohann } 6529e201c85SYohann } 653