xref: /libCEED/include/ceed/jit-source/cuda/cuda-shared-basis-nontensor-templates.h (revision d4cc18453651bd0f94c1a2e078b2646a92dafdcc)
1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
29ff05d55SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
39ff05d55SJeremy L Thompson //
49ff05d55SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
59ff05d55SJeremy L Thompson //
69ff05d55SJeremy L Thompson // This file is part of CEED:  http://github.com/ceed
79ff05d55SJeremy L Thompson 
89ff05d55SJeremy L Thompson /// @file
96c13bbcbSJeremy L Thompson /// Internal header for CUDA shared memory non-tensor basis templates
109ff05d55SJeremy L Thompson #include <ceed/types.h>
119ff05d55SJeremy L Thompson 
129ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
139ff05d55SJeremy L Thompson // 1D tensor contraction
149ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
159ff05d55SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
Contract1d(SharedData_Cuda & data,const CeedScalar * U,const CeedScalar * B,CeedScalar * V)169ff05d55SJeremy L Thompson inline __device__ void Contract1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
179ff05d55SJeremy L Thompson   data.slice[data.t_id_x] = *U;
189ff05d55SJeremy L Thompson   __syncthreads();
199ff05d55SJeremy L Thompson   *V = 0.0;
209ff05d55SJeremy L Thompson   if (data.t_id_x < Q_1D) {
219ff05d55SJeremy L Thompson     for (CeedInt i = 0; i < P_1D; i++) {
229ff05d55SJeremy L Thompson       *V += B[i + data.t_id_x * P_1D] * data.slice[i];  // Contract x direction
239ff05d55SJeremy L Thompson     }
249ff05d55SJeremy L Thompson   }
259ff05d55SJeremy L Thompson   __syncthreads();
269ff05d55SJeremy L Thompson }
279ff05d55SJeremy L Thompson 
289ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
299ff05d55SJeremy L Thompson // 1D transpose tensor contraction
309ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
319ff05d55SJeremy L Thompson template <int NUM_COMP, int P_1D, int Q_1D>
ContractTranspose1d(SharedData_Cuda & data,const CeedScalar * U,const CeedScalar * B,CeedScalar * V)329ff05d55SJeremy L Thompson inline __device__ void ContractTranspose1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, CeedScalar *V) {
339ff05d55SJeremy L Thompson   data.slice[data.t_id_x] = *U;
349ff05d55SJeremy L Thompson   __syncthreads();
359ff05d55SJeremy L Thompson   if (data.t_id_x < P_1D) {
369ff05d55SJeremy L Thompson     for (CeedInt i = 0; i < Q_1D; i++) {
379ff05d55SJeremy L Thompson       *V += B[data.t_id_x + i * P_1D] * data.slice[i];  // Contract x direction
389ff05d55SJeremy L Thompson     }
399ff05d55SJeremy L Thompson   }
409ff05d55SJeremy L Thompson   __syncthreads();
419ff05d55SJeremy L Thompson }
429ff05d55SJeremy L Thompson 
439ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
449ff05d55SJeremy L Thompson // Interpolate to quadrature points
459ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
4699421279SJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D>
InterpNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_B,CeedScalar * __restrict__ r_V)479ff05d55SJeremy L Thompson inline __device__ void InterpNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
489ff05d55SJeremy L Thompson                                        CeedScalar *__restrict__ r_V) {
499ff05d55SJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
509ff05d55SJeremy L Thompson     Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
519ff05d55SJeremy L Thompson   }
529ff05d55SJeremy L Thompson }
539ff05d55SJeremy L Thompson 
549ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
559ff05d55SJeremy L Thompson // Interpolate transpose
569ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
5799421279SJeremy L Thompson template <int NUM_COMP, int P, int Q, int T_1D>
InterpTransposeNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_B,CeedScalar * __restrict__ r_V)589ff05d55SJeremy L Thompson inline __device__ void InterpTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_B,
599ff05d55SJeremy L Thompson                                                 CeedScalar *__restrict__ r_V) {
609ff05d55SJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
619ff05d55SJeremy L Thompson     r_V[comp] = 0.0;
629ff05d55SJeremy L Thompson     ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp], c_B, &r_V[comp]);
639ff05d55SJeremy L Thompson   }
649ff05d55SJeremy L Thompson }
659ff05d55SJeremy L Thompson 
669ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
679ff05d55SJeremy L Thompson // Derivatives at quadrature points
689ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
6999421279SJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D>
GradNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_G,CeedScalar * __restrict__ r_V)709ff05d55SJeremy L Thompson inline __device__ void GradNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G, CeedScalar *__restrict__ r_V) {
719ff05d55SJeremy L Thompson   for (CeedInt dim = 0; dim < DIM; dim++) {
729ff05d55SJeremy L Thompson     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
739ff05d55SJeremy L Thompson       Contract1d<NUM_COMP, P, Q>(data, &r_U[comp], &c_G[dim * P * Q], &r_V[comp + dim * NUM_COMP]);
749ff05d55SJeremy L Thompson     }
759ff05d55SJeremy L Thompson   }
769ff05d55SJeremy L Thompson }
779ff05d55SJeremy L Thompson 
789ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
799ff05d55SJeremy L Thompson // Derivatives transpose
809ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
8199421279SJeremy L Thompson template <int NUM_COMP, int DIM, int P, int Q, int T_1D>
GradTransposeNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ r_U,const CeedScalar * c_G,CeedScalar * __restrict__ r_V)829ff05d55SJeremy L Thompson inline __device__ void GradTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const CeedScalar *c_G,
839ff05d55SJeremy L Thompson                                               CeedScalar *__restrict__ r_V) {
849ff05d55SJeremy L Thompson   for (CeedInt comp = 0; comp < NUM_COMP; comp++) r_V[comp] = 0.0;
859ff05d55SJeremy L Thompson   for (CeedInt dim = 0; dim < DIM; dim++) {
869ff05d55SJeremy L Thompson     for (CeedInt comp = 0; comp < NUM_COMP; comp++) {
879ff05d55SJeremy L Thompson       ContractTranspose1d<NUM_COMP, P, Q>(data, &r_U[comp + dim * NUM_COMP], &c_G[dim * P * Q], &r_V[comp]);
889ff05d55SJeremy L Thompson     }
899ff05d55SJeremy L Thompson   }
909ff05d55SJeremy L Thompson }
919ff05d55SJeremy L Thompson 
929ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
939ff05d55SJeremy L Thompson // Quadrature weights
949ff05d55SJeremy L Thompson //------------------------------------------------------------------------------
95343e3094SJeremy L Thompson template <int P, int Q>
WeightNonTensor(SharedData_Cuda & data,const CeedScalar * __restrict__ q_weight,CeedScalar * w)969ff05d55SJeremy L Thompson inline __device__ void WeightNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weight, CeedScalar *w) {
979ff05d55SJeremy L Thompson   *w = (data.t_id_x < Q) ? q_weight[data.t_id_x] : 0.0;
989ff05d55SJeremy L Thompson }
99