1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 /// @file 9 /// Internal header for CUDA shared memory basis read/write templates 10 #ifndef _ceed_cuda_shared_basis_read_write_templates_h 11 #define _ceed_cuda_shared_basis_read_write_templates_h 12 13 #include <ceed.h> 14 15 //------------------------------------------------------------------------------ 16 // 1D 17 //------------------------------------------------------------------------------ 18 19 //------------------------------------------------------------------------------ 20 // E-vector -> single element 21 //------------------------------------------------------------------------------ 22 template <int NUM_COMP, int P_1D> 23 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 24 if (data.t_id_x < P_1D) { 25 const CeedInt node = data.t_id_x; 26 const CeedInt ind = node * strides_node + elem * strides_elem; 27 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 28 r_u[comp] = d_u[ind + comp * strides_comp]; 29 } 30 } 31 } 32 33 //------------------------------------------------------------------------------ 34 // Single element -> E-vector 35 //------------------------------------------------------------------------------ 36 template <int NUM_COMP, int P_1D> 37 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 38 if (data.t_id_x < P_1D) { 39 const CeedInt node = data.t_id_x; 40 const CeedInt ind = node * strides_node + elem * strides_elem; 41 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 42 d_v[ind + comp * strides_comp] = r_v[comp]; 43 } 44 } 45 } 46 47 //------------------------------------------------------------------------------ 48 // 2D 49 //------------------------------------------------------------------------------ 50 51 //------------------------------------------------------------------------------ 52 // E-vector -> single element 53 //------------------------------------------------------------------------------ 54 template <int NUM_COMP, int P_1D> 55 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 56 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 57 const CeedInt node = data.t_id_x + data.t_id_y*P_1D; 58 const CeedInt ind = node * strides_node + elem * strides_elem; 59 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 60 r_u[comp] = d_u[ind + comp * strides_comp]; 61 } 62 } 63 } 64 65 //------------------------------------------------------------------------------ 66 // Single element -> E-vector 67 //------------------------------------------------------------------------------ 68 template <int NUM_COMP, int P_1D> 69 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 70 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 71 const CeedInt node = data.t_id_x + data.t_id_y*P_1D; 72 const CeedInt ind = node * strides_node + elem * strides_elem; 73 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 74 d_v[ind + comp * strides_comp] = r_v[comp]; 75 } 76 } 77 } 78 79 //------------------------------------------------------------------------------ 80 // 3D 81 //------------------------------------------------------------------------------ 82 83 //------------------------------------------------------------------------------ 84 // E-vector -> single element 85 //------------------------------------------------------------------------------ 86 template <int NUM_COMP, int P_1D> 87 inline __device__ void ReadElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 88 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 89 for (CeedInt z = 0; z < P_1D; z++) { 90 const CeedInt node = data.t_id_x + data.t_id_y*P_1D + z*P_1D*P_1D; 91 const CeedInt ind = node * strides_node + elem * strides_elem; 92 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 93 r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp]; 94 } 95 } 96 } 97 } 98 99 //------------------------------------------------------------------------------ 100 // Single element -> E-vector 101 //------------------------------------------------------------------------------ 102 template <int NUM_COMP, int P_1D> 103 inline __device__ void WriteElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 104 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 105 for (CeedInt z = 0; z < P_1D; z++) { 106 const CeedInt node = data.t_id_x + data.t_id_y*P_1D + z*P_1D*P_1D; 107 const CeedInt ind = node * strides_node + elem * strides_elem; 108 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 109 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; 110 } 111 } 112 } 113 } 114 115 //------------------------------------------------------------------------------ 116 117 #endif 118