1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 /// @file 9 /// Internal header for CUDA shared memory basis read/write templates 10 #ifndef CEED_CUDA_SHARED_BASIS_READ_WRITE_TEMPLATES_H 11 #define CEED_CUDA_SHARED_BASIS_READ_WRITE_TEMPLATES_H 12 13 #include <ceed.h> 14 15 //------------------------------------------------------------------------------ 16 // 1D 17 //------------------------------------------------------------------------------ 18 19 //------------------------------------------------------------------------------ 20 // E-vector -> single element 21 //------------------------------------------------------------------------------ 22 template <int NUM_COMP, int P_1D> 23 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 24 const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 25 if (data.t_id_x < P_1D) { 26 const CeedInt node = data.t_id_x; 27 const CeedInt ind = node * strides_node + elem * strides_elem; 28 29 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 30 r_u[comp] = d_u[ind + comp * strides_comp]; 31 } 32 } 33 } 34 35 //------------------------------------------------------------------------------ 36 // Single element -> E-vector 37 //------------------------------------------------------------------------------ 38 template <int NUM_COMP, int P_1D> 39 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 40 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 41 if (data.t_id_x < P_1D) { 42 const CeedInt node = data.t_id_x; 43 const CeedInt ind = node * strides_node + elem * strides_elem; 44 45 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 46 d_v[ind + comp * strides_comp] = r_v[comp]; 47 } 48 } 49 } 50 51 //------------------------------------------------------------------------------ 52 // 2D 53 //------------------------------------------------------------------------------ 54 55 //------------------------------------------------------------------------------ 56 // E-vector -> single element 57 //------------------------------------------------------------------------------ 58 template <int NUM_COMP, int P_1D> 59 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 60 const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 61 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 62 const CeedInt node = data.t_id_x + data.t_id_y * P_1D; 63 const CeedInt ind = node * strides_node + elem * strides_elem; 64 65 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 66 r_u[comp] = d_u[ind + comp * strides_comp]; 67 } 68 } 69 } 70 71 //------------------------------------------------------------------------------ 72 // Single element -> E-vector 73 //------------------------------------------------------------------------------ 74 template <int NUM_COMP, int P_1D> 75 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 76 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 77 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 78 const CeedInt node = data.t_id_x + data.t_id_y * P_1D; 79 const CeedInt ind = node * strides_node + elem * strides_elem; 80 81 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 82 d_v[ind + comp * strides_comp] = r_v[comp]; 83 } 84 } 85 } 86 87 //------------------------------------------------------------------------------ 88 // 3D 89 //------------------------------------------------------------------------------ 90 91 //------------------------------------------------------------------------------ 92 // E-vector -> single element 93 //------------------------------------------------------------------------------ 94 template <int NUM_COMP, int P_1D> 95 inline __device__ void ReadElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 96 const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 97 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 98 for (CeedInt z = 0; z < P_1D; z++) { 99 const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D; 100 const CeedInt ind = node * strides_node + elem * strides_elem; 101 102 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 103 r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp]; 104 } 105 } 106 } 107 } 108 109 //------------------------------------------------------------------------------ 110 // Single element -> E-vector 111 //------------------------------------------------------------------------------ 112 template <int NUM_COMP, int P_1D> 113 inline __device__ void WriteElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 114 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 115 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 116 for (CeedInt z = 0; z < P_1D; z++) { 117 const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D; 118 const CeedInt ind = node * strides_node + elem * strides_elem; 119 120 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 121 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; 122 } 123 } 124 } 125 } 126 127 //------------------------------------------------------------------------------ 128 129 #endif // CEED_CUDA_SHARED_BASIS_READ_WRITE_TEMPLATES_H 130