1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 /// @file 9 /// Internal header for CUDA shared memory basis read/write templates 10 #include <ceed/types.h> 11 12 //------------------------------------------------------------------------------ 13 // 1D 14 //------------------------------------------------------------------------------ 15 16 //------------------------------------------------------------------------------ 17 // E-vector -> single element 18 //------------------------------------------------------------------------------ 19 template <int NUM_COMP, int P_1D> 20 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 21 const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 22 if (data.t_id_x < P_1D) { 23 const CeedInt node = data.t_id_x; 24 const CeedInt ind = node * strides_node + elem * strides_elem; 25 26 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 27 r_u[comp] = d_u[ind + comp * strides_comp]; 28 } 29 } 30 } 31 32 //------------------------------------------------------------------------------ 33 // Single element -> E-vector 34 //------------------------------------------------------------------------------ 35 template <int NUM_COMP, int P_1D> 36 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 37 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 38 if (data.t_id_x < P_1D) { 39 const CeedInt node = data.t_id_x; 40 const CeedInt ind = node * strides_node + elem * strides_elem; 41 42 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 43 d_v[ind + comp * strides_comp] = r_v[comp]; 44 } 45 } 46 } 47 48 template <int NUM_COMP, int P_1D> 49 inline __device__ void SumElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 50 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 51 if (data.t_id_x < P_1D) { 52 const CeedInt node = data.t_id_x; 53 const CeedInt ind = node * strides_node + elem * strides_elem; 54 55 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 56 d_v[ind + comp * strides_comp] += r_v[comp]; 57 } 58 } 59 } 60 61 //------------------------------------------------------------------------------ 62 // 2D 63 //------------------------------------------------------------------------------ 64 65 //------------------------------------------------------------------------------ 66 // E-vector -> single element 67 //------------------------------------------------------------------------------ 68 template <int NUM_COMP, int P_1D> 69 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 70 const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 71 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 72 const CeedInt node = data.t_id_x + data.t_id_y * P_1D; 73 const CeedInt ind = node * strides_node + elem * strides_elem; 74 75 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 76 r_u[comp] = d_u[ind + comp * strides_comp]; 77 } 78 } 79 } 80 81 //------------------------------------------------------------------------------ 82 // Single element -> E-vector 83 //------------------------------------------------------------------------------ 84 template <int NUM_COMP, int P_1D> 85 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 86 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 87 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 88 const CeedInt node = data.t_id_x + data.t_id_y * P_1D; 89 const CeedInt ind = node * strides_node + elem * strides_elem; 90 91 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 92 d_v[ind + comp * strides_comp] = r_v[comp]; 93 } 94 } 95 } 96 97 template <int NUM_COMP, int P_1D> 98 inline __device__ void SumElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 99 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 100 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 101 const CeedInt node = data.t_id_x + data.t_id_y * P_1D; 102 const CeedInt ind = node * strides_node + elem * strides_elem; 103 104 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 105 d_v[ind + comp * strides_comp] += r_v[comp]; 106 } 107 } 108 } 109 110 //------------------------------------------------------------------------------ 111 // 3D 112 //------------------------------------------------------------------------------ 113 114 //------------------------------------------------------------------------------ 115 // E-vector -> single element 116 //------------------------------------------------------------------------------ 117 template <int NUM_COMP, int P_1D> 118 inline __device__ void ReadElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 119 const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 120 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 121 for (CeedInt z = 0; z < P_1D; z++) { 122 const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D; 123 const CeedInt ind = node * strides_node + elem * strides_elem; 124 125 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 126 r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp]; 127 } 128 } 129 } 130 } 131 132 //------------------------------------------------------------------------------ 133 // Single element -> E-vector 134 //------------------------------------------------------------------------------ 135 template <int NUM_COMP, int P_1D> 136 inline __device__ void WriteElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 137 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 138 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 139 for (CeedInt z = 0; z < P_1D; z++) { 140 const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D; 141 const CeedInt ind = node * strides_node + elem * strides_elem; 142 143 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 144 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; 145 } 146 } 147 } 148 } 149 150 template <int NUM_COMP, int P_1D> 151 inline __device__ void SumElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 152 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 153 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 154 for (CeedInt z = 0; z < P_1D; z++) { 155 const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D; 156 const CeedInt ind = node * strides_node + elem * strides_elem; 157 158 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 159 d_v[ind + comp * strides_comp] += r_v[z + comp * P_1D]; 160 } 161 } 162 } 163 } 164