1 // Copyright (c) 2017-2024, Lawrence Livermore National Security, LLC and other CEED contributors. 2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3 // 4 // SPDX-License-Identifier: BSD-2-Clause 5 // 6 // This file is part of CEED: http://github.com/ceed 7 8 /// @file 9 /// Internal header for CUDA shared memory basis read/write templates 10 11 #include <ceed/types.h> 12 13 //------------------------------------------------------------------------------ 14 // 1D 15 //------------------------------------------------------------------------------ 16 17 //------------------------------------------------------------------------------ 18 // E-vector -> single element 19 //------------------------------------------------------------------------------ 20 template <int NUM_COMP, int P_1D> 21 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 22 const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 23 if (data.t_id_x < P_1D) { 24 const CeedInt node = data.t_id_x; 25 const CeedInt ind = node * strides_node + elem * strides_elem; 26 27 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 28 r_u[comp] = d_u[ind + comp * strides_comp]; 29 } 30 } 31 } 32 33 //------------------------------------------------------------------------------ 34 // Single element -> E-vector 35 //------------------------------------------------------------------------------ 36 template <int NUM_COMP, int P_1D> 37 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 38 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 39 if (data.t_id_x < P_1D) { 40 const CeedInt node = data.t_id_x; 41 const CeedInt ind = node * strides_node + elem * strides_elem; 42 43 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 44 d_v[ind + comp * strides_comp] = r_v[comp]; 45 } 46 } 47 } 48 49 template <int NUM_COMP, int P_1D> 50 inline __device__ void SumElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 51 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 52 if (data.t_id_x < P_1D) { 53 const CeedInt node = data.t_id_x; 54 const CeedInt ind = node * strides_node + elem * strides_elem; 55 56 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 57 d_v[ind + comp * strides_comp] += r_v[comp]; 58 } 59 } 60 } 61 62 //------------------------------------------------------------------------------ 63 // 2D 64 //------------------------------------------------------------------------------ 65 66 //------------------------------------------------------------------------------ 67 // E-vector -> single element 68 //------------------------------------------------------------------------------ 69 template <int NUM_COMP, int P_1D> 70 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 71 const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 72 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 73 const CeedInt node = data.t_id_x + data.t_id_y * P_1D; 74 const CeedInt ind = node * strides_node + elem * strides_elem; 75 76 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 77 r_u[comp] = d_u[ind + comp * strides_comp]; 78 } 79 } 80 } 81 82 //------------------------------------------------------------------------------ 83 // Single element -> E-vector 84 //------------------------------------------------------------------------------ 85 template <int NUM_COMP, int P_1D> 86 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 87 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 88 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 89 const CeedInt node = data.t_id_x + data.t_id_y * P_1D; 90 const CeedInt ind = node * strides_node + elem * strides_elem; 91 92 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 93 d_v[ind + comp * strides_comp] = r_v[comp]; 94 } 95 } 96 } 97 98 template <int NUM_COMP, int P_1D> 99 inline __device__ void SumElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 100 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 101 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 102 const CeedInt node = data.t_id_x + data.t_id_y * P_1D; 103 const CeedInt ind = node * strides_node + elem * strides_elem; 104 105 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 106 d_v[ind + comp * strides_comp] += r_v[comp]; 107 } 108 } 109 } 110 111 //------------------------------------------------------------------------------ 112 // 3D 113 //------------------------------------------------------------------------------ 114 115 //------------------------------------------------------------------------------ 116 // E-vector -> single element 117 //------------------------------------------------------------------------------ 118 template <int NUM_COMP, int P_1D> 119 inline __device__ void ReadElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 120 const CeedInt strides_elem, const CeedScalar *__restrict__ d_u, CeedScalar *r_u) { 121 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 122 for (CeedInt z = 0; z < P_1D; z++) { 123 const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D; 124 const CeedInt ind = node * strides_node + elem * strides_elem; 125 126 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 127 r_u[z + comp * P_1D] = d_u[ind + comp * strides_comp]; 128 } 129 } 130 } 131 } 132 133 //------------------------------------------------------------------------------ 134 // Single element -> E-vector 135 //------------------------------------------------------------------------------ 136 template <int NUM_COMP, int P_1D> 137 inline __device__ void WriteElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 138 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 139 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 140 for (CeedInt z = 0; z < P_1D; z++) { 141 const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D; 142 const CeedInt ind = node * strides_node + elem * strides_elem; 143 144 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 145 d_v[ind + comp * strides_comp] = r_v[z + comp * P_1D]; 146 } 147 } 148 } 149 } 150 151 template <int NUM_COMP, int P_1D> 152 inline __device__ void SumElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt strides_node, const CeedInt strides_comp, 153 const CeedInt strides_elem, const CeedScalar *r_v, CeedScalar *d_v) { 154 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { 155 for (CeedInt z = 0; z < P_1D; z++) { 156 const CeedInt node = data.t_id_x + data.t_id_y * P_1D + z * P_1D * P_1D; 157 const CeedInt ind = node * strides_node + elem * strides_elem; 158 159 for (CeedInt comp = 0; comp < NUM_COMP; comp++) { 160 d_v[ind + comp * strides_comp] += r_v[z + comp * P_1D]; 161 } 162 } 163 } 164 } 165