hip-ref-basis-tensor-at-points.h - OpenGrok cross reference for /libCEED/include/ceed/jit-source/hip/hip-ref-basis-tensor-at-points.h

Lines Matching refs:CeedInt
19 …for (CeedInt i = 2; i < Q_1D; i++) chebyshev_x[i] = 2 * x * chebyshev_x[i - 1] - chebyshev_x[i - 2…  in ChebyshevPolynomialsAtPoint()
30   for (CeedInt i = 2; i < Q_1D; i++) {  in ChebyshevDerivativeAtPoint()
43 extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedScalar *__restrict__ ch…  in InterpAtPoints()
44 …                const CeedInt *__restrict__ points_per_elem, const CeedScalar *__restrict__ coords,  in InterpAtPoints()
46   const CeedInt i = threadIdx.x;  in InterpAtPoints()
54   for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) {  in InterpAtPoints()
58   const CeedInt P             = BASIS_P_1D;  in InterpAtPoints()
59   const CeedInt Q             = BASIS_Q_1D;  in InterpAtPoints()
60   const CeedInt u_stride      = BASIS_NUM_NODES;  in InterpAtPoints()
61   const CeedInt v_stride      = BASIS_NUM_PTS;  in InterpAtPoints()
62   const CeedInt u_comp_stride = num_elem * BASIS_NUM_NODES;  in InterpAtPoints()
63   const CeedInt v_comp_stride = num_elem * BASIS_NUM_PTS;  in InterpAtPoints()
64   const CeedInt u_size        = BASIS_NUM_NODES;  in InterpAtPoints()
67   for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) {  in InterpAtPoints()
68     for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) {  in InterpAtPoints()
71       CeedInt           pre   = u_size;  in InterpAtPoints()
72       CeedInt           post  = 1;  in InterpAtPoints()
75       for (CeedInt d = 0; d < BASIS_DIM; d++) {  in InterpAtPoints()
81         const CeedInt     writeLen = pre * post * Q;  in InterpAtPoints()
84         for (CeedInt k = i; k < writeLen; k += blockDim.x) {  in InterpAtPoints()
85           const CeedInt c   = k % post;  in InterpAtPoints()
86           const CeedInt j   = (k / post) % Q;  in InterpAtPoints()
87           const CeedInt a   = k / (post * Q);  in InterpAtPoints()
90 …for (CeedInt b = 0; b < P; b++) v_k += s_chebyshev_interp_1d[j * BASIS_P_1D + b] * in[(a * P + b) …  in InterpAtPoints()
98       for (CeedInt p = threadIdx.x; p < BASIS_NUM_PTS; p += blockDim.x) {  in InterpAtPoints()
101         for (CeedInt d = 0; d < BASIS_DIM; d++) {  in InterpAtPoints()
111           for (CeedInt a = 0; a < pre; a++) {  in InterpAtPoints()
112             for (CeedInt c = 0; c < post; c++) {  in InterpAtPoints()
115               for (CeedInt b = 0; b < Q; b++) v_k += chebyshev_x[b] * in[(a * Q + b) * post + c];  in InterpAtPoints()
126 extern "C" __global__ void InterpTransposeAtPoints(const CeedInt num_elem, const CeedScalar *__rest…  in InterpTransposeAtPoints()
127 …                const CeedInt *__restrict__ points_per_elem, const CeedScalar *__restrict__ coords,  in InterpTransposeAtPoints()
129   const CeedInt i = threadIdx.x;  in InterpTransposeAtPoints()
137   for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) {  in InterpTransposeAtPoints()
141   const CeedInt P             = BASIS_P_1D;  in InterpTransposeAtPoints()
142   const CeedInt Q             = BASIS_Q_1D;  in InterpTransposeAtPoints()
143   const CeedInt u_stride      = BASIS_NUM_PTS;  in InterpTransposeAtPoints()
144   const CeedInt v_stride      = BASIS_NUM_NODES;  in InterpTransposeAtPoints()
145   const CeedInt u_comp_stride = num_elem * BASIS_NUM_PTS;  in InterpTransposeAtPoints()
146   const CeedInt v_comp_stride = num_elem * BASIS_NUM_NODES;  in InterpTransposeAtPoints()
147   const CeedInt u_size        = BASIS_NUM_PTS;  in InterpTransposeAtPoints()
150   for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) {  in InterpTransposeAtPoints()
151     for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) {  in InterpTransposeAtPoints()
154       CeedInt           pre   = 1;  in InterpTransposeAtPoints()
155       CeedInt           post  = 1;  in InterpTransposeAtPoints()
158       for (CeedInt k = i; k < BASIS_NUM_QPTS; k += blockDim.x) {  in InterpTransposeAtPoints()
164       for (CeedInt p = threadIdx.x; p < BASIS_NUM_PTS; p += blockDim.x) {  in InterpTransposeAtPoints()
168         for (CeedInt d = 0; d < BASIS_DIM; d++) {  in InterpTransposeAtPoints()
178           for (CeedInt a = 0; a < pre; a++) {  in InterpTransposeAtPoints()
179             for (CeedInt c = 0; c < post; c++) {  in InterpTransposeAtPoints()
181 …for (CeedInt j = 0; j < Q; j++) atomicAdd(&out[(a * Q + (j + p) % Q) * post + c], chebyshev_x[(j +…  in InterpTransposeAtPoints()
183 …   for (CeedInt j = 0; j < Q; j++) out[(a * Q + j) * post + c] = chebyshev_x[j] * in[a * post + c];  in InterpTransposeAtPoints()
194       for (CeedInt d = 0; d < BASIS_DIM; d++) {  in InterpTransposeAtPoints()
200         const CeedInt     writeLen = pre * post * P;  in InterpTransposeAtPoints()
203         for (CeedInt k = i; k < writeLen; k += blockDim.x) {  in InterpTransposeAtPoints()
204           const CeedInt c   = k % post;  in InterpTransposeAtPoints()
205           const CeedInt j   = (k / post) % P;  in InterpTransposeAtPoints()
206           const CeedInt a   = k / (post * P);  in InterpTransposeAtPoints()
209 …for (CeedInt b = 0; b < Q; b++) v_k += s_chebyshev_interp_1d[j + b * BASIS_P_1D] * in[(a * Q + b) …  in InterpTransposeAtPoints()
222 extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedScalar *__restrict__ cheb…  in GradAtPoints()
223 …                const CeedInt *__restrict__ points_per_elem, const CeedScalar *__restrict__ coords,  in GradAtPoints()
225   const CeedInt i = threadIdx.x;  in GradAtPoints()
233   for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) {  in GradAtPoints()
237   const CeedInt P             = BASIS_P_1D;  in GradAtPoints()
238   const CeedInt Q             = BASIS_Q_1D;  in GradAtPoints()
239   const CeedInt u_stride      = BASIS_NUM_NODES;  in GradAtPoints()
240   const CeedInt v_stride      = BASIS_NUM_PTS;  in GradAtPoints()
241   const CeedInt u_comp_stride = num_elem * BASIS_NUM_NODES;  in GradAtPoints()
242   const CeedInt v_comp_stride = num_elem * BASIS_NUM_PTS;  in GradAtPoints()
243   const CeedInt u_size        = BASIS_NUM_NODES;  in GradAtPoints()
244   const CeedInt u_dim_stride  = 0;  in GradAtPoints()
245   const CeedInt v_dim_stride  = num_elem * BASIS_NUM_PTS * BASIS_NUM_COMP;  in GradAtPoints()
248   for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) {  in GradAtPoints()
249     for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) {  in GradAtPoints()
251       CeedInt           pre   = u_size;  in GradAtPoints()
252       CeedInt           post  = 1;  in GradAtPoints()
255       for (CeedInt d = 0; d < BASIS_DIM; d++) {  in GradAtPoints()
261         const CeedInt     writeLen = pre * post * Q;  in GradAtPoints()
264         for (CeedInt k = i; k < writeLen; k += blockDim.x) {  in GradAtPoints()
265           const CeedInt c   = k % post;  in GradAtPoints()
266           const CeedInt j   = (k / post) % Q;  in GradAtPoints()
267           const CeedInt a   = k / (post * Q);  in GradAtPoints()
270 …for (CeedInt b = 0; b < P; b++) v_k += s_chebyshev_interp_1d[j * BASIS_P_1D + b] * in[(a * P + b) …  in GradAtPoints()
278       for (CeedInt p = threadIdx.x; p < BASIS_NUM_PTS; p += blockDim.x) {  in GradAtPoints()
279         for (CeedInt dim_1 = 0; dim_1 < BASIS_DIM; dim_1++) {  in GradAtPoints()
284           for (CeedInt dim_2 = 0; dim_2 < BASIS_DIM; dim_2++) {  in GradAtPoints()
295             for (CeedInt a = 0; a < pre; a++) {  in GradAtPoints()
296               for (CeedInt c = 0; c < post; c++) {  in GradAtPoints()
299                 for (CeedInt b = 0; b < Q; b++) v_k += chebyshev_x[b] * in[(a * Q + b) * post + c];  in GradAtPoints()
311 extern "C" __global__ void GradTransposeAtPoints(const CeedInt num_elem, const CeedScalar *__restri…  in GradTransposeAtPoints()
312 …                const CeedInt *__restrict__ points_per_elem, const CeedScalar *__restrict__ coords,  in GradTransposeAtPoints()
314   const CeedInt i = threadIdx.x;  in GradTransposeAtPoints()
322   for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) {  in GradTransposeAtPoints()
326   const CeedInt P             = BASIS_P_1D;  in GradTransposeAtPoints()
327   const CeedInt Q             = BASIS_Q_1D;  in GradTransposeAtPoints()
328   const CeedInt u_stride      = BASIS_NUM_PTS;  in GradTransposeAtPoints()
329   const CeedInt v_stride      = BASIS_NUM_NODES;  in GradTransposeAtPoints()
330   const CeedInt u_comp_stride = num_elem * BASIS_NUM_PTS;  in GradTransposeAtPoints()
331   const CeedInt v_comp_stride = num_elem * BASIS_NUM_NODES;  in GradTransposeAtPoints()
332   const CeedInt u_size        = BASIS_NUM_PTS;  in GradTransposeAtPoints()
333   const CeedInt u_dim_stride  = num_elem * BASIS_NUM_PTS * BASIS_NUM_COMP;  in GradTransposeAtPoints()
334   const CeedInt v_dim_stride  = 0;  in GradTransposeAtPoints()
337   for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) {  in GradTransposeAtPoints()
338     for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) {  in GradTransposeAtPoints()
340       CeedInt     pre   = 1;  in GradTransposeAtPoints()
341       CeedInt     post  = 1;  in GradTransposeAtPoints()
344       for (CeedInt k = i; k < BASIS_NUM_QPTS; k += blockDim.x) {  in GradTransposeAtPoints()
350       for (CeedInt p = threadIdx.x; p < BASIS_NUM_PTS; p += blockDim.x) {  in GradTransposeAtPoints()
352         for (CeedInt dim_1 = 0; dim_1 < BASIS_DIM; dim_1++) {  in GradTransposeAtPoints()
357           for (CeedInt dim_2 = 0; dim_2 < BASIS_DIM; dim_2++) {  in GradTransposeAtPoints()
368             for (CeedInt a = 0; a < pre; a++) {  in GradTransposeAtPoints()
369               for (CeedInt c = 0; c < post; c++) {  in GradTransposeAtPoints()
371 …for (CeedInt j = 0; j < Q; j++) atomicAdd(&out[(a * Q + (j + p) % Q) * post + c], chebyshev_x[(j +…  in GradTransposeAtPoints()
373 …   for (CeedInt j = 0; j < Q; j++) out[(a * Q + j) * post + c] = chebyshev_x[j] * in[a * post + c];  in GradTransposeAtPoints()
385       for (CeedInt d = 0; d < BASIS_DIM; d++) {  in GradTransposeAtPoints()
391         const CeedInt     writeLen = pre * post * P;  in GradTransposeAtPoints()
394         for (CeedInt k = i; k < writeLen; k += blockDim.x) {  in GradTransposeAtPoints()
395           const CeedInt c   = k % post;  in GradTransposeAtPoints()
396           const CeedInt j   = (k / post) % P;  in GradTransposeAtPoints()
397           const CeedInt a   = k / (post * P);  in GradTransposeAtPoints()
400 …for (CeedInt b = 0; b < Q; b++) v_k += s_chebyshev_interp_1d[j + b * BASIS_P_1D] * in[(a * Q + b) …  in GradTransposeAtPoints()