Lines Matching refs:CeedInt

19 …for (CeedInt i = 2; i < Q_1D; i++) chebyshev_x[i] = 2 * x * chebyshev_x[i - 1] - chebyshev_x[i - 2…  in ChebyshevPolynomialsAtPoint()
30 for (CeedInt i = 2; i < Q_1D; i++) { in ChebyshevDerivativeAtPoint()
43 extern "C" __global__ void InterpAtPoints(const CeedInt num_elem, const CeedScalar *__restrict__ ch… in InterpAtPoints()
44 … const CeedInt *__restrict__ points_per_elem, const CeedScalar *__restrict__ coords, in InterpAtPoints()
46 const CeedInt i = threadIdx.x; in InterpAtPoints()
54 for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) { in InterpAtPoints()
58 const CeedInt P = BASIS_P_1D; in InterpAtPoints()
59 const CeedInt Q = BASIS_Q_1D; in InterpAtPoints()
60 const CeedInt u_stride = BASIS_NUM_NODES; in InterpAtPoints()
61 const CeedInt v_stride = BASIS_NUM_PTS; in InterpAtPoints()
62 const CeedInt u_comp_stride = num_elem * BASIS_NUM_NODES; in InterpAtPoints()
63 const CeedInt v_comp_stride = num_elem * BASIS_NUM_PTS; in InterpAtPoints()
64 const CeedInt u_size = BASIS_NUM_NODES; in InterpAtPoints()
67 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpAtPoints()
68 for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) { in InterpAtPoints()
71 CeedInt pre = u_size; in InterpAtPoints()
72 CeedInt post = 1; in InterpAtPoints()
75 for (CeedInt d = 0; d < BASIS_DIM; d++) { in InterpAtPoints()
81 const CeedInt writeLen = pre * post * Q; in InterpAtPoints()
84 for (CeedInt k = i; k < writeLen; k += blockDim.x) { in InterpAtPoints()
85 const CeedInt c = k % post; in InterpAtPoints()
86 const CeedInt j = (k / post) % Q; in InterpAtPoints()
87 const CeedInt a = k / (post * Q); in InterpAtPoints()
90 …for (CeedInt b = 0; b < P; b++) v_k += s_chebyshev_interp_1d[j * BASIS_P_1D + b] * in[(a * P + b) … in InterpAtPoints()
98 for (CeedInt p = threadIdx.x; p < BASIS_NUM_PTS; p += blockDim.x) { in InterpAtPoints()
101 for (CeedInt d = 0; d < BASIS_DIM; d++) { in InterpAtPoints()
111 for (CeedInt a = 0; a < pre; a++) { in InterpAtPoints()
112 for (CeedInt c = 0; c < post; c++) { in InterpAtPoints()
115 for (CeedInt b = 0; b < Q; b++) v_k += chebyshev_x[b] * in[(a * Q + b) * post + c]; in InterpAtPoints()
126 extern "C" __global__ void InterpTransposeAtPoints(const CeedInt num_elem, const CeedScalar *__rest… in InterpTransposeAtPoints()
127 … const CeedInt *__restrict__ points_per_elem, const CeedScalar *__restrict__ coords, in InterpTransposeAtPoints()
129 const CeedInt i = threadIdx.x; in InterpTransposeAtPoints()
137 for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) { in InterpTransposeAtPoints()
141 const CeedInt P = BASIS_P_1D; in InterpTransposeAtPoints()
142 const CeedInt Q = BASIS_Q_1D; in InterpTransposeAtPoints()
143 const CeedInt u_stride = BASIS_NUM_PTS; in InterpTransposeAtPoints()
144 const CeedInt v_stride = BASIS_NUM_NODES; in InterpTransposeAtPoints()
145 const CeedInt u_comp_stride = num_elem * BASIS_NUM_PTS; in InterpTransposeAtPoints()
146 const CeedInt v_comp_stride = num_elem * BASIS_NUM_NODES; in InterpTransposeAtPoints()
147 const CeedInt u_size = BASIS_NUM_PTS; in InterpTransposeAtPoints()
150 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpTransposeAtPoints()
151 for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) { in InterpTransposeAtPoints()
154 CeedInt pre = 1; in InterpTransposeAtPoints()
155 CeedInt post = 1; in InterpTransposeAtPoints()
158 for (CeedInt k = i; k < BASIS_NUM_QPTS; k += blockDim.x) { in InterpTransposeAtPoints()
164 for (CeedInt p = threadIdx.x; p < BASIS_NUM_PTS; p += blockDim.x) { in InterpTransposeAtPoints()
168 for (CeedInt d = 0; d < BASIS_DIM; d++) { in InterpTransposeAtPoints()
178 for (CeedInt a = 0; a < pre; a++) { in InterpTransposeAtPoints()
179 for (CeedInt c = 0; c < post; c++) { in InterpTransposeAtPoints()
181 …for (CeedInt j = 0; j < Q; j++) atomicAdd(&out[(a * Q + (j + p) % Q) * post + c], chebyshev_x[(j +… in InterpTransposeAtPoints()
183 … for (CeedInt j = 0; j < Q; j++) out[(a * Q + j) * post + c] = chebyshev_x[j] * in[a * post + c]; in InterpTransposeAtPoints()
194 for (CeedInt d = 0; d < BASIS_DIM; d++) { in InterpTransposeAtPoints()
200 const CeedInt writeLen = pre * post * P; in InterpTransposeAtPoints()
203 for (CeedInt k = i; k < writeLen; k += blockDim.x) { in InterpTransposeAtPoints()
204 const CeedInt c = k % post; in InterpTransposeAtPoints()
205 const CeedInt j = (k / post) % P; in InterpTransposeAtPoints()
206 const CeedInt a = k / (post * P); in InterpTransposeAtPoints()
209 …for (CeedInt b = 0; b < Q; b++) v_k += s_chebyshev_interp_1d[j + b * BASIS_P_1D] * in[(a * Q + b) … in InterpTransposeAtPoints()
222 extern "C" __global__ void GradAtPoints(const CeedInt num_elem, const CeedScalar *__restrict__ cheb… in GradAtPoints()
223 … const CeedInt *__restrict__ points_per_elem, const CeedScalar *__restrict__ coords, in GradAtPoints()
225 const CeedInt i = threadIdx.x; in GradAtPoints()
233 for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) { in GradAtPoints()
237 const CeedInt P = BASIS_P_1D; in GradAtPoints()
238 const CeedInt Q = BASIS_Q_1D; in GradAtPoints()
239 const CeedInt u_stride = BASIS_NUM_NODES; in GradAtPoints()
240 const CeedInt v_stride = BASIS_NUM_PTS; in GradAtPoints()
241 const CeedInt u_comp_stride = num_elem * BASIS_NUM_NODES; in GradAtPoints()
242 const CeedInt v_comp_stride = num_elem * BASIS_NUM_PTS; in GradAtPoints()
243 const CeedInt u_size = BASIS_NUM_NODES; in GradAtPoints()
244 const CeedInt u_dim_stride = 0; in GradAtPoints()
245 const CeedInt v_dim_stride = num_elem * BASIS_NUM_PTS * BASIS_NUM_COMP; in GradAtPoints()
248 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in GradAtPoints()
249 for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) { in GradAtPoints()
251 CeedInt pre = u_size; in GradAtPoints()
252 CeedInt post = 1; in GradAtPoints()
255 for (CeedInt d = 0; d < BASIS_DIM; d++) { in GradAtPoints()
261 const CeedInt writeLen = pre * post * Q; in GradAtPoints()
264 for (CeedInt k = i; k < writeLen; k += blockDim.x) { in GradAtPoints()
265 const CeedInt c = k % post; in GradAtPoints()
266 const CeedInt j = (k / post) % Q; in GradAtPoints()
267 const CeedInt a = k / (post * Q); in GradAtPoints()
270 …for (CeedInt b = 0; b < P; b++) v_k += s_chebyshev_interp_1d[j * BASIS_P_1D + b] * in[(a * P + b) … in GradAtPoints()
278 for (CeedInt p = threadIdx.x; p < BASIS_NUM_PTS; p += blockDim.x) { in GradAtPoints()
279 for (CeedInt dim_1 = 0; dim_1 < BASIS_DIM; dim_1++) { in GradAtPoints()
284 for (CeedInt dim_2 = 0; dim_2 < BASIS_DIM; dim_2++) { in GradAtPoints()
295 for (CeedInt a = 0; a < pre; a++) { in GradAtPoints()
296 for (CeedInt c = 0; c < post; c++) { in GradAtPoints()
299 for (CeedInt b = 0; b < Q; b++) v_k += chebyshev_x[b] * in[(a * Q + b) * post + c]; in GradAtPoints()
311 extern "C" __global__ void GradTransposeAtPoints(const CeedInt num_elem, const CeedScalar *__restri… in GradTransposeAtPoints()
312 … const CeedInt *__restrict__ points_per_elem, const CeedScalar *__restrict__ coords, in GradTransposeAtPoints()
314 const CeedInt i = threadIdx.x; in GradTransposeAtPoints()
322 for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) { in GradTransposeAtPoints()
326 const CeedInt P = BASIS_P_1D; in GradTransposeAtPoints()
327 const CeedInt Q = BASIS_Q_1D; in GradTransposeAtPoints()
328 const CeedInt u_stride = BASIS_NUM_PTS; in GradTransposeAtPoints()
329 const CeedInt v_stride = BASIS_NUM_NODES; in GradTransposeAtPoints()
330 const CeedInt u_comp_stride = num_elem * BASIS_NUM_PTS; in GradTransposeAtPoints()
331 const CeedInt v_comp_stride = num_elem * BASIS_NUM_NODES; in GradTransposeAtPoints()
332 const CeedInt u_size = BASIS_NUM_PTS; in GradTransposeAtPoints()
333 const CeedInt u_dim_stride = num_elem * BASIS_NUM_PTS * BASIS_NUM_COMP; in GradTransposeAtPoints()
334 const CeedInt v_dim_stride = 0; in GradTransposeAtPoints()
337 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in GradTransposeAtPoints()
338 for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) { in GradTransposeAtPoints()
340 CeedInt pre = 1; in GradTransposeAtPoints()
341 CeedInt post = 1; in GradTransposeAtPoints()
344 for (CeedInt k = i; k < BASIS_NUM_QPTS; k += blockDim.x) { in GradTransposeAtPoints()
350 for (CeedInt p = threadIdx.x; p < BASIS_NUM_PTS; p += blockDim.x) { in GradTransposeAtPoints()
352 for (CeedInt dim_1 = 0; dim_1 < BASIS_DIM; dim_1++) { in GradTransposeAtPoints()
357 for (CeedInt dim_2 = 0; dim_2 < BASIS_DIM; dim_2++) { in GradTransposeAtPoints()
368 for (CeedInt a = 0; a < pre; a++) { in GradTransposeAtPoints()
369 for (CeedInt c = 0; c < post; c++) { in GradTransposeAtPoints()
371 …for (CeedInt j = 0; j < Q; j++) atomicAdd(&out[(a * Q + (j + p) % Q) * post + c], chebyshev_x[(j +… in GradTransposeAtPoints()
373 … for (CeedInt j = 0; j < Q; j++) out[(a * Q + j) * post + c] = chebyshev_x[j] * in[a * post + c]; in GradTransposeAtPoints()
385 for (CeedInt d = 0; d < BASIS_DIM; d++) { in GradTransposeAtPoints()
391 const CeedInt writeLen = pre * post * P; in GradTransposeAtPoints()
394 for (CeedInt k = i; k < writeLen; k += blockDim.x) { in GradTransposeAtPoints()
395 const CeedInt c = k % post; in GradTransposeAtPoints()
396 const CeedInt j = (k / post) % P; in GradTransposeAtPoints()
397 const CeedInt a = k / (post * P); in GradTransposeAtPoints()
400 …for (CeedInt b = 0; b < Q; b++) v_k += s_chebyshev_interp_1d[j + b * BASIS_P_1D] * in[(a * Q + b) … in GradTransposeAtPoints()