Lines Matching refs:CeedInt
19 extern "C" __global__ void Interp(const CeedInt num_elem, const CeedInt is_transpose, const CeedSca… in Interp()
21 const CeedInt i = threadIdx.x; in Interp()
27 for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) { in Interp()
31 const CeedInt P = is_transpose ? BASIS_Q_1D : BASIS_P_1D; in Interp()
32 const CeedInt Q = is_transpose ? BASIS_P_1D : BASIS_Q_1D; in Interp()
33 const CeedInt stride_0 = is_transpose ? 1 : BASIS_P_1D; in Interp()
34 const CeedInt stride_1 = is_transpose ? BASIS_P_1D : 1; in Interp()
35 const CeedInt u_stride = is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES; in Interp()
36 const CeedInt v_stride = is_transpose ? BASIS_NUM_NODES : BASIS_NUM_QPTS; in Interp()
37 const CeedInt u_comp_stride = num_elem * (is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES); in Interp()
38 const CeedInt v_comp_stride = num_elem * (is_transpose ? BASIS_NUM_NODES : BASIS_NUM_QPTS); in Interp()
39 const CeedInt u_size = is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES; in Interp()
42 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Interp()
43 for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) { in Interp()
46 CeedInt pre = u_size; in Interp()
47 CeedInt post = 1; in Interp()
49 for (CeedInt d = 0; d < BASIS_DIM; d++) { in Interp()
55 const CeedInt writeLen = pre * post * Q; in Interp()
58 for (CeedInt k = i; k < writeLen; k += blockDim.x) { in Interp()
59 const CeedInt c = k % post; in Interp()
60 const CeedInt j = (k / post) % Q; in Interp()
61 const CeedInt a = k / (post * Q); in Interp()
64 …for (CeedInt b = 0; b < P; b++) v_k += s_interp_1d[j * stride_0 + b * stride_1] * in[(a * P + b) *… in Interp()
77 extern "C" __global__ void Grad(const CeedInt num_elem, const CeedInt is_transpose, const CeedScala… in Grad()
79 const CeedInt i = threadIdx.x; in Grad()
86 for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) { in Grad()
91 const CeedInt P = is_transpose ? BASIS_Q_1D : BASIS_P_1D; in Grad()
92 const CeedInt Q = is_transpose ? BASIS_P_1D : BASIS_Q_1D; in Grad()
93 const CeedInt stride_0 = is_transpose ? 1 : BASIS_P_1D; in Grad()
94 const CeedInt stride_1 = is_transpose ? BASIS_P_1D : 1; in Grad()
95 const CeedInt u_stride = is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES; in Grad()
96 const CeedInt v_stride = is_transpose ? BASIS_NUM_NODES : BASIS_NUM_QPTS; in Grad()
97 const CeedInt u_comp_stride = num_elem * (is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES); in Grad()
98 const CeedInt v_comp_stride = num_elem * (is_transpose ? BASIS_NUM_NODES : BASIS_NUM_QPTS); in Grad()
99 const CeedInt u_dim_stride = is_transpose ? num_elem * BASIS_NUM_QPTS * BASIS_NUM_COMP : 0; in Grad()
100 const CeedInt v_dim_stride = is_transpose ? 0 : num_elem * BASIS_NUM_QPTS * BASIS_NUM_COMP; in Grad()
103 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Grad()
104 for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) { in Grad()
106 for (CeedInt dim_1 = 0; dim_1 < BASIS_DIM; dim_1++) { in Grad()
107 CeedInt pre = is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES; in Grad()
108 CeedInt post = 1; in Grad()
112 for (CeedInt dim_2 = 0; dim_2 < BASIS_DIM; dim_2++) { in Grad()
119 const CeedInt writeLen = pre * post * Q; in Grad()
122 for (CeedInt k = i; k < writeLen; k += blockDim.x) { in Grad()
123 const CeedInt c = k % post; in Grad()
124 const CeedInt j = (k / post) % Q; in Grad()
125 const CeedInt a = k / (post * Q); in Grad()
128 …for (CeedInt b = 0; b < P; b++) v_k += op[j * stride_0 + b * stride_1] * in[(a * P + b) * post + c… in Grad()
142 __device__ void Weight1d(const CeedInt num_elem, const CeedScalar *q_weight_1d, CeedScalar *w) { in Weight1d()
143 const CeedInt i = threadIdx.x; in Weight1d()
155 __device__ void Weight2d(const CeedInt num_elem, const CeedScalar *q_weight_1d, CeedScalar *w) { in Weight2d()
156 const CeedInt i = threadIdx.x; in Weight2d()
157 const CeedInt j = threadIdx.y; in Weight2d()
173 __device__ void Weight3d(const CeedInt num_elem, const CeedScalar *q_weight_1d, CeedScalar *w) { in Weight3d()
174 const CeedInt i = threadIdx.x; in Weight3d()
175 const CeedInt j = threadIdx.y; in Weight3d()
181 for (CeedInt k = 0; k < BASIS_Q_1D; k++) { in Weight3d()
193 extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight_1… in Weight()