cuda-ref-basis-tensor.h - OpenGrok cross reference for /libCEED/include/ceed/jit-source/cuda/cuda-ref-basis-tensor.h

Lines Matching refs:CeedInt
19 extern "C" __global__ void Interp(const CeedInt num_elem, const CeedInt is_transpose, const CeedSca…  in Interp()
21   const CeedInt i = threadIdx.x;  in Interp()
27   for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) {  in Interp()
31   const CeedInt P             = is_transpose ? BASIS_Q_1D : BASIS_P_1D;  in Interp()
32   const CeedInt Q             = is_transpose ? BASIS_P_1D : BASIS_Q_1D;  in Interp()
33   const CeedInt stride_0      = is_transpose ? 1 : BASIS_P_1D;  in Interp()
34   const CeedInt stride_1      = is_transpose ? BASIS_P_1D : 1;  in Interp()
35   const CeedInt u_stride      = is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES;  in Interp()
36   const CeedInt v_stride      = is_transpose ? BASIS_NUM_NODES : BASIS_NUM_QPTS;  in Interp()
37   const CeedInt u_comp_stride = num_elem * (is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES);  in Interp()
38   const CeedInt v_comp_stride = num_elem * (is_transpose ? BASIS_NUM_NODES : BASIS_NUM_QPTS);  in Interp()
39   const CeedInt u_size        = is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES;  in Interp()
42   for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) {  in Interp()
43     for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) {  in Interp()
46       CeedInt           pre   = u_size;  in Interp()
47       CeedInt           post  = 1;  in Interp()
49       for (CeedInt d = 0; d < BASIS_DIM; d++) {  in Interp()
55         const CeedInt     writeLen = pre * post * Q;  in Interp()
58         for (CeedInt k = i; k < writeLen; k += blockDim.x) {  in Interp()
59           const CeedInt c   = k % post;  in Interp()
60           const CeedInt j   = (k / post) % Q;  in Interp()
61           const CeedInt a   = k / (post * Q);  in Interp()
64 …for (CeedInt b = 0; b < P; b++) v_k += s_interp_1d[j * stride_0 + b * stride_1] * in[(a * P + b) *…  in Interp()
77 extern "C" __global__ void Grad(const CeedInt num_elem, const CeedInt is_transpose, const CeedScala…  in Grad()
79   const CeedInt i = threadIdx.x;  in Grad()
86   for (CeedInt k = i; k < BASIS_Q_1D * BASIS_P_1D; k += blockDim.x) {  in Grad()
91   const CeedInt P             = is_transpose ? BASIS_Q_1D : BASIS_P_1D;  in Grad()
92   const CeedInt Q             = is_transpose ? BASIS_P_1D : BASIS_Q_1D;  in Grad()
93   const CeedInt stride_0      = is_transpose ? 1 : BASIS_P_1D;  in Grad()
94   const CeedInt stride_1      = is_transpose ? BASIS_P_1D : 1;  in Grad()
95   const CeedInt u_stride      = is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES;  in Grad()
96   const CeedInt v_stride      = is_transpose ? BASIS_NUM_NODES : BASIS_NUM_QPTS;  in Grad()
97   const CeedInt u_comp_stride = num_elem * (is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES);  in Grad()
98   const CeedInt v_comp_stride = num_elem * (is_transpose ? BASIS_NUM_NODES : BASIS_NUM_QPTS);  in Grad()
99   const CeedInt u_dim_stride  = is_transpose ? num_elem * BASIS_NUM_QPTS * BASIS_NUM_COMP : 0;  in Grad()
100   const CeedInt v_dim_stride  = is_transpose ? 0 : num_elem * BASIS_NUM_QPTS * BASIS_NUM_COMP;  in Grad()
103   for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) {  in Grad()
104     for (CeedInt comp = 0; comp < BASIS_NUM_COMP; comp++) {  in Grad()
106       for (CeedInt dim_1 = 0; dim_1 < BASIS_DIM; dim_1++) {  in Grad()
107         CeedInt           pre   = is_transpose ? BASIS_NUM_QPTS : BASIS_NUM_NODES;  in Grad()
108         CeedInt           post  = 1;  in Grad()
112         for (CeedInt dim_2 = 0; dim_2 < BASIS_DIM; dim_2++) {  in Grad()
119           const CeedInt     writeLen = pre * post * Q;  in Grad()
122           for (CeedInt k = i; k < writeLen; k += blockDim.x) {  in Grad()
123             const CeedInt c   = k % post;  in Grad()
124             const CeedInt j   = (k / post) % Q;  in Grad()
125             const CeedInt a   = k / (post * Q);  in Grad()
128 …for (CeedInt b = 0; b < P; b++) v_k += op[j * stride_0 + b * stride_1] * in[(a * P + b) * post + c…  in Grad()
142 __device__ void Weight1d(const CeedInt num_elem, const CeedScalar *q_weight_1d, CeedScalar *w) {  in Weight1d()
143   const CeedInt i = threadIdx.x;  in Weight1d()
155 __device__ void Weight2d(const CeedInt num_elem, const CeedScalar *q_weight_1d, CeedScalar *w) {  in Weight2d()
156   const CeedInt i = threadIdx.x;  in Weight2d()
157   const CeedInt j = threadIdx.y;  in Weight2d()
173 __device__ void Weight3d(const CeedInt num_elem, const CeedScalar *q_weight_1d, CeedScalar *w) {  in Weight3d()
174   const CeedInt i = threadIdx.x;  in Weight3d()
175   const CeedInt j = threadIdx.y;  in Weight3d()
181       for (CeedInt k = 0; k < BASIS_Q_1D; k++) {  in Weight3d()
193 extern "C" __global__ void Weight(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight_1…  in Weight()