Home
last modified time | relevance | path

Searched refs:elem (Results 1 – 25 of 52) sorted by relevance

123

/libCEED/include/ceed/jit-source/hip/
H A Dhip-ref-basis-nontensor.h23 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local
24 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * … in Interp()
31 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local
32 …ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, B… in InterpTranspose()
42 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Deriv() local
43 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * n… in Deriv()
50 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in DerivTranspose() local
51 …ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BA… in DerivTranspose()
63 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local
64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; in Weight()
H A Dhip-shared-basis-tensor.h38 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
40 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in __launch_bounds__()
42 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D… in __launch_bounds__()
44 …ReadElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem… in __launch_bounds__()
46 …WriteElementStrided2d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * num_ele… in __launch_bounds__()
48 …ReadElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P_… in __launch_bounds__()
51 …WriteElementStrided3d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * BASIS_Q… in __launch_bounds__()
71 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
73 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in __launch_bounds__()
74 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D… in __launch_bounds__()
[all …]
H A Dhip-shared-basis-nontensor.h38 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
39 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, d_U, r_U… in __launch_bounds__()
41 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, r_V, d_… in __launch_bounds__()
65 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
66 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, d_U, r_U… in __launch_bounds__()
68 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_… in __launch_bounds__()
92 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
93 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, d_U, r_U… in __launch_bounds__()
95 …SumElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in __launch_bounds__()
122 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
[all …]
H A Dhip-shared-basis-tensor-at-points.h46 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
49 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in __launch_bounds__()
52 …ReadElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem… in __launch_bounds__()
55 …ReadElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P_… in __launch_bounds__()
66 …ReadPoint<BASIS_DIM, BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PTS, BAS… in __launch_bounds__()
74 …WritePoint<BASIS_NUM_COMP, BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PT… in __launch_bounds__()
102 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
109 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D… in __launch_bounds__()
111 …WriteElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_ele… in __launch_bounds__()
113 …WriteElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P… in __launch_bounds__()
[all …]
H A Dhip-shared-basis-read-write-templates.h28 inline __device__ void ReadElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided1d() argument
32 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided1d()
44 inline __device__ void WriteElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided1d() argument
48 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided1d()
57 inline __device__ void SumElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedInt … in SumElementStrided1d() argument
61 const CeedInt ind = node * strides_node + elem * strides_elem; in SumElementStrided1d()
77 inline __device__ void ReadElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided2d() argument
81 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided2d()
93 inline __device__ void WriteElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided2d() argument
97 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided2d()
[all …]
H A Dhip-ref-basis-tensor.h42 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Interp() local
44 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in Interp()
45 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in Interp()
103 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Grad() local
109 const CeedScalar *cur_u = &u[elem * u_stride + dim_1 * u_dim_stride + comp * u_comp_stride]; in Grad()
110 CeedScalar *cur_v = &v[elem * v_stride + dim_1 * v_dim_stride + comp * v_comp_stride]; in Grad()
146 const size_t elem = blockIdx.x; in Weight1d() local
148 if (elem < num_elem) w[elem * BASIS_Q_1D + i] = q_weight_1d[i]; in Weight1d()
160 const size_t elem = blockIdx.x; in Weight2d() local
162 if (elem < num_elem) { in Weight2d()
[all …]
H A Dhip-gen-templates.h28 inline __device__ void ReadPoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const C… in ReadPoint() argument
30 const CeedInt ind = indices[p + elem * NUM_PTS]; in ReadPoint()
41 inline __device__ void WritePoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const … in WritePoint() argument
44 const CeedInt ind = indices[p + elem * NUM_PTS]; in WritePoint()
73 …ecStandard1d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__r… in ReadLVecStandard1d() argument
77 const CeedInt ind = indices[node + elem * P_1D]; in ReadLVecStandard1d()
87 inline __device__ void ReadLVecStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedScalar… in ReadLVecStrided1d() argument
90 const CeedInt ind = node * STRIDES_NODE + elem * STRIDES_ELEM; in ReadLVecStrided1d()
100 …ecStandard1d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__r… in WriteLVecStandard1d() argument
104 const CeedInt ind = indices[node + elem * P_1D]; in WriteLVecStandard1d()
[all …]
H A Dhip-ref-restriction-curl-oriented.h19 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedNoTranspose() local
32 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedNoTranspose()
44 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedNoTranspose() local
57 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedUnsignedNoTranspose()
71 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local
78 …value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_… in CurlOrientedTranspose()
79 …value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient… in CurlOrientedTranspose()
81 …ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] *… in CurlOrientedTranspose()
102 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local
108 …value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR… in CurlOrientedTranspose()
[all …]
H A Dhip-ref-restriction-strided.h18 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedNoTranspose() local
21 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = in StridedNoTranspose()
22 u[loc_node * RSTR_STRIDE_NODES + comp * RSTR_STRIDE_COMP + elem * RSTR_STRIDE_ELEM]; in StridedNoTranspose()
33 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedTranspose() local
36 v[loc_node * RSTR_STRIDE_NODES + comp * RSTR_STRIDE_COMP + elem * RSTR_STRIDE_ELEM] += in StridedTranspose()
37 u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in StridedTranspose()
H A Dhip-ref-basis-tensor-at-points.h67 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpAtPoints() local
69 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in InterpAtPoints()
70 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in InterpAtPoints()
108 …ChebyshevPolynomialsAtPoint<BASIS_Q_1D>(coords[elem * v_stride + d * v_comp_stride + p], chebyshev… in InterpAtPoints()
150 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpTransposeAtPoints() local
152 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in InterpTransposeAtPoints()
153 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in InterpTransposeAtPoints()
165 if (p >= points_per_elem[elem]) continue; in InterpTransposeAtPoints()
175 …ChebyshevPolynomialsAtPoint<BASIS_Q_1D>(coords[elem * u_stride + d * u_comp_stride + p], chebyshev… in InterpTransposeAtPoints()
248 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in GradAtPoints() local
[all …]
H A Dhip-ref-restriction-at-points.h21 const CeedInt elem = node / RSTR_ELEM_SIZE; in AtPointsTranspose() local
23 if (loc_node >= points_per_elem[elem]) continue; in AtPointsTranspose()
25 … * RSTR_COMP_STRIDE], u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]); in AtPointsTranspose()
45 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in AtPointsTranspose() local
47 if (loc_node >= points_per_elem[elem]) continue; in AtPointsTranspose()
49 value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in AtPointsTranspose()
H A Dhip-ref-basis-nontensor-templates.h16 inline __device__ void Contract(const CeedInt elem, const CeedInt strides_elem_U, const CeedInt str… in Contract() argument
26 U = &d_U[elem * strides_elem_U + comp * strides_comp_U]; in Contract()
34 d_V[elem * strides_elem_V + comp * strides_comp_V + d * strides_q_comp_V + t_id] = r_V[d]; in Contract()
43 inline __device__ void ContractTranspose(const CeedInt elem, const CeedInt strides_elem_U, const Ce… in ContractTranspose() argument
55 U = &d_U[elem * strides_elem_U + comp * strides_comp_U + d * strides_q_comp_U]; in ContractTranspose()
58 d_V[elem * strides_elem_V + comp * strides_comp_V + t_id] += r_V; in ContractTranspose()
/libCEED/include/ceed/jit-source/cuda/
H A Dcuda-ref-basis-nontensor.h23 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local
24 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * … in Interp()
31 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local
32 …ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, B… in InterpTranspose()
42 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Deriv() local
43 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * n… in Deriv()
50 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in DerivTranspose() local
51 …ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BA… in DerivTranspose()
63 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local
64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; in Weight()
H A Dcuda-shared-basis-tensor.h37 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local
39 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in Interp()
41 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D… in Interp()
43 …ReadElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem… in Interp()
45 …WriteElementStrided2d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * num_ele… in Interp()
47 …ReadElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P_… in Interp()
50 …WriteElementStrided3d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * BASIS_Q… in Interp()
70 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpCollocated() local
72 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in InterpCollocated()
73 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D… in InterpCollocated()
[all …]
H A Dcuda-shared-basis-nontensor.h37 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local
38 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, d_U, r_U… in Interp()
40 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, r_V, d_… in Interp()
64 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local
65 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, d_U, r_U… in InterpTranspose()
67 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_… in InterpTranspose()
91 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAdd() local
92 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, d_U, r_U… in InterpTransposeAdd()
94 …SumElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in InterpTransposeAdd()
120 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Grad() local
[all …]
H A Dcuda-shared-basis-tensor-at-points.h45 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpAtPoints() local
48 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in InterpAtPoints()
51 …ReadElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem… in InterpAtPoints()
54 …ReadElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P_… in InterpAtPoints()
65 …ReadPoint<BASIS_DIM, BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PTS, BAS… in InterpAtPoints()
73 …WritePoint<BASIS_NUM_COMP, BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PT… in InterpAtPoints()
101 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAtPoints() local
108 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D… in InterpTransposeAtPoints()
110 …WriteElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_ele… in InterpTransposeAtPoints()
112 …WriteElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P… in InterpTransposeAtPoints()
[all …]
H A Dcuda-shared-basis-read-write-templates.h28 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided1d() argument
32 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided1d()
44 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided1d() argument
48 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided1d()
57 inline __device__ void SumElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided1d() argument
61 const CeedInt ind = node * strides_node + elem * strides_elem; in SumElementStrided1d()
77 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided2d() argument
81 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided2d()
93 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided2d() argument
97 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided2d()
[all …]
H A Dcuda-ref-basis-tensor.h42 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Interp() local
44 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in Interp()
45 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in Interp()
103 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Grad() local
109 const CeedScalar *cur_u = &u[elem * u_stride + dim_1 * u_dim_stride + comp * u_comp_stride]; in Grad()
110 CeedScalar *cur_v = &v[elem * v_stride + dim_1 * v_dim_stride + comp * v_comp_stride]; in Grad()
146 const size_t elem = blockIdx.x; in Weight1d() local
148 if (elem < num_elem) w[elem * BASIS_Q_1D + i] = q_weight_1d[i]; in Weight1d()
160 const size_t elem = blockIdx.x; in Weight2d() local
162 if (elem < num_elem) { in Weight2d()
[all …]
H A Dcuda-gen-templates.h28 inline __device__ void ReadPoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const … in ReadPoint() argument
30 const CeedInt ind = indices[p + elem * NUM_PTS]; in ReadPoint()
41 inline __device__ void WritePoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const… in WritePoint() argument
44 const CeedInt ind = indices[p + elem * NUM_PTS]; in WritePoint()
73 …cStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__r… in ReadLVecStandard1d() argument
77 const CeedInt ind = indices[node + elem * P_1D]; in ReadLVecStandard1d()
87 inline __device__ void ReadLVecStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedScala… in ReadLVecStrided1d() argument
91 const CeedInt ind = node * STRIDES_NODE + elem * STRIDES_ELEM; in ReadLVecStrided1d()
101 …cStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__r… in WriteLVecStandard1d() argument
105 const CeedInt ind = indices[node + elem * P_1D]; in WriteLVecStandard1d()
[all …]
H A Dcuda-ref-restriction-curl-oriented.h19 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedNoTranspose() local
32 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedNoTranspose()
44 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedNoTranspose() local
57 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedUnsignedNoTranspose()
71 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local
78 …value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_… in CurlOrientedTranspose()
79 …value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient… in CurlOrientedTranspose()
81 …ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] *… in CurlOrientedTranspose()
102 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local
108 …value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR… in CurlOrientedTranspose()
[all …]
H A Dcuda-ref-restriction-strided.h18 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedNoTranspose() local
21 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = in StridedNoTranspose()
22 u[loc_node * RSTR_STRIDE_NODES + comp * RSTR_STRIDE_COMP + elem * RSTR_STRIDE_ELEM]; in StridedNoTranspose()
33 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedTranspose() local
36 v[loc_node * RSTR_STRIDE_NODES + comp * RSTR_STRIDE_COMP + elem * RSTR_STRIDE_ELEM] += in StridedTranspose()
37 u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in StridedTranspose()
H A Dcuda-ref-basis-tensor-at-points.h67 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpAtPoints() local
69 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in InterpAtPoints()
70 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in InterpAtPoints()
108 …ChebyshevPolynomialsAtPoint<BASIS_Q_1D>(coords[elem * v_stride + d * v_comp_stride + p], chebyshev… in InterpAtPoints()
150 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpTransposeAtPoints() local
152 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in InterpTransposeAtPoints()
153 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in InterpTransposeAtPoints()
165 if (p >= points_per_elem[elem]) continue; in InterpTransposeAtPoints()
175 …ChebyshevPolynomialsAtPoint<BASIS_Q_1D>(coords[elem * u_stride + d * u_comp_stride + p], chebyshev… in InterpTransposeAtPoints()
248 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in GradAtPoints() local
[all …]
H A Dcuda-ref-restriction-at-points.h21 const CeedInt elem = node / RSTR_ELEM_SIZE; in AtPointsTranspose() local
23 if (loc_node >= points_per_elem[elem]) continue; in AtPointsTranspose()
25 … * RSTR_COMP_STRIDE], u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]); in AtPointsTranspose()
45 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in AtPointsTranspose() local
47 if (loc_node >= points_per_elem[elem]) continue; in AtPointsTranspose()
49 value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in AtPointsTranspose()
/libCEED/include/ceed/jit-source/sycl/
H A Dsycl-shared-basis-read-write-templates.h32 const CeedInt elem = get_global_id(2); in ReadElementStrided1d() local
34 if (item_id_x < P_1D && elem < num_elem) { in ReadElementStrided1d()
36 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided1d()
50 const CeedInt elem = get_global_id(2); in WriteElementStrided1d() local
52 if (item_id_x < P_1D && elem < num_elem) { in WriteElementStrided1d()
54 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided1d()
73 const CeedInt elem = get_global_id(2); in ReadElementStrided2d() local
75 if (item_id_x < P_1D && item_id_y < P_1D && elem < num_elem) { in ReadElementStrided2d()
77 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided2d()
92 const CeedInt elem = get_global_id(2); in WriteElementStrided2d() local
[all …]
H A Dsycl-gen-templates.h36 const CeedInt elem = get_global_id(2); in readDofsOffset1d() local
38 if (item_id_x < P_1D && elem < num_elem) { in readDofsOffset1d()
40 const CeedInt ind = indices[node + elem * P_1D]; in readDofsOffset1d()
54 const CeedInt elem = get_global_id(2); in readDofsStrided1d() local
56 if (item_id_x < P_1D && elem < num_elem) { in readDofsStrided1d()
58 const CeedInt ind = node * strides_node + elem * strides_elem; in readDofsStrided1d()
71 const CeedInt elem = get_global_id(2); in writeDofsOffset1d() local
73 if (item_id_x < P_1D && elem < num_elem) { in writeDofsOffset1d()
75 const CeedInt ind = indices[node + elem * P_1D]; in writeDofsOffset1d()
88 const CeedInt elem = get_global_id(2); in writeDofsStrided1d() local
[all …]

123