Home
last modified time | relevance | path

Searched refs:t_id (Results 1 – 16 of 16) sorted by relevance

/libCEED/include/ceed/jit-source/hip/
H A Dhip-ref-basis-nontensor-templates.h19 const CeedInt t_id = threadIdx.x; in Contract() local
31 for (CeedInt d = 0; d < Q_COMP; d++) r_V[d] += d_B[i + t_id * P + d * P * Q] * val; in Contract()
34 d_V[elem * strides_elem_V + comp * strides_comp_V + d * strides_q_comp_V + t_id] = r_V[d]; in Contract()
46 const CeedInt t_id = threadIdx.x; in ContractTranspose() local
56 for (CeedInt i = 0; i < Q; i++) r_V += d_B[t_id + i * P + d * P * Q] * U[i]; in ContractTranspose()
58 d_V[elem * strides_elem_V + comp * strides_comp_V + t_id] += r_V; in ContractTranspose()
H A Dhip-ref-basis-nontensor.h60 const CeedInt t_id = threadIdx.x; in Weight() local
64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; in Weight()
H A Dhip-shared-basis-nontensor.h26 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
53 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
80 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
110 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
137 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
164 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
194 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
H A Dhip-types.h37 CeedInt t_id; member
H A Dhip-shared-basis-tensor.h26 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
65 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
95 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
134 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
164 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
204 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
237 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
281 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
322 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
366 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
[all …]
H A Dhip-shared-basis-tensor-at-points.h32 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
88 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
158 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
220 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
276 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
347 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in __launch_bounds__()
H A Dhip-shared-basis-read-write-templates.h17 for (CeedInt i = data.t_id; i < P * Q; i += blockDim.x * blockDim.y * blockDim.z) B[i] = d_B[i]; in LoadMatrix()
H A Dhip-gen-templates.h17 for (CeedInt i = data.t_id; i < P * Q; i += blockDim.x * blockDim.y * blockDim.z) B[i] = d_B[i]; in LoadMatrix()
/libCEED/include/ceed/jit-source/cuda/
H A Dcuda-ref-basis-nontensor-templates.h19 const CeedInt t_id = threadIdx.x; in Contract() local
31 for (CeedInt d = 0; d < Q_COMP; d++) r_V[d] += d_B[i + t_id * P + d * P * Q] * val; in Contract()
34 d_V[elem * strides_elem_V + comp * strides_comp_V + d * strides_q_comp_V + t_id] = r_V[d]; in Contract()
46 const CeedInt t_id = threadIdx.x; in ContractTranspose() local
56 for (CeedInt i = 0; i < Q; i++) r_V += d_B[t_id + i * P + d * P * Q] * U[i]; in ContractTranspose()
58 d_V[elem * strides_elem_V + comp * strides_comp_V + t_id] += r_V; in ContractTranspose()
H A Dcuda-ref-basis-nontensor.h60 const CeedInt t_id = threadIdx.x; in Weight() local
64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; in Weight()
H A Dcuda-shared-basis-nontensor.h25 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in Interp()
52 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpTranspose()
79 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpTransposeAdd()
108 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in Grad()
135 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in GradTranspose()
162 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in GradTransposeAdd()
191 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in Weight()
H A Dcuda-types.h37 CeedInt t_id; member
H A Dcuda-shared-basis-tensor.h25 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in Interp()
64 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpCollocated()
94 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpTranspose()
133 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpCollocatedTranspose()
163 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpTransposeAdd()
202 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpCollocatedTransposeAdd()
235 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in Grad()
278 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in GradCollocated()
318 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in GradTranspose()
361 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in GradCollocatedTranspose()
[all …]
H A Dcuda-shared-basis-tensor-at-points.h31 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpAtPoints()
87 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpTransposeAtPoints()
157 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in InterpTransposeAddAtPoints()
218 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in GradAtPoints()
274 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in GradTransposeAtPoints()
345 data.t_id = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * blockDim.y * blockDim.x; in GradTransposeAddAtPoints()
H A Dcuda-shared-basis-read-write-templates.h17 for (CeedInt i = data.t_id; i < P * Q; i += blockDim.x * blockDim.y * blockDim.z) B[i] = d_B[i]; in LoadMatrix()
H A Dcuda-gen-templates.h17 for (CeedInt i = data.t_id; i < P * Q; i += blockDim.x * blockDim.y * blockDim.z) B[i] = d_B[i]; in LoadMatrix()