Home
last modified time | relevance | path

Searched defs:elem (Results 1 – 25 of 46) sorted by relevance

12

/libCEED/include/ceed/jit-source/hip/
H A Dhip-shared-basis-tensor.h38 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
71 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
107 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
140 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
176 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
210 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
251 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
293 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
336 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
378 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
[all …]
H A Dhip-shared-basis-nontensor.h38 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
65 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
92 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
122 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
149 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
176 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
199 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
H A Dhip-ref-basis-nontensor.h23 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local
31 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local
42 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Deriv() local
50 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in DerivTranspose() local
63 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local
H A Dhip-gen-templates.h28 inline __device__ void ReadPoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const C… in ReadPoint()
41 inline __device__ void WritePoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const … in WritePoint()
73 … ReadLVecStandard1d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in ReadLVecStandard1d()
87 inline __device__ void ReadLVecStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedScalar… in ReadLVecStrided1d()
100 …WriteLVecStandard1d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d()
111 …ecStandard1d_Single(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d_Single()
128 …Standard1d_Assembly(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d_Assembly()
147 …tandard1d_QFAssembly(SharedData_Hip &data, const CeedInt num_elem, const CeedInt elem, const CeedI… in WriteLVecStandard1d_QFAssembly()
162 inline __device__ void WriteLVecStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedScala… in WriteLVecStrided1d()
194 … ReadLVecStandard2d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in ReadLVecStandard2d()
[all …]
H A Dhip-shared-basis-read-write-templates.h28 inline __device__ void ReadElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided1d()
44 inline __device__ void WriteElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided1d()
57 inline __device__ void SumElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedInt … in SumElementStrided1d()
77 inline __device__ void ReadElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided2d()
93 inline __device__ void WriteElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided2d()
106 inline __device__ void SumElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedInt … in SumElementStrided2d()
126 inline __device__ void ReadElementStrided3d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided3d()
144 inline __device__ void WriteElementStrided3d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided3d()
159 inline __device__ void SumElementStrided3d(SharedData_Hip &data, const CeedInt elem, const CeedInt … in SumElementStrided3d()
181 inline __device__ void ReadPoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const C… in ReadPoint()
[all …]
H A Dhip-shared-basis-tensor-at-points.h46 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
102 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
172 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
234 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
290 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
361 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
H A Dhip-ref-restriction-strided.h18 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedNoTranspose() local
33 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedTranspose() local
H A Dhip-ref-restriction-offset.h19 const CeedInt elem = node / RSTR_ELEM_SIZE; in OffsetNoTranspose() local
35 const CeedInt elem = node / RSTR_ELEM_SIZE; in OffsetTranspose() local
57 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in OffsetTranspose() local
H A Dhip-ref-restriction-oriented.h21 const CeedInt elem = node / RSTR_ELEM_SIZE; in OrientedNoTranspose() local
39 const CeedInt elem = node / RSTR_ELEM_SIZE; in OrientedTranspose() local
64 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in OrientedTranspose() local
H A Dhip-ref-restriction-curl-oriented.h19 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedNoTranspose() local
44 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedNoTranspose() local
71 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local
102 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local
128 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedTranspose() local
160 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedUnsignedTranspose() local
H A Dhip-ref-restriction-at-points.h21 const CeedInt elem = node / RSTR_ELEM_SIZE; in AtPointsTranspose() local
45 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in AtPointsTranspose() local
H A Dhip-ref-basis-tensor.h42 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Interp() local
103 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Grad() local
146 const size_t elem = blockIdx.x; in Weight1d() local
160 const size_t elem = blockIdx.x; in Weight2d() local
178 const size_t elem = blockIdx.x; in Weight3d() local
/libCEED/include/ceed/jit-source/cuda/
H A Dcuda-ref-basis-nontensor.h23 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local
31 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local
42 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Deriv() local
50 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in DerivTranspose() local
63 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local
H A Dcuda-shared-basis-tensor.h37 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local
70 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpCollocated() local
106 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local
139 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpCollocatedTranspose() local
175 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAdd() local
208 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpCollocatedTransposeAdd() local
249 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Grad() local
290 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradCollocated() local
332 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTranspose() local
373 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradCollocatedTranspose() local
[all …]
H A Dcuda-shared-basis-nontensor.h37 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local
64 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local
91 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAdd() local
120 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Grad() local
147 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTranspose() local
174 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTransposeAdd() local
196 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local
H A Dcuda-gen-templates.h28 inline __device__ void ReadPoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const … in ReadPoint()
41 inline __device__ void WritePoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const… in WritePoint()
73 …ReadLVecStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in ReadLVecStandard1d()
87 inline __device__ void ReadLVecStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedScala… in ReadLVecStrided1d()
101 …riteLVecStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d()
112 …cStandard1d_Single(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d_Single()
129 …tandard1d_Assembly(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d_Assembly()
148 …andard1d_QFAssembly(SharedData_Cuda &data, const CeedInt num_elem, const CeedInt elem, const CeedI… in WriteLVecStandard1d_QFAssembly()
163 inline __device__ void WriteLVecStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedScal… in WriteLVecStrided1d()
195 …ReadLVecStandard2d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in ReadLVecStandard2d()
[all …]
H A Dcuda-shared-basis-read-write-templates.h28 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided1d()
44 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided1d()
57 inline __device__ void SumElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided1d()
77 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided2d()
93 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided2d()
106 inline __device__ void SumElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided2d()
126 inline __device__ void ReadElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided3d()
144 inline __device__ void WriteElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided3d()
159 inline __device__ void SumElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided3d()
181 inline __device__ void ReadPoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const … in ReadPoint()
[all …]
H A Dcuda-ref-restriction-strided.h18 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedNoTranspose() local
33 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedTranspose() local
H A Dcuda-ref-restriction-offset.h19 const CeedInt elem = node / RSTR_ELEM_SIZE; in OffsetNoTranspose() local
35 const CeedInt elem = node / RSTR_ELEM_SIZE; in OffsetTranspose() local
57 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in OffsetTranspose() local
H A Dcuda-ref-restriction-oriented.h21 const CeedInt elem = node / RSTR_ELEM_SIZE; in OrientedNoTranspose() local
39 const CeedInt elem = node / RSTR_ELEM_SIZE; in OrientedTranspose() local
64 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in OrientedTranspose() local
H A Dcuda-shared-basis-tensor-at-points.h45 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpAtPoints() local
101 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAtPoints() local
171 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAddAtPoints() local
232 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradAtPoints() local
288 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTransposeAtPoints() local
359 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTransposeAddAtPoints() local
H A Dcuda-ref-restriction-curl-oriented.h19 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedNoTranspose() local
44 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedNoTranspose() local
71 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local
102 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local
128 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedTranspose() local
160 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedUnsignedTranspose() local
H A Dcuda-ref-restriction-at-points.h21 const CeedInt elem = node / RSTR_ELEM_SIZE; in AtPointsTranspose() local
45 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in AtPointsTranspose() local
/libCEED/include/ceed/jit-source/sycl/
H A Dsycl-gen-templates.h36 const CeedInt elem = get_global_id(2); in readDofsOffset1d() local
54 const CeedInt elem = get_global_id(2); in readDofsStrided1d() local
71 const CeedInt elem = get_global_id(2); in writeDofsOffset1d() local
88 const CeedInt elem = get_global_id(2); in writeDofsStrided1d() local
110 const CeedInt elem = get_global_id(2); in readDofsOffset2d() local
127 const CeedInt elem = get_global_id(2); in readDofsStrided2d() local
143 const CeedInt elem = get_global_id(2); in writeDofsOffset2d() local
161 const CeedInt elem = get_global_id(2); in writeDofsStrided2d() local
181 const CeedInt elem = get_global_id(2); in readDofsOffset3d() local
200 const CeedInt elem = get_global_id(2); in readDofsStrided3d() local
[all …]
H A Dsycl-shared-basis-read-write-templates.h32 const CeedInt elem = get_global_id(2); in ReadElementStrided1d() local
50 const CeedInt elem = get_global_id(2); in WriteElementStrided1d() local
73 const CeedInt elem = get_global_id(2); in ReadElementStrided2d() local
92 const CeedInt elem = get_global_id(2); in WriteElementStrided2d() local
115 const CeedInt elem = get_global_id(2); in ReadElementStrided3d() local
136 const CeedInt elem = get_global_id(2); in WriteElementStrided3d() local

12