| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-shared-basis-tensor.h | 38 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 71 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 107 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 140 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 176 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 210 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 251 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 293 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 336 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 378 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local [all …]
|
| H A D | hip-shared-basis-nontensor.h | 38 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 65 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 92 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 122 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 149 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 176 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 199 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
|
| H A D | hip-ref-basis-nontensor.h | 23 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local 31 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local 42 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Deriv() local 50 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in DerivTranspose() local 63 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local
|
| H A D | hip-gen-templates.h | 28 inline __device__ void ReadPoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const C… in ReadPoint() 41 inline __device__ void WritePoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const … in WritePoint() 73 … ReadLVecStandard1d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in ReadLVecStandard1d() 87 inline __device__ void ReadLVecStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedScalar… in ReadLVecStrided1d() 100 …WriteLVecStandard1d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d() 111 …ecStandard1d_Single(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d_Single() 128 …Standard1d_Assembly(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d_Assembly() 147 …tandard1d_QFAssembly(SharedData_Hip &data, const CeedInt num_elem, const CeedInt elem, const CeedI… in WriteLVecStandard1d_QFAssembly() 162 inline __device__ void WriteLVecStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedScala… in WriteLVecStrided1d() 194 … ReadLVecStandard2d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in ReadLVecStandard2d() [all …]
|
| H A D | hip-shared-basis-read-write-templates.h | 28 inline __device__ void ReadElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided1d() 44 inline __device__ void WriteElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided1d() 57 inline __device__ void SumElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedInt … in SumElementStrided1d() 77 inline __device__ void ReadElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided2d() 93 inline __device__ void WriteElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided2d() 106 inline __device__ void SumElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedInt … in SumElementStrided2d() 126 inline __device__ void ReadElementStrided3d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided3d() 144 inline __device__ void WriteElementStrided3d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided3d() 159 inline __device__ void SumElementStrided3d(SharedData_Hip &data, const CeedInt elem, const CeedInt … in SumElementStrided3d() 181 inline __device__ void ReadPoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const C… in ReadPoint() [all …]
|
| H A D | hip-shared-basis-tensor-at-points.h | 46 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 102 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 172 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 234 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 290 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 361 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local
|
| H A D | hip-ref-restriction-strided.h | 18 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedNoTranspose() local 33 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedTranspose() local
|
| H A D | hip-ref-restriction-offset.h | 19 const CeedInt elem = node / RSTR_ELEM_SIZE; in OffsetNoTranspose() local 35 const CeedInt elem = node / RSTR_ELEM_SIZE; in OffsetTranspose() local 57 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in OffsetTranspose() local
|
| H A D | hip-ref-restriction-oriented.h | 21 const CeedInt elem = node / RSTR_ELEM_SIZE; in OrientedNoTranspose() local 39 const CeedInt elem = node / RSTR_ELEM_SIZE; in OrientedTranspose() local 64 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in OrientedTranspose() local
|
| H A D | hip-ref-restriction-curl-oriented.h | 19 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedNoTranspose() local 44 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedNoTranspose() local 71 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local 102 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local 128 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedTranspose() local 160 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedUnsignedTranspose() local
|
| H A D | hip-ref-restriction-at-points.h | 21 const CeedInt elem = node / RSTR_ELEM_SIZE; in AtPointsTranspose() local 45 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in AtPointsTranspose() local
|
| H A D | hip-ref-basis-tensor.h | 42 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Interp() local 103 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Grad() local 146 const size_t elem = blockIdx.x; in Weight1d() local 160 const size_t elem = blockIdx.x; in Weight2d() local 178 const size_t elem = blockIdx.x; in Weight3d() local
|
| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-ref-basis-nontensor.h | 23 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local 31 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local 42 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Deriv() local 50 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in DerivTranspose() local 63 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local
|
| H A D | cuda-shared-basis-tensor.h | 37 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local 70 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpCollocated() local 106 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local 139 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpCollocatedTranspose() local 175 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAdd() local 208 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpCollocatedTransposeAdd() local 249 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Grad() local 290 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradCollocated() local 332 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTranspose() local 373 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradCollocatedTranspose() local [all …]
|
| H A D | cuda-shared-basis-nontensor.h | 37 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local 64 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local 91 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAdd() local 120 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Grad() local 147 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTranspose() local 174 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTransposeAdd() local 196 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local
|
| H A D | cuda-gen-templates.h | 28 inline __device__ void ReadPoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const … in ReadPoint() 41 inline __device__ void WritePoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const… in WritePoint() 73 …ReadLVecStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in ReadLVecStandard1d() 87 inline __device__ void ReadLVecStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedScala… in ReadLVecStrided1d() 101 …riteLVecStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d() 112 …cStandard1d_Single(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d_Single() 129 …tandard1d_Assembly(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in WriteLVecStandard1d_Assembly() 148 …andard1d_QFAssembly(SharedData_Cuda &data, const CeedInt num_elem, const CeedInt elem, const CeedI… in WriteLVecStandard1d_QFAssembly() 163 inline __device__ void WriteLVecStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedScal… in WriteLVecStrided1d() 195 …ReadLVecStandard2d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedI… in ReadLVecStandard2d() [all …]
|
| H A D | cuda-shared-basis-read-write-templates.h | 28 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided1d() 44 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided1d() 57 inline __device__ void SumElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided1d() 77 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided2d() 93 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided2d() 106 inline __device__ void SumElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided2d() 126 inline __device__ void ReadElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided3d() 144 inline __device__ void WriteElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided3d() 159 inline __device__ void SumElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided3d() 181 inline __device__ void ReadPoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const … in ReadPoint() [all …]
|
| H A D | cuda-ref-restriction-strided.h | 18 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedNoTranspose() local 33 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedTranspose() local
|
| H A D | cuda-ref-restriction-offset.h | 19 const CeedInt elem = node / RSTR_ELEM_SIZE; in OffsetNoTranspose() local 35 const CeedInt elem = node / RSTR_ELEM_SIZE; in OffsetTranspose() local 57 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in OffsetTranspose() local
|
| H A D | cuda-ref-restriction-oriented.h | 21 const CeedInt elem = node / RSTR_ELEM_SIZE; in OrientedNoTranspose() local 39 const CeedInt elem = node / RSTR_ELEM_SIZE; in OrientedTranspose() local 64 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in OrientedTranspose() local
|
| H A D | cuda-shared-basis-tensor-at-points.h | 45 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpAtPoints() local 101 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAtPoints() local 171 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAddAtPoints() local 232 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradAtPoints() local 288 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTransposeAtPoints() local 359 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in GradTransposeAddAtPoints() local
|
| H A D | cuda-ref-restriction-curl-oriented.h | 19 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedNoTranspose() local 44 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedNoTranspose() local 71 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local 102 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local 128 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedTranspose() local 160 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedUnsignedTranspose() local
|
| H A D | cuda-ref-restriction-at-points.h | 21 const CeedInt elem = node / RSTR_ELEM_SIZE; in AtPointsTranspose() local 45 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in AtPointsTranspose() local
|
| /libCEED/include/ceed/jit-source/sycl/ |
| H A D | sycl-gen-templates.h | 36 const CeedInt elem = get_global_id(2); in readDofsOffset1d() local 54 const CeedInt elem = get_global_id(2); in readDofsStrided1d() local 71 const CeedInt elem = get_global_id(2); in writeDofsOffset1d() local 88 const CeedInt elem = get_global_id(2); in writeDofsStrided1d() local 110 const CeedInt elem = get_global_id(2); in readDofsOffset2d() local 127 const CeedInt elem = get_global_id(2); in readDofsStrided2d() local 143 const CeedInt elem = get_global_id(2); in writeDofsOffset2d() local 161 const CeedInt elem = get_global_id(2); in writeDofsStrided2d() local 181 const CeedInt elem = get_global_id(2); in readDofsOffset3d() local 200 const CeedInt elem = get_global_id(2); in readDofsStrided3d() local [all …]
|
| H A D | sycl-shared-basis-read-write-templates.h | 32 const CeedInt elem = get_global_id(2); in ReadElementStrided1d() local 50 const CeedInt elem = get_global_id(2); in WriteElementStrided1d() local 73 const CeedInt elem = get_global_id(2); in ReadElementStrided2d() local 92 const CeedInt elem = get_global_id(2); in WriteElementStrided2d() local 115 const CeedInt elem = get_global_id(2); in ReadElementStrided3d() local 136 const CeedInt elem = get_global_id(2); in WriteElementStrided3d() local
|