| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-shared-basis-tensor-flattened-templates.h | 291 …ned(SharedData_Cuda &data, const int t_id_x, const int t_id_y, const int t_id_z, const CeedScalar … in ContractX3dFlattened() argument 294 data.slice[t_id_x + t_id_y * T_1D + t_id_z * T_1D * T_1D] = *U; in ContractX3dFlattened() 297 if (t_id_x < Q_1D && t_id_y < P_1D && t_id_z < P_1D) { in ContractX3dFlattened() 299 …*V += B[i + t_id_x * P_1D] * data.slice[i + t_id_y * T_1D + t_id_z * T_1D * T_1D]; // Contract x … in ContractX3dFlattened() 308 …ned(SharedData_Cuda &data, const int t_id_x, const int t_id_y, const int t_id_z, const CeedScalar … in ContractY3dFlattened() argument 311 data.slice[t_id_x + t_id_y * T_1D + t_id_z * T_1D * T_1D] = *U; in ContractY3dFlattened() 314 if (t_id_x < Q_1D && t_id_y < Q_1D && t_id_z < P_1D) { in ContractY3dFlattened() 316 …*V += B[i + t_id_y * P_1D] * data.slice[t_id_x + i * T_1D + t_id_z * T_1D * T_1D]; // Contract y … in ContractY3dFlattened() 325 …ned(SharedData_Cuda &data, const int t_id_x, const int t_id_y, const int t_id_z, const CeedScalar … in ContractZ3dFlattened() argument 328 data.slice[t_id_x + t_id_y * T_1D + t_id_z * T_1D * T_1D] = *U; in ContractZ3dFlattened() [all …]
|
| H A D | cuda-shared-basis-nontensor.h | 24 data.t_id_z = threadIdx.z; in Interp() 26 data.slice = slice + data.t_id_z * BASIS_T_1D; in Interp() 51 data.t_id_z = threadIdx.z; in InterpTranspose() 53 data.slice = slice + data.t_id_z * BASIS_T_1D; in InterpTranspose() 78 data.t_id_z = threadIdx.z; in InterpTransposeAdd() 80 data.slice = slice + data.t_id_z * BASIS_T_1D; in InterpTransposeAdd() 107 data.t_id_z = threadIdx.z; in Grad() 109 data.slice = slice + data.t_id_z * BASIS_T_1D; in Grad() 134 data.t_id_z = threadIdx.z; in GradTranspose() 136 data.slice = slice + data.t_id_z * BASIS_T_1D; in GradTranspose() [all …]
|
| H A D | cuda-shared-basis-tensor.h | 24 data.t_id_z = threadIdx.z; in Interp() 26 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in Interp() 63 data.t_id_z = threadIdx.z; in InterpCollocated() 65 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in InterpCollocated() 93 data.t_id_z = threadIdx.z; in InterpTranspose() 95 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in InterpTranspose() 132 data.t_id_z = threadIdx.z; in InterpCollocatedTranspose() 134 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in InterpCollocatedTranspose() 162 data.t_id_z = threadIdx.z; in InterpTransposeAdd() 164 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in InterpTransposeAdd() [all …]
|
| H A D | cuda-shared-basis-tensor-at-points.h | 30 data.t_id_z = threadIdx.z; in InterpAtPoints() 32 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in InterpAtPoints() 86 data.t_id_z = threadIdx.z; in InterpTransposeAtPoints() 88 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in InterpTransposeAtPoints() 156 data.t_id_z = threadIdx.z; in InterpTransposeAddAtPoints() 158 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in InterpTransposeAddAtPoints() 217 data.t_id_z = threadIdx.z; in GradAtPoints() 219 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in GradAtPoints() 273 data.t_id_z = threadIdx.z; in GradTransposeAtPoints() 275 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in GradTransposeAtPoints() [all …]
|
| H A D | cuda-types.h | 36 CeedInt t_id_z; member
|
| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-shared-basis-tensor-flattened-templates.h | 291 …ened(SharedData_Hip &data, const int t_id_x, const int t_id_y, const int t_id_z, const CeedScalar … in ContractX3dFlattened() argument 294 data.slice[t_id_x + t_id_y * T_1D + t_id_z * T_1D * T_1D] = *U; in ContractX3dFlattened() 297 if (t_id_x < Q_1D && t_id_y < P_1D && t_id_z < P_1D) { in ContractX3dFlattened() 299 …*V += B[i + t_id_x * P_1D] * data.slice[i + t_id_y * T_1D + t_id_z * T_1D * T_1D]; // Contract x … in ContractX3dFlattened() 308 …ened(SharedData_Hip &data, const int t_id_x, const int t_id_y, const int t_id_z, const CeedScalar … in ContractY3dFlattened() argument 311 data.slice[t_id_x + t_id_y * T_1D + t_id_z * T_1D * T_1D] = *U; in ContractY3dFlattened() 314 if (t_id_x < Q_1D && t_id_y < Q_1D && t_id_z < P_1D) { in ContractY3dFlattened() 316 …*V += B[i + t_id_y * P_1D] * data.slice[t_id_x + i * T_1D + t_id_z * T_1D * T_1D]; // Contract y … in ContractY3dFlattened() 325 …ened(SharedData_Hip &data, const int t_id_x, const int t_id_y, const int t_id_z, const CeedScalar … in ContractZ3dFlattened() argument 328 data.slice[t_id_x + t_id_y * T_1D + t_id_z * T_1D * T_1D] = *U; in ContractZ3dFlattened() [all …]
|
| H A D | hip-shared-basis-nontensor.h | 25 data.t_id_z = threadIdx.z; in __launch_bounds__() 27 data.slice = slice + data.t_id_z * BASIS_T_1D; in __launch_bounds__() 52 data.t_id_z = threadIdx.z; in __launch_bounds__() 54 data.slice = slice + data.t_id_z * BASIS_T_1D; in __launch_bounds__() 79 data.t_id_z = threadIdx.z; in __launch_bounds__() 81 data.slice = slice + data.t_id_z * BASIS_T_1D; in __launch_bounds__() 109 data.t_id_z = threadIdx.z; in __launch_bounds__() 111 data.slice = slice + data.t_id_z * BASIS_T_1D; in __launch_bounds__() 136 data.t_id_z = threadIdx.z; in __launch_bounds__() 138 data.slice = slice + data.t_id_z * BASIS_T_1D; in __launch_bounds__() [all …]
|
| H A D | hip-shared-basis-tensor.h | 25 data.t_id_z = threadIdx.z; in __launch_bounds__() 27 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() 64 data.t_id_z = threadIdx.z; in __launch_bounds__() 66 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() 94 data.t_id_z = threadIdx.z; in __launch_bounds__() 96 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() 133 data.t_id_z = threadIdx.z; in __launch_bounds__() 135 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() 163 data.t_id_z = threadIdx.z; in __launch_bounds__() 165 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() [all …]
|
| H A D | hip-shared-basis-tensor-at-points.h | 31 data.t_id_z = threadIdx.z; in __launch_bounds__() 33 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() 87 data.t_id_z = threadIdx.z; in __launch_bounds__() 89 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() 157 data.t_id_z = threadIdx.z; in __launch_bounds__() 159 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() 219 data.t_id_z = threadIdx.z; in __launch_bounds__() 221 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() 275 data.t_id_z = threadIdx.z; in __launch_bounds__() 277 data.slice = slice + data.t_id_z * BASIS_T_1D * (BASIS_DIM > 1 ? BASIS_T_1D : 1); in __launch_bounds__() [all …]
|
| H A D | hip-types.h | 36 CeedInt t_id_z; member
|