| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-shared-basis-tensor-templates.h | 20 inline __device__ void ContractX1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B,… in ContractX1d() 36 inline __device__ void ContractTransposeX1d(SharedData_Cuda &data, const CeedScalar *U, const CeedS… in ContractTransposeX1d() 52 inline __device__ void Interp1d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const Ce… in Interp1d() 62 inline __device__ void InterpTranspose1d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U,… in InterpTranspose1d() 73 inline __device__ void InterpCollocatedNodes1d(SharedData_Cuda &data, const CeedScalar *__restrict_… in InterpCollocatedNodes1d() 84 inline __device__ void InterpTransposeCollocatedNodes1d(SharedData_Cuda &data, const CeedScalar *__… in InterpTransposeCollocatedNodes1d() 95 inline __device__ void Grad1d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const Ceed… in Grad1d() 106 inline __device__ void GradTranspose1d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, c… in GradTranspose1d() 117 inline __device__ void Weight1d(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weight_1d, … in Weight1d() 129 inline __device__ void ContractX2d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B,… in ContractX2d() [all …]
|
| H A D | cuda-shared-basis-nontensor-templates.h | 16 inline __device__ void Contract1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B, … in Contract1d() 32 inline __device__ void ContractTranspose1d(SharedData_Cuda &data, const CeedScalar *U, const CeedSc… in ContractTranspose1d() 47 inline __device__ void InterpNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, c… in InterpNonTensor() 58 inline __device__ void InterpTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict… in InterpTransposeNonTensor() 70 inline __device__ void GradNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, con… in GradNonTensor() 82 inline __device__ void GradTransposeNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__… in GradTransposeNonTensor() 96 inline __device__ void WeightNonTensor(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weig… in WeightNonTensor()
|
| H A D | cuda-shared-basis-read-write-templates.h | 16 inline __device__ void LoadMatrix(SharedData_Cuda &data, const CeedScalar *__restrict__ d_B, CeedSc… in LoadMatrix() 28 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided1d() 44 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided1d() 57 inline __device__ void SumElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided1d() 77 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided2d() 93 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided2d() 106 inline __device__ void SumElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided2d() 126 inline __device__ void ReadElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided3d() 144 inline __device__ void WriteElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided3d() 159 inline __device__ void SumElementStrided3d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided3d() [all …]
|
| H A D | cuda-gen-templates.h | 16 inline __device__ void LoadMatrix(SharedData_Cuda &data, const CeedScalar *__restrict__ d_B, CeedSc… in LoadMatrix() 28 inline __device__ void ReadPoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const … in ReadPoint() 41 inline __device__ void WritePoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const… in WritePoint() 60 inline __device__ void SetEVecStandard1d_Single(SharedData_Cuda &data, const CeedInt n, const CeedS… in SetEVecStandard1d_Single() 73 inline __device__ void ReadLVecStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const Cee… in ReadLVecStandard1d() 87 inline __device__ void ReadLVecStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedScala… in ReadLVecStrided1d() 101 inline __device__ void WriteLVecStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const Ce… in WriteLVecStandard1d() 112 inline __device__ void WriteLVecStandard1d_Single(SharedData_Cuda &data, const CeedInt num_nodes, c… in WriteLVecStandard1d_Single() 129 inline __device__ void WriteLVecStandard1d_Assembly(SharedData_Cuda &data, const CeedInt num_nodes,… in WriteLVecStandard1d_Assembly() 148 inline __device__ void WriteLVecStandard1d_QFAssembly(SharedData_Cuda &data, const CeedInt num_elem… in WriteLVecStandard1d_QFAssembly() [all …]
|
| H A D | cuda-shared-basis-tensor-flattened-templates.h | 20 inline __device__ void ContractX2dFlattened(SharedData_Cuda &data, const int t_id_x, const int t_id… in ContractX2dFlattened() 37 inline __device__ void ContractY2dFlattened(SharedData_Cuda &data, const int t_id_x, const int t_id… in ContractY2dFlattened() 54 inline __device__ void ContractTransposeY2dFlattened(SharedData_Cuda &data, const int t_id_x, const… in ContractTransposeY2dFlattened() 71 inline __device__ void ContractTransposeX2dFlattened(SharedData_Cuda &data, const int t_id_x, const… in ContractTransposeX2dFlattened() 88 inline __device__ void ContractTransposeAddX2dFlattened(SharedData_Cuda &data, const int t_id_x, co… in ContractTransposeAddX2dFlattened() 104 inline __device__ void QPack2d(SharedData_Cuda &data, const int t_id_x, const int t_id_y, CeedScala… in QPack2d() 116 inline __device__ void QUnpack2d(SharedData_Cuda &data, const int t_id_x, const int t_id_y, CeedSca… in QUnpack2d() 131 inline __device__ void InterpTensor2dFlattened(SharedData_Cuda &data, CeedScalar *__restrict__ r_U,… in InterpTensor2dFlattened() 150 inline __device__ void InterpTransposeTensor2dFlattened(SharedData_Cuda &data, CeedScalar *__restri… in InterpTransposeTensor2dFlattened() 168 inline __device__ void InterpTensorCollocatedNodes2dFlattened(SharedData_Cuda &data, CeedScalar *__… in InterpTensorCollocatedNodes2dFlattened() [all …]
|
| H A D | cuda-shared-basis-tensor-at-points-templates.h | 44 inline __device__ void InterpAtPoints1d(SharedData_Cuda &data, const CeedInt p, const CeedScalar *_… in InterpAtPoints1d() 65 inline __device__ void InterpTransposeAtPoints1d(SharedData_Cuda &data, const CeedInt p, const Ceed… in InterpTransposeAtPoints1d() 90 inline __device__ void GradAtPoints1d(SharedData_Cuda &data, const CeedInt p, const CeedScalar *__r… in GradAtPoints1d() 112 inline __device__ void GradTransposeAtPoints1d(SharedData_Cuda &data, const CeedInt p, const CeedSc… in GradTransposeAtPoints1d() 141 inline __device__ void InterpAtPoints2d(SharedData_Cuda &data, const CeedInt p, const CeedScalar *_… in InterpAtPoints2d() 172 inline __device__ void InterpTransposeAtPoints2d(SharedData_Cuda &data, const CeedInt p, const Ceed… in InterpTransposeAtPoints2d() 210 inline __device__ void GradAtPoints2d(SharedData_Cuda &data, const CeedInt p, const CeedScalar *__r… in GradAtPoints2d() 245 inline __device__ void GradTransposeAtPoints2d(SharedData_Cuda &data, const CeedInt p, const CeedSc… in GradTransposeAtPoints2d() 291 inline __device__ void InterpAtPoints3d(SharedData_Cuda &data, const CeedInt p, const CeedScalar *_… in InterpAtPoints3d() 328 inline __device__ void InterpTransposeAtPoints3d(SharedData_Cuda &data, const CeedInt p, const Ceed… in InterpTransposeAtPoints3d() [all …]
|
| H A D | cuda-shared-basis-nontensor.h | 21 SharedData_Cuda data; in Interp() 48 SharedData_Cuda data; in InterpTranspose() 75 SharedData_Cuda data; in InterpTransposeAdd() 104 SharedData_Cuda data; in Grad() 131 SharedData_Cuda data; in GradTranspose() 158 SharedData_Cuda data; in GradTransposeAdd() 187 SharedData_Cuda data; in Weight()
|
| H A D | cuda-types.h | 39 } SharedData_Cuda; typedef
|
| H A D | cuda-shared-basis-tensor.h | 21 SharedData_Cuda data; in Interp() 60 SharedData_Cuda data; in InterpCollocated() 90 SharedData_Cuda data; in InterpTranspose() 129 SharedData_Cuda data; in InterpCollocatedTranspose() 159 SharedData_Cuda data; in InterpTransposeAdd() 198 SharedData_Cuda data; in InterpCollocatedTransposeAdd() 231 SharedData_Cuda data; in Grad() 274 SharedData_Cuda data; in GradCollocated() 314 SharedData_Cuda data; in GradTranspose() 357 SharedData_Cuda data; in GradCollocatedTranspose() [all …]
|
| H A D | cuda-shared-basis-tensor-at-points.h | 27 SharedData_Cuda data; in InterpAtPoints() 83 SharedData_Cuda data; in InterpTransposeAtPoints() 153 SharedData_Cuda data; in InterpTransposeAddAtPoints() 214 SharedData_Cuda data; in GradAtPoints() 270 SharedData_Cuda data; in GradTransposeAtPoints() 341 SharedData_Cuda data; in GradTransposeAddAtPoints()
|