| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-ref-basis-nontensor.h | 22 CeedScalar *__restrict__ d_V) { in Interp() argument 25 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in Interp() 30 CeedScalar *__restrict__ d_V) { in InterpTranspose() argument 33 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in InterpTranspose() 41 CeedScalar *__restrict__ d_V) { in Deriv() argument 44 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in Deriv() 49 CeedScalar *__restrict__ d_V) { in DerivTranspose() argument 52 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in DerivTranspose() 59 …ht(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight, CeedScalar *__restrict__ d_V) { in Weight() argument 64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; in Weight()
|
| H A D | hip-shared-basis-tensor.h | 19 …um_elem, const CeedScalar *c_B, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 42 …tStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D, r_V, d_V); in __launch_bounds__() 46 … BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * num_elem, BASIS_Q_1D * BASIS_Q_1D, r_V, d_V); in __launch_bounds__() 52 … BASIS_Q_1D * BASIS_Q_1D * BASIS_Q_1D, r_V, d_V); in __launch_bounds__() 58 …um_elem, const CeedScalar *c_B, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 74 …tStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D, r_U, d_V); in __launch_bounds__() 77 … BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * num_elem, BASIS_Q_1D * BASIS_Q_1D, r_U, d_V); in __launch_bounds__() 82 … BASIS_Q_1D * BASIS_Q_1D * BASIS_Q_1D, r_U, d_V); in __launch_bounds__() 88 …um_elem, const CeedScalar *c_B, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 111 …tStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D, r_V, d_V); in __launch_bounds__() [all …]
|
| H A D | hip-shared-basis-tensor-at-points.h | 25 const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 74 …BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PTS, BASIS_NUM_PTS, r_V, d_V); in __launch_bounds__() 81 const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 109 …tStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D, r_V, d_V); in __launch_bounds__() 111 … BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem, BASIS_P_1D * BASIS_P_1D, r_V, d_V); in __launch_bounds__() 114 … BASIS_P_1D * BASIS_P_1D * BASIS_P_1D, r_V, d_V); in __launch_bounds__() 137 …tStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D, r_V, d_V); in __launch_bounds__() 140 … BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem, BASIS_P_1D * BASIS_P_1D, r_V, d_V); in __launch_bounds__() 144 … BASIS_P_1D * BASIS_P_1D * BASIS_P_1D, r_V, d_V); in __launch_bounds__() 151 … const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() [all …]
|
| H A D | hip-shared-basis-nontensor.h | 19 …um_elem, const CeedScalar *c_B, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 41 …iteElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, r_V, d_V); in __launch_bounds__() 46 …um_elem, const CeedScalar *c_B, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 68 …iteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in __launch_bounds__() 73 …um_elem, const CeedScalar *c_B, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 95 …SumElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in __launch_bounds__() 103 …um_elem, const CeedScalar *c_G, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 125 …rided1d<BASIS_NUM_COMP * BASIS_DIM, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, r_V, d_V); in __launch_bounds__() 130 …um_elem, const CeedScalar *c_G, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in __launch_bounds__() 152 …iteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in __launch_bounds__() [all …]
|
| H A D | hip-ref-basis-nontensor-templates.h | 18 const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in Contract() argument 34 d_V[elem * strides_elem_V + comp * strides_comp_V + d * strides_q_comp_V + t_id] = r_V[d]; in Contract() 45 … const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in ContractTranspose() argument 58 d_V[elem * strides_elem_V + comp * strides_comp_V + t_id] += r_V; in ContractTranspose()
|
| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-ref-basis-nontensor.h | 22 CeedScalar *__restrict__ d_V) { in Interp() argument 25 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in Interp() 30 CeedScalar *__restrict__ d_V) { in InterpTranspose() argument 33 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in InterpTranspose() 41 CeedScalar *__restrict__ d_V) { in Deriv() argument 44 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in Deriv() 49 CeedScalar *__restrict__ d_V) { in DerivTranspose() argument 52 … BASIS_NUM_COMP * BASIS_Q * num_elem, d_B, d_U, d_V); in DerivTranspose() 59 …ht(const CeedInt num_elem, const CeedScalar *__restrict__ q_weight, CeedScalar *__restrict__ d_V) { in Weight() argument 64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; in Weight()
|
| H A D | cuda-shared-basis-tensor.h | 18 …um_elem, const CeedScalar *c_B, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in Interp() argument 41 …tStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D, r_V, d_V); in Interp() 45 … BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * num_elem, BASIS_Q_1D * BASIS_Q_1D, r_V, d_V); in Interp() 51 … BASIS_Q_1D * BASIS_Q_1D * BASIS_Q_1D, r_V, d_V); in Interp() 57 CeedScalar *__restrict__ d_V) { in InterpCollocated() argument 73 …tStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D, r_U, d_V); in InterpCollocated() 76 … BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * num_elem, BASIS_Q_1D * BASIS_Q_1D, r_U, d_V); in InterpCollocated() 81 … BASIS_Q_1D * BASIS_Q_1D * BASIS_Q_1D, r_U, d_V); in InterpCollocated() 87 CeedScalar *__restrict__ d_V) { in InterpTranspose() argument 110 …tStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D, r_V, d_V); in InterpTranspose() [all …]
|
| H A D | cuda-shared-basis-tensor-at-points.h | 24 …t CeedScalar *__restrict__ d_X, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in InterpAtPoints() argument 73 …BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PTS, BASIS_NUM_PTS, r_V, d_V); in InterpAtPoints() 80 … const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in InterpTransposeAtPoints() argument 108 …tStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D, r_V, d_V); in InterpTransposeAtPoints() 110 … BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem, BASIS_P_1D * BASIS_P_1D, r_V, d_V); in InterpTransposeAtPoints() 113 … BASIS_P_1D * BASIS_P_1D * BASIS_P_1D, r_V, d_V); in InterpTransposeAtPoints() 136 …tStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D, r_V, d_V); in InterpTransposeAtPoints() 139 … BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem, BASIS_P_1D * BASIS_P_1D, r_V, d_V); in InterpTransposeAtPoints() 143 … BASIS_P_1D * BASIS_P_1D * BASIS_P_1D, r_V, d_V); in InterpTransposeAtPoints() 150 … const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in InterpTransposeAddAtPoints() argument [all …]
|
| H A D | cuda-shared-basis-nontensor.h | 18 …um_elem, const CeedScalar *c_B, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in Interp() argument 40 …iteElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, r_V, d_V); in Interp() 45 CeedScalar *__restrict__ d_V) { in InterpTranspose() argument 67 …iteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in InterpTranspose() 72 CeedScalar *__restrict__ d_V) { in InterpTransposeAdd() argument 94 …SumElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in InterpTransposeAdd() 101 …um_elem, const CeedScalar *c_G, const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in Grad() argument 123 …rided1d<BASIS_NUM_COMP * BASIS_DIM, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, r_V, d_V); in Grad() 128 CeedScalar *__restrict__ d_V) { in GradTranspose() argument 150 …iteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in GradTranspose() [all …]
|
| H A D | cuda-ref-basis-nontensor-templates.h | 18 const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in Contract() argument 34 d_V[elem * strides_elem_V + comp * strides_comp_V + d * strides_q_comp_V + t_id] = r_V[d]; in Contract() 45 … const CeedScalar *__restrict__ d_U, CeedScalar *__restrict__ d_V) { in ContractTranspose() argument 58 d_V[elem * strides_elem_V + comp * strides_comp_V + t_id] += r_V; in ContractTranspose()
|
| /libCEED/include/ceed/jit-source/sycl/ |
| H A D | sycl-shared-basis-tensor.h | 23 global CeedScalar *restrict d_V) { in Interp() argument 39 …ed1d(BASIS_NUM_COMP, BASIS_Q_1D, num_elem, 1, BASIS_NUM_QPTS * num_elem, BASIS_NUM_QPTS, r_V, d_V); in Interp() 44 …ed2d(BASIS_NUM_COMP, BASIS_Q_1D, num_elem, 1, BASIS_NUM_QPTS * num_elem, BASIS_NUM_QPTS, r_V, d_V); in Interp() 49 …ed3d(BASIS_NUM_COMP, BASIS_Q_1D, num_elem, 1, BASIS_NUM_QPTS * num_elem, BASIS_NUM_QPTS, r_V, d_V); in Interp() 54 global CeedScalar *restrict d_V) { in InterpTranspose() argument 73 …1d(BASIS_NUM_COMP, BASIS_P_1D, num_elem, 1, BASIS_NUM_NODES * num_elem, BASIS_NUM_NODES, r_V, d_V); in InterpTranspose() 78 …2d(BASIS_NUM_COMP, BASIS_P_1D, num_elem, 1, BASIS_NUM_NODES * num_elem, BASIS_NUM_NODES, r_V, d_V); in InterpTranspose() 83 …3d(BASIS_NUM_COMP, BASIS_P_1D, num_elem, 1, BASIS_NUM_NODES * num_elem, BASIS_NUM_NODES, r_V, d_V); in InterpTranspose() 91 global const CeedScalar *restrict d_U, global CeedScalar *restrict d_V) { in Grad() argument 110 …ed1d(BASIS_NUM_COMP, BASIS_Q_1D, num_elem, 1, BASIS_NUM_QPTS * num_elem, BASIS_NUM_QPTS, r_V, d_V); in Grad() [all …]
|
| /libCEED/backends/sycl-ref/ |
| H A D | ceed-sycl-ref-basis.sycl.cpp | 339 const CeedScalar *d_U, CeedScalar *d_V) { in CeedBasisApplyNonTensorInterp_Sycl() argument 370 d_V[i + elem * v_stride + comp * v_comp_stride] = V; in CeedBasisApplyNonTensorInterp_Sycl() 380 const CeedScalar *d_U, CeedScalar *d_V) { in CeedBasisApplyNonTensorGrad_Sycl() argument 425 d_V[i + elem * v_stride + comp * v_comp_stride + d0 * v_dim_stride] = V[d0]; in CeedBasisApplyNonTensorGrad_Sycl() 435 …(sycl::queue &sycl_queue, CeedInt num_elem, const CeedBasisNonTensor_Sycl *impl, CeedScalar *d_V) { in CeedBasisApplyNonTensorWeight_Sycl() argument 448 d_V[i + elem * num_qpts] = q_weight[i]; in CeedBasisApplyNonTensorWeight_Sycl()
|