| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-ref-basis-nontensor.h | 23 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local 24 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * … in Interp() 31 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local 32 …ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, B… in InterpTranspose() 42 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Deriv() local 43 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * n… in Deriv() 50 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in DerivTranspose() local 51 …ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BA… in DerivTranspose() 63 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local 64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; in Weight()
|
| H A D | hip-shared-basis-tensor.h | 38 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 40 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in __launch_bounds__() 42 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D… in __launch_bounds__() 44 …ReadElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem… in __launch_bounds__() 46 …WriteElementStrided2d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * num_ele… in __launch_bounds__() 48 …ReadElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P_… in __launch_bounds__() 51 …WriteElementStrided3d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * BASIS_Q… in __launch_bounds__() 71 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 73 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in __launch_bounds__() 74 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D… in __launch_bounds__() [all …]
|
| H A D | hip-shared-basis-nontensor.h | 38 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 39 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, d_U, r_U… in __launch_bounds__() 41 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, r_V, d_… in __launch_bounds__() 65 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 66 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, d_U, r_U… in __launch_bounds__() 68 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_… in __launch_bounds__() 92 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 93 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, d_U, r_U… in __launch_bounds__() 95 …SumElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in __launch_bounds__() 122 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local [all …]
|
| H A D | hip-shared-basis-tensor-at-points.h | 46 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 49 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in __launch_bounds__() 52 …ReadElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem… in __launch_bounds__() 55 …ReadElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P_… in __launch_bounds__() 66 …ReadPoint<BASIS_DIM, BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PTS, BAS… in __launch_bounds__() 74 …WritePoint<BASIS_NUM_COMP, BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PT… in __launch_bounds__() 102 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in __launch_bounds__() local 109 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D… in __launch_bounds__() 111 …WriteElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_ele… in __launch_bounds__() 113 …WriteElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P… in __launch_bounds__() [all …]
|
| H A D | hip-shared-basis-read-write-templates.h | 28 inline __device__ void ReadElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided1d() argument 32 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided1d() 44 inline __device__ void WriteElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided1d() argument 48 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided1d() 57 inline __device__ void SumElementStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedInt … in SumElementStrided1d() argument 61 const CeedInt ind = node * strides_node + elem * strides_elem; in SumElementStrided1d() 77 inline __device__ void ReadElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedInt… in ReadElementStrided2d() argument 81 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided2d() 93 inline __device__ void WriteElementStrided2d(SharedData_Hip &data, const CeedInt elem, const CeedIn… in WriteElementStrided2d() argument 97 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided2d() [all …]
|
| H A D | hip-ref-basis-tensor.h | 42 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Interp() local 44 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in Interp() 45 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in Interp() 103 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Grad() local 109 const CeedScalar *cur_u = &u[elem * u_stride + dim_1 * u_dim_stride + comp * u_comp_stride]; in Grad() 110 CeedScalar *cur_v = &v[elem * v_stride + dim_1 * v_dim_stride + comp * v_comp_stride]; in Grad() 146 const size_t elem = blockIdx.x; in Weight1d() local 148 if (elem < num_elem) w[elem * BASIS_Q_1D + i] = q_weight_1d[i]; in Weight1d() 160 const size_t elem = blockIdx.x; in Weight2d() local 162 if (elem < num_elem) { in Weight2d() [all …]
|
| H A D | hip-gen-templates.h | 28 inline __device__ void ReadPoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const C… in ReadPoint() argument 30 const CeedInt ind = indices[p + elem * NUM_PTS]; in ReadPoint() 41 inline __device__ void WritePoint(SharedData_Hip &data, const CeedInt elem, const CeedInt p, const … in WritePoint() argument 44 const CeedInt ind = indices[p + elem * NUM_PTS]; in WritePoint() 73 …ecStandard1d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__r… in ReadLVecStandard1d() argument 77 const CeedInt ind = indices[node + elem * P_1D]; in ReadLVecStandard1d() 87 inline __device__ void ReadLVecStrided1d(SharedData_Hip &data, const CeedInt elem, const CeedScalar… in ReadLVecStrided1d() argument 90 const CeedInt ind = node * STRIDES_NODE + elem * STRIDES_ELEM; in ReadLVecStrided1d() 100 …ecStandard1d(SharedData_Hip &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__r… in WriteLVecStandard1d() argument 104 const CeedInt ind = indices[node + elem * P_1D]; in WriteLVecStandard1d() [all …]
|
| H A D | hip-ref-restriction-curl-oriented.h | 19 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedNoTranspose() local 32 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedNoTranspose() 44 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedNoTranspose() local 57 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedUnsignedNoTranspose() 71 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local 78 …value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_… in CurlOrientedTranspose() 79 …value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient… in CurlOrientedTranspose() 81 …ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] *… in CurlOrientedTranspose() 102 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local 108 …value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR… in CurlOrientedTranspose() [all …]
|
| H A D | hip-ref-restriction-strided.h | 18 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedNoTranspose() local 21 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = in StridedNoTranspose() 22 u[loc_node * RSTR_STRIDE_NODES + comp * RSTR_STRIDE_COMP + elem * RSTR_STRIDE_ELEM]; in StridedNoTranspose() 33 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedTranspose() local 36 v[loc_node * RSTR_STRIDE_NODES + comp * RSTR_STRIDE_COMP + elem * RSTR_STRIDE_ELEM] += in StridedTranspose() 37 u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in StridedTranspose()
|
| H A D | hip-ref-basis-tensor-at-points.h | 67 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpAtPoints() local 69 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in InterpAtPoints() 70 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in InterpAtPoints() 108 …ChebyshevPolynomialsAtPoint<BASIS_Q_1D>(coords[elem * v_stride + d * v_comp_stride + p], chebyshev… in InterpAtPoints() 150 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpTransposeAtPoints() local 152 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in InterpTransposeAtPoints() 153 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in InterpTransposeAtPoints() 165 if (p >= points_per_elem[elem]) continue; in InterpTransposeAtPoints() 175 …ChebyshevPolynomialsAtPoint<BASIS_Q_1D>(coords[elem * u_stride + d * u_comp_stride + p], chebyshev… in InterpTransposeAtPoints() 248 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in GradAtPoints() local [all …]
|
| H A D | hip-ref-restriction-at-points.h | 21 const CeedInt elem = node / RSTR_ELEM_SIZE; in AtPointsTranspose() local 23 if (loc_node >= points_per_elem[elem]) continue; in AtPointsTranspose() 25 … * RSTR_COMP_STRIDE], u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]); in AtPointsTranspose() 45 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in AtPointsTranspose() local 47 if (loc_node >= points_per_elem[elem]) continue; in AtPointsTranspose() 49 value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in AtPointsTranspose()
|
| H A D | hip-ref-basis-nontensor-templates.h | 16 inline __device__ void Contract(const CeedInt elem, const CeedInt strides_elem_U, const CeedInt str… in Contract() argument 26 U = &d_U[elem * strides_elem_U + comp * strides_comp_U]; in Contract() 34 d_V[elem * strides_elem_V + comp * strides_comp_V + d * strides_q_comp_V + t_id] = r_V[d]; in Contract() 43 inline __device__ void ContractTranspose(const CeedInt elem, const CeedInt strides_elem_U, const Ce… in ContractTranspose() argument 55 U = &d_U[elem * strides_elem_U + comp * strides_comp_U + d * strides_q_comp_U]; in ContractTranspose() 58 d_V[elem * strides_elem_V + comp * strides_comp_V + t_id] += r_V; in ContractTranspose()
|
| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-ref-basis-nontensor.h | 23 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local 24 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * … in Interp() 31 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local 32 …ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, B… in InterpTranspose() 42 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Deriv() local 43 …Contract<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_P, BASIS_Q, BASIS_P * n… in Deriv() 50 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in DerivTranspose() local 51 …ContractTranspose<BASIS_NUM_COMP, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q>(elem, BASIS_Q, BASIS_P, BA… in DerivTranspose() 63 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Weight() local 64 d_V[elem * BASIS_Q + t_id] = q_weight[t_id]; in Weight()
|
| H A D | cuda-shared-basis-tensor.h | 37 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local 39 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in Interp() 41 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D… in Interp() 43 …ReadElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem… in Interp() 45 …WriteElementStrided2d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * num_ele… in Interp() 47 …ReadElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P_… in Interp() 50 …WriteElementStrided3d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * BASIS_Q_1D * BASIS_Q… in Interp() 70 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpCollocated() local 72 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in InterpCollocated() 73 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q_1D>(data, elem, 1, BASIS_Q_1D * num_elem, BASIS_Q_1D… in InterpCollocated() [all …]
|
| H A D | cuda-shared-basis-nontensor.h | 37 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Interp() local 38 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, d_U, r_U… in Interp() 40 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, r_V, d_… in Interp() 64 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTranspose() local 65 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, d_U, r_U… in InterpTranspose() 67 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_… in InterpTranspose() 91 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAdd() local 92 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_Q>(data, elem, 1, BASIS_Q * num_elem, BASIS_Q, d_U, r_U… in InterpTransposeAdd() 94 …SumElementStrided1d<BASIS_NUM_COMP, BASIS_P>(data, elem, 1, BASIS_P * num_elem, BASIS_P, r_V, d_V); in InterpTransposeAdd() 120 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in Grad() local [all …]
|
| H A D | cuda-shared-basis-tensor-at-points.h | 45 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpAtPoints() local 48 …ReadElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D,… in InterpAtPoints() 51 …ReadElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_elem… in InterpAtPoints() 54 …ReadElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P_… in InterpAtPoints() 65 …ReadPoint<BASIS_DIM, BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PTS, BAS… in InterpAtPoints() 73 …WritePoint<BASIS_NUM_COMP, BASIS_NUM_PTS>(data, elem, p, BASIS_NUM_PTS, 1, num_elem * BASIS_NUM_PT… in InterpAtPoints() 101 …for (CeedInt elem = blockIdx.x * blockDim.z + threadIdx.z; elem < num_elem; elem += gridDim.x * bl… in InterpTransposeAtPoints() local 108 …WriteElementStrided1d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * num_elem, BASIS_P_1D… in InterpTransposeAtPoints() 110 …WriteElementStrided2d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * num_ele… in InterpTransposeAtPoints() 112 …WriteElementStrided3d<BASIS_NUM_COMP, BASIS_P_1D>(data, elem, 1, BASIS_P_1D * BASIS_P_1D * BASIS_P… in InterpTransposeAtPoints() [all …]
|
| H A D | cuda-shared-basis-read-write-templates.h | 28 inline __device__ void ReadElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided1d() argument 32 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided1d() 44 inline __device__ void WriteElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided1d() argument 48 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided1d() 57 inline __device__ void SumElementStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedInt… in SumElementStrided1d() argument 61 const CeedInt ind = node * strides_node + elem * strides_elem; in SumElementStrided1d() 77 inline __device__ void ReadElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedIn… in ReadElementStrided2d() argument 81 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided2d() 93 inline __device__ void WriteElementStrided2d(SharedData_Cuda &data, const CeedInt elem, const CeedI… in WriteElementStrided2d() argument 97 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided2d() [all …]
|
| H A D | cuda-ref-basis-tensor.h | 42 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Interp() local 44 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in Interp() 45 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in Interp() 103 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in Grad() local 109 const CeedScalar *cur_u = &u[elem * u_stride + dim_1 * u_dim_stride + comp * u_comp_stride]; in Grad() 110 CeedScalar *cur_v = &v[elem * v_stride + dim_1 * v_dim_stride + comp * v_comp_stride]; in Grad() 146 const size_t elem = blockIdx.x; in Weight1d() local 148 if (elem < num_elem) w[elem * BASIS_Q_1D + i] = q_weight_1d[i]; in Weight1d() 160 const size_t elem = blockIdx.x; in Weight2d() local 162 if (elem < num_elem) { in Weight2d() [all …]
|
| H A D | cuda-gen-templates.h | 28 inline __device__ void ReadPoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const … in ReadPoint() argument 30 const CeedInt ind = indices[p + elem * NUM_PTS]; in ReadPoint() 41 inline __device__ void WritePoint(SharedData_Cuda &data, const CeedInt elem, const CeedInt p, const… in WritePoint() argument 44 const CeedInt ind = indices[p + elem * NUM_PTS]; in WritePoint() 73 …cStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__r… in ReadLVecStandard1d() argument 77 const CeedInt ind = indices[node + elem * P_1D]; in ReadLVecStandard1d() 87 inline __device__ void ReadLVecStrided1d(SharedData_Cuda &data, const CeedInt elem, const CeedScala… in ReadLVecStrided1d() argument 91 const CeedInt ind = node * STRIDES_NODE + elem * STRIDES_ELEM; in ReadLVecStrided1d() 101 …cStandard1d(SharedData_Cuda &data, const CeedInt num_nodes, const CeedInt elem, const CeedInt *__r… in WriteLVecStandard1d() argument 105 const CeedInt ind = indices[node + elem * P_1D]; in WriteLVecStandard1d() [all …]
|
| H A D | cuda-ref-restriction-curl-oriented.h | 19 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedNoTranspose() local 32 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedNoTranspose() 44 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedUnsignedNoTranspose() local 57 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = value; in CurlOrientedUnsignedNoTranspose() 71 const CeedInt elem = node / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local 78 …value += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_… in CurlOrientedTranspose() 79 …value += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] * curl_orient… in CurlOrientedTranspose() 81 …ELEM_SIZE - 1) ? u[loc_node + 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] *… in CurlOrientedTranspose() 102 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in CurlOrientedTranspose() local 108 …value[comp] += loc_node > 0 ? u[loc_node - 1 + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR… in CurlOrientedTranspose() [all …]
|
| H A D | cuda-ref-restriction-strided.h | 18 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedNoTranspose() local 21 v[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE] = in StridedNoTranspose() 22 u[loc_node * RSTR_STRIDE_NODES + comp * RSTR_STRIDE_COMP + elem * RSTR_STRIDE_ELEM]; in StridedNoTranspose() 33 const CeedInt elem = node / RSTR_ELEM_SIZE; in StridedTranspose() local 36 v[loc_node * RSTR_STRIDE_NODES + comp * RSTR_STRIDE_COMP + elem * RSTR_STRIDE_ELEM] += in StridedTranspose() 37 u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in StridedTranspose()
|
| H A D | cuda-ref-basis-tensor-at-points.h | 67 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpAtPoints() local 69 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in InterpAtPoints() 70 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in InterpAtPoints() 108 …ChebyshevPolynomialsAtPoint<BASIS_Q_1D>(coords[elem * v_stride + d * v_comp_stride + p], chebyshev… in InterpAtPoints() 150 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in InterpTransposeAtPoints() local 152 const CeedScalar *cur_u = &u[elem * u_stride + comp * u_comp_stride]; in InterpTransposeAtPoints() 153 CeedScalar *cur_v = &v[elem * v_stride + comp * v_comp_stride]; in InterpTransposeAtPoints() 165 if (p >= points_per_elem[elem]) continue; in InterpTransposeAtPoints() 175 …ChebyshevPolynomialsAtPoint<BASIS_Q_1D>(coords[elem * u_stride + d * u_comp_stride + p], chebyshev… in InterpTransposeAtPoints() 248 for (CeedInt elem = blockIdx.x; elem < num_elem; elem += gridDim.x) { in GradAtPoints() local [all …]
|
| H A D | cuda-ref-restriction-at-points.h | 21 const CeedInt elem = node / RSTR_ELEM_SIZE; in AtPointsTranspose() local 23 if (loc_node >= points_per_elem[elem]) continue; in AtPointsTranspose() 25 … * RSTR_COMP_STRIDE], u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]); in AtPointsTranspose() 45 const CeedInt elem = t_ind / RSTR_ELEM_SIZE; in AtPointsTranspose() local 47 if (loc_node >= points_per_elem[elem]) continue; in AtPointsTranspose() 49 value[comp] += u[loc_node + comp * RSTR_ELEM_SIZE * RSTR_NUM_ELEM + elem * RSTR_ELEM_SIZE]; in AtPointsTranspose()
|
| /libCEED/include/ceed/jit-source/sycl/ |
| H A D | sycl-shared-basis-read-write-templates.h | 32 const CeedInt elem = get_global_id(2); in ReadElementStrided1d() local 34 if (item_id_x < P_1D && elem < num_elem) { in ReadElementStrided1d() 36 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided1d() 50 const CeedInt elem = get_global_id(2); in WriteElementStrided1d() local 52 if (item_id_x < P_1D && elem < num_elem) { in WriteElementStrided1d() 54 const CeedInt ind = node * strides_node + elem * strides_elem; in WriteElementStrided1d() 73 const CeedInt elem = get_global_id(2); in ReadElementStrided2d() local 75 if (item_id_x < P_1D && item_id_y < P_1D && elem < num_elem) { in ReadElementStrided2d() 77 const CeedInt ind = node * strides_node + elem * strides_elem; in ReadElementStrided2d() 92 const CeedInt elem = get_global_id(2); in WriteElementStrided2d() local [all …]
|
| H A D | sycl-gen-templates.h | 36 const CeedInt elem = get_global_id(2); in readDofsOffset1d() local 38 if (item_id_x < P_1D && elem < num_elem) { in readDofsOffset1d() 40 const CeedInt ind = indices[node + elem * P_1D]; in readDofsOffset1d() 54 const CeedInt elem = get_global_id(2); in readDofsStrided1d() local 56 if (item_id_x < P_1D && elem < num_elem) { in readDofsStrided1d() 58 const CeedInt ind = node * strides_node + elem * strides_elem; in readDofsStrided1d() 71 const CeedInt elem = get_global_id(2); in writeDofsOffset1d() local 73 if (item_id_x < P_1D && elem < num_elem) { in writeDofsOffset1d() 75 const CeedInt ind = indices[node + elem * P_1D]; in writeDofsOffset1d() 88 const CeedInt elem = get_global_id(2); in writeDofsStrided1d() local [all …]
|