| /libCEED/include/ceed/jit-source/sycl/ |
| H A D | sycl-shared-basis-tensor.h | 93 local CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in Grad() local 104 loadMatrix(BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D), d_grad_1d, s_G); in Grad() 109 Grad1d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_G, r_V, elem_scratch); in Grad() 114 GradTensor2d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_B, s_G, r_V, elem_scratch); in Grad() 119 …) GradTensorCollocated3d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_B, s_G, r_V, elem_scratch); in Grad() 120 else GradTensor3d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_B, s_G, r_V, elem_scratch); in Grad() 128 local CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in GradTranspose() local 139 loadMatrix(BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D), d_grad_1d, s_G); in GradTranspose() 144 GradTranspose1d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_G, r_V, elem_scratch); in GradTranspose() 149 GradTransposeTensor2d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_B, s_G, r_V, elem_scratch); in GradTranspose() [all …]
|
| H A D | sycl-shared-basis-tensor-templates.h | 78 …local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict… in Grad1d() argument 80 ContractX1d(P_1D, Q_1D, r_U + comp, s_G, r_V + comp, scratch); in Grad1d() 88 …local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict… in GradTranspose1d() argument 90 ContractTransposeX1d(P_1D, Q_1D, r_U + comp, s_G, r_V + comp, scratch); in GradTranspose1d() 235 …local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G, private CeedScalar *re… in GradTensor2d() argument 240 ContractX2d(P_1D, Q_1D, r_U + comp, s_G, r_t, scratch); in GradTensor2d() 243 ContractY2d(P_1D, Q_1D, r_t, s_G, r_V + comp + 1 * NUM_COMP, scratch); in GradTensor2d() 251 …local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G, private CeedScalar *re… in GradTransposeTensor2d() argument 257 ContractTransposeX2d(P_1D, Q_1D, r_t, s_G, r_V + comp, scratch); in GradTransposeTensor2d() 258 ContractTransposeY2d(P_1D, Q_1D, r_U + comp + 1 * NUM_COMP, s_G, r_t, scratch); in GradTransposeTensor2d() [all …]
|
| H A D | sycl-gen-templates.h | 286 …const local CeedScalar *s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch)… in gradCollo3d() argument 300 …r_V[comp + 0 * num_comp] += s_G[i + item_id_x * Q_1D] * scratch[i + item_id_y * T_1D]; // Contrac… in gradCollo3d() 305 …r_V[comp + 1 * num_comp] += s_G[i + item_id_y * Q_1D] * scratch[item_id_x + i * T_1D]; // Contrac… in gradCollo3d() 309 …for (CeedInt i = 0; i < Q_1D; ++i) r_V[comp + 2 * num_comp] += s_G[i + q * Q_1D] * r_U[i + comp * … in gradCollo3d() 320 …const local CeedScalar *restrict s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict… in gradColloTranspose3d() argument 333 …r_V[q + comp * Q_1D] += s_G[item_id_x + i * Q_1D] * scratch[i + item_id_y * T_1D]; // Contract x … in gradColloTranspose3d() 345 …r_V[q + comp * Q_1D] += s_G[item_id_y + i * Q_1D] * scratch[item_id_x + i * T_1D]; // Contract y … in gradColloTranspose3d() 352 …r_V[i + comp * Q_1D] += s_G[i + q * Q_1D] * r_U[comp + 2 * num_comp]; // PARTIAL contract z direc… in gradColloTranspose3d()
|
| /libCEED/include/ceed/jit-source/cuda/ |
| H A D | cuda-shared-basis-tensor.h | 244 __shared__ CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in Grad() local 245 LoadMatrix<BASIS_Q_1D, BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D>(data, c_G, s_G); in Grad() 252 Grad1d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in Grad() 256 GradTensor2d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in Grad() 262 …adTensorCollocated3d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in Grad() 263 … else GradTensor3d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in Grad() 285 __shared__ CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in GradCollocated() local 286 LoadMatrix<BASIS_Q_1D, BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D>(data, c_G, s_G); in GradCollocated() 293 Grad1d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, NULL, s_G, r_V); in GradCollocated() 297 …orCollocatedNodes2d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, NULL, s_G, r_V); in GradCollocated() [all …]
|
| H A D | cuda-shared-basis-nontensor.h | 115 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in Grad() local 116 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in Grad() 122 GradNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_V); in Grad() 142 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in GradTranspose() local 143 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in GradTranspose() 149 …GradTransposeNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_… in GradTranspose() 169 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in GradTransposeAdd() local 170 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in GradTransposeAdd() 176 …GradTransposeNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_… in GradTransposeAdd()
|
| /libCEED/include/ceed/jit-source/hip/ |
| H A D | hip-shared-basis-tensor.h | 246 __shared__ CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in __launch_bounds__() local 247 LoadMatrix<BASIS_Q_1D, BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D>(data, c_G, s_G); in __launch_bounds__() 254 Grad1d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in __launch_bounds__() 258 GradTensor2d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in __launch_bounds__() 264 …adTensorCollocated3d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in __launch_bounds__() 265 … else GradTensor3d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in __launch_bounds__() 288 __shared__ CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in __launch_bounds__() local 289 LoadMatrix<BASIS_Q_1D, BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D>(data, c_G, s_G); in __launch_bounds__() 296 Grad1d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, NULL, s_G, r_V); in __launch_bounds__() 300 …orCollocatedNodes2d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, NULL, s_G, r_V); in __launch_bounds__() [all …]
|
| H A D | hip-shared-basis-nontensor.h | 117 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in __launch_bounds__() local 118 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in __launch_bounds__() 124 GradNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_V); in __launch_bounds__() 144 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in __launch_bounds__() local 145 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in __launch_bounds__() 151 …GradTransposeNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_… in __launch_bounds__() 171 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in __launch_bounds__() local 172 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in __launch_bounds__() 178 …GradTransposeNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_… in __launch_bounds__()
|