Home
last modified time | relevance | path

Searched refs:s_G (Results 1 – 7 of 7) sorted by relevance

/libCEED/include/ceed/jit-source/sycl/
H A Dsycl-shared-basis-tensor.h93 local CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in Grad() local
104 loadMatrix(BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D), d_grad_1d, s_G); in Grad()
109 Grad1d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_G, r_V, elem_scratch); in Grad()
114 GradTensor2d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_B, s_G, r_V, elem_scratch); in Grad()
119 …) GradTensorCollocated3d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_B, s_G, r_V, elem_scratch); in Grad()
120 else GradTensor3d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_B, s_G, r_V, elem_scratch); in Grad()
128 local CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in GradTranspose() local
139 loadMatrix(BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D), d_grad_1d, s_G); in GradTranspose()
144 GradTranspose1d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_G, r_V, elem_scratch); in GradTranspose()
149 GradTransposeTensor2d(BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, r_U, s_B, s_G, r_V, elem_scratch); in GradTranspose()
[all …]
H A Dsycl-shared-basis-tensor-templates.h78 …local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict… in Grad1d() argument
80 ContractX1d(P_1D, Q_1D, r_U + comp, s_G, r_V + comp, scratch); in Grad1d()
88 …local const CeedScalar *restrict s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict… in GradTranspose1d() argument
90 ContractTransposeX1d(P_1D, Q_1D, r_U + comp, s_G, r_V + comp, scratch); in GradTranspose1d()
235 …local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G, private CeedScalar *re… in GradTensor2d() argument
240 ContractX2d(P_1D, Q_1D, r_U + comp, s_G, r_t, scratch); in GradTensor2d()
243 ContractY2d(P_1D, Q_1D, r_t, s_G, r_V + comp + 1 * NUM_COMP, scratch); in GradTensor2d()
251 …local const CeedScalar *restrict s_B, local const CeedScalar *restrict s_G, private CeedScalar *re… in GradTransposeTensor2d() argument
257 ContractTransposeX2d(P_1D, Q_1D, r_t, s_G, r_V + comp, scratch); in GradTransposeTensor2d()
258 ContractTransposeY2d(P_1D, Q_1D, r_U + comp + 1 * NUM_COMP, s_G, r_t, scratch); in GradTransposeTensor2d()
[all …]
H A Dsycl-gen-templates.h286 …const local CeedScalar *s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict scratch)… in gradCollo3d() argument
300 …r_V[comp + 0 * num_comp] += s_G[i + item_id_x * Q_1D] * scratch[i + item_id_y * T_1D]; // Contrac… in gradCollo3d()
305 …r_V[comp + 1 * num_comp] += s_G[i + item_id_y * Q_1D] * scratch[item_id_x + i * T_1D]; // Contrac… in gradCollo3d()
309 …for (CeedInt i = 0; i < Q_1D; ++i) r_V[comp + 2 * num_comp] += s_G[i + q * Q_1D] * r_U[i + comp * … in gradCollo3d()
320 …const local CeedScalar *restrict s_G, private CeedScalar *restrict r_V, local CeedScalar *restrict… in gradColloTranspose3d() argument
333 …r_V[q + comp * Q_1D] += s_G[item_id_x + i * Q_1D] * scratch[i + item_id_y * T_1D]; // Contract x … in gradColloTranspose3d()
345 …r_V[q + comp * Q_1D] += s_G[item_id_y + i * Q_1D] * scratch[item_id_x + i * T_1D]; // Contract y … in gradColloTranspose3d()
352 …r_V[i + comp * Q_1D] += s_G[i + q * Q_1D] * r_U[comp + 2 * num_comp]; // PARTIAL contract z direc… in gradColloTranspose3d()
/libCEED/include/ceed/jit-source/cuda/
H A Dcuda-shared-basis-tensor.h244 __shared__ CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in Grad() local
245 LoadMatrix<BASIS_Q_1D, BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D>(data, c_G, s_G); in Grad()
252 Grad1d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in Grad()
256 GradTensor2d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in Grad()
262 …adTensorCollocated3d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in Grad()
263 … else GradTensor3d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in Grad()
285 __shared__ CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in GradCollocated() local
286 LoadMatrix<BASIS_Q_1D, BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D>(data, c_G, s_G); in GradCollocated()
293 Grad1d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, NULL, s_G, r_V); in GradCollocated()
297 …orCollocatedNodes2d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, NULL, s_G, r_V); in GradCollocated()
[all …]
H A Dcuda-shared-basis-nontensor.h115 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in Grad() local
116 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in Grad()
122 GradNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_V); in Grad()
142 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in GradTranspose() local
143 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in GradTranspose()
149 …GradTransposeNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_… in GradTranspose()
169 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in GradTransposeAdd() local
170 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in GradTransposeAdd()
176 …GradTransposeNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_… in GradTransposeAdd()
/libCEED/include/ceed/jit-source/hip/
H A Dhip-shared-basis-tensor.h246 __shared__ CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in __launch_bounds__() local
247 LoadMatrix<BASIS_Q_1D, BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D>(data, c_G, s_G); in __launch_bounds__()
254 Grad1d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in __launch_bounds__()
258 GradTensor2d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in __launch_bounds__()
264 …adTensorCollocated3d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in __launch_bounds__()
265 … else GradTensor3d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, s_B, s_G, r_V); in __launch_bounds__()
288 __shared__ CeedScalar s_G[BASIS_Q_1D * (BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D)]; in __launch_bounds__() local
289 LoadMatrix<BASIS_Q_1D, BASIS_HAS_COLLOCATED_GRAD ? BASIS_Q_1D : BASIS_P_1D>(data, c_G, s_G); in __launch_bounds__()
296 Grad1d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, NULL, s_G, r_V); in __launch_bounds__()
300 …orCollocatedNodes2d<BASIS_NUM_COMP, BASIS_P_1D, BASIS_Q_1D, BASIS_T_1D>(data, r_U, NULL, s_G, r_V); in __launch_bounds__()
[all …]
H A Dhip-shared-basis-nontensor.h117 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in __launch_bounds__() local
118 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in __launch_bounds__()
124 GradNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_V); in __launch_bounds__()
144 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in __launch_bounds__() local
145 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in __launch_bounds__()
151 …GradTransposeNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_… in __launch_bounds__()
171 __shared__ CeedScalar s_G[BASIS_P * BASIS_Q * BASIS_DIM]; in __launch_bounds__() local
172 LoadMatrix<BASIS_P, BASIS_Q * BASIS_DIM>(data, c_G, s_G); in __launch_bounds__()
178 …GradTransposeNonTensor<BASIS_NUM_COMP, BASIS_DIM, BASIS_P, BASIS_Q, BASIS_T_1D>(data, r_U, s_G, r_… in __launch_bounds__()