Lines Matching refs:data
20 inline __device__ void ContractX1d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B,… in ContractX1d() argument
22 data.slice[data.t_id_x] = *U; in ContractX1d()
25 if (data.t_id_x < Q_1D) { in ContractX1d()
27 *V += B[i + data.t_id_x * P_1D] * data.slice[i]; // Contract x direction in ContractX1d()
36 inline __device__ void ContractTransposeX1d(SharedData_Cuda &data, const CeedScalar *U, const CeedS… in ContractTransposeX1d() argument
38 data.slice[data.t_id_x] = *U; in ContractTransposeX1d()
41 if (data.t_id_x < P_1D) { in ContractTransposeX1d()
43 *V += B[data.t_id_x + i * P_1D] * data.slice[i]; // Contract x direction in ContractTransposeX1d()
52 inline __device__ void Interp1d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const Ce… in Interp1d() argument
54 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); in Interp1d()
62 inline __device__ void InterpTranspose1d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U,… in InterpTranspose1d() argument
65 ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_B, &r_V[comp]); in InterpTranspose1d()
73 inline __device__ void InterpCollocatedNodes1d(SharedData_Cuda &data, const CeedScalar *__restrict_… in InterpCollocatedNodes1d() argument
84 inline __device__ void InterpTransposeCollocatedNodes1d(SharedData_Cuda &data, const CeedScalar *__… in InterpTransposeCollocatedNodes1d() argument
95 inline __device__ void Grad1d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, const Ceed… in Grad1d() argument
98 ContractX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_G, &r_V[comp]); in Grad1d()
106 inline __device__ void GradTranspose1d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, c… in GradTranspose1d() argument
109 ContractTransposeX1d<NUM_COMP, P_1D, Q_1D>(data, &r_U[comp], c_G, &r_V[comp]); in GradTranspose1d()
117 inline __device__ void Weight1d(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weight_1d, … in Weight1d() argument
118 *w = (data.t_id_x < Q_1D) ? q_weight_1d[data.t_id_x] : 0.0; in Weight1d()
129 inline __device__ void ContractX2d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B,… in ContractX2d() argument
131 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; in ContractX2d()
134 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { in ContractX2d()
136 *V += B[i + data.t_id_x * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction in ContractX2d()
145 inline __device__ void ContractY2d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B,… in ContractY2d() argument
147 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; in ContractY2d()
150 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { in ContractY2d()
152 *V += B[i + data.t_id_y * P_1D] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction in ContractY2d()
161 inline __device__ void ContractTransposeY2d(SharedData_Cuda &data, const CeedScalar *U, const CeedS… in ContractTransposeY2d() argument
163 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; in ContractTransposeY2d()
166 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { in ContractTransposeY2d()
168 *V += B[data.t_id_y + i * P_1D] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction in ContractTransposeY2d()
177 inline __device__ void ContractTransposeX2d(SharedData_Cuda &data, const CeedScalar *U, const CeedS… in ContractTransposeX2d() argument
179 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; in ContractTransposeX2d()
182 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { in ContractTransposeX2d()
184 *V += B[data.t_id_x + i * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction in ContractTransposeX2d()
193 inline __device__ void ContractTransposeAddX2d(SharedData_Cuda &data, const CeedScalar *U, const Ce… in ContractTransposeAddX2d() argument
195 data.slice[data.t_id_x + data.t_id_y * T_1D] = *U; in ContractTransposeAddX2d()
197 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { in ContractTransposeAddX2d()
199 *V += B[data.t_id_x + i * P_1D] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction in ContractTransposeAddX2d()
208 inline __device__ void InterpTensor2d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, co… in InterpTensor2d() argument
212 ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_B, r_t); in InterpTensor2d()
213 ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_B, &r_V[comp]); in InterpTensor2d()
221 inline __device__ void InterpTransposeTensor2d(SharedData_Cuda &data, const CeedScalar *__restrict_… in InterpTransposeTensor2d() argument
225 ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_B, r_t); in InterpTransposeTensor2d()
226 ContractTransposeX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_B, &r_V[comp]); in InterpTransposeTensor2d()
234 inline __device__ void InterpTensorCollocatedNodes2d(SharedData_Cuda &data, const CeedScalar *__res… in InterpTensorCollocatedNodes2d() argument
245 inline __device__ void InterpTransposeTensorCollocatedNodes2d(SharedData_Cuda &data, const CeedScal… in InterpTransposeTensorCollocatedNodes2d() argument
256 inline __device__ void GradTensor2d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, cons… in GradTensor2d() argument
260 ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, r_t); in GradTensor2d()
261 ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_B, &r_V[comp + 0 * NUM_COMP]); in GradTensor2d()
262 ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_B, r_t); in GradTensor2d()
263 ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_G, &r_V[comp + 1 * NUM_COMP]); in GradTensor2d()
271 inline __device__ void GradTransposeTensor2d(SharedData_Cuda &data, const CeedScalar *__restrict__ … in GradTransposeTensor2d() argument
275 ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 0 * NUM_COMP], c_B, r_t); in GradTransposeTensor2d()
276 ContractTransposeX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_G, &r_V[comp]); in GradTransposeTensor2d()
277 ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 1 * NUM_COMP], c_G, r_t); in GradTransposeTensor2d()
278 ContractTransposeAddX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t, c_B, &r_V[comp]); in GradTransposeTensor2d()
286 inline __device__ void GradTensorCollocatedNodes2d(SharedData_Cuda &data, const CeedScalar *__restr… in GradTensorCollocatedNodes2d() argument
289 ContractX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, &r_V[comp + 0 * NUM_COMP]); in GradTensorCollocatedNodes2d()
290 ContractY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp], c_G, &r_V[comp + 1 * NUM_COMP]); in GradTensorCollocatedNodes2d()
298 inline __device__ void GradTransposeTensorCollocatedNodes2d(SharedData_Cuda &data, const CeedScalar… in GradTransposeTensorCollocatedNodes2d() argument
301 …ContractTransposeY2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 1 * NUM_COMP], c_G, &r_V[comp]); in GradTransposeTensorCollocatedNodes2d()
302 …ContractTransposeAddX2d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp + 0 * NUM_COMP], c_G, &r_V[com… in GradTransposeTensorCollocatedNodes2d()
310 inline __device__ void WeightTensor2d(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weigh… in WeightTensor2d() argument
311 …*w = (data.t_id_x < Q_1D && data.t_id_y < Q_1D) ? q_weight_1d[data.t_id_x] * q_weight_1d[data.t_id… in WeightTensor2d()
322 inline __device__ void ContractX3d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B,… in ContractX3d() argument
325 r_B[i] = B[i + data.t_id_x * P_1D]; in ContractX3d()
330 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; in ContractX3d()
333 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { in ContractX3d()
335 V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction in ContractX3d()
345 inline __device__ void ContractY3d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B,… in ContractY3d() argument
348 r_B[i] = B[i + data.t_id_y * P_1D]; in ContractY3d()
353 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; in ContractY3d()
356 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { in ContractY3d()
358 V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction in ContractY3d()
368 inline __device__ void ContractZ3d(SharedData_Cuda &data, const CeedScalar *U, const CeedScalar *B,… in ContractZ3d() argument
371 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { in ContractZ3d()
383 inline __device__ void ContractTransposeZ3d(SharedData_Cuda &data, const CeedScalar *U, const CeedS… in ContractTransposeZ3d() argument
386 if (data.t_id_x < Q_1D && data.t_id_y < Q_1D) { in ContractTransposeZ3d()
398 inline __device__ void ContractTransposeY3d(SharedData_Cuda &data, const CeedScalar *U, const CeedS… in ContractTransposeY3d() argument
401 r_B[i] = B[data.t_id_y + i * P_1D]; in ContractTransposeY3d()
406 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; in ContractTransposeY3d()
409 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { in ContractTransposeY3d()
411 V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction in ContractTransposeY3d()
421 inline __device__ void ContractTransposeAddY3d(SharedData_Cuda &data, const CeedScalar *U, const Ce… in ContractTransposeAddY3d() argument
424 r_B[i] = B[data.t_id_y + i * P_1D]; in ContractTransposeAddY3d()
429 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; in ContractTransposeAddY3d()
431 if (data.t_id_x < Q_1D && data.t_id_y < P_1D) { in ContractTransposeAddY3d()
433 V[k] += r_B[i] * data.slice[data.t_id_x + i * T_1D]; // Contract y direction in ContractTransposeAddY3d()
443 inline __device__ void ContractTransposeX3d(SharedData_Cuda &data, const CeedScalar *U, const CeedS… in ContractTransposeX3d() argument
446 r_B[i] = B[data.t_id_x + i * P_1D]; in ContractTransposeX3d()
451 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; in ContractTransposeX3d()
454 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { in ContractTransposeX3d()
456 V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction in ContractTransposeX3d()
466 inline __device__ void ContractTransposeAddX3d(SharedData_Cuda &data, const CeedScalar *U, const Ce… in ContractTransposeAddX3d() argument
469 r_B[i] = B[data.t_id_x + i * P_1D]; in ContractTransposeAddX3d()
474 data.slice[data.t_id_x + data.t_id_y * T_1D] = U[k]; in ContractTransposeAddX3d()
476 if (data.t_id_x < P_1D && data.t_id_y < P_1D) { in ContractTransposeAddX3d()
478 V[k] += r_B[i] * data.slice[i + data.t_id_y * T_1D]; // Contract x direction in ContractTransposeAddX3d()
488 inline __device__ void InterpTensor3d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, co… in InterpTensor3d() argument
493 ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_B, r_t1); in InterpTensor3d()
494 ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); in InterpTensor3d()
495 ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * Q_1D]); in InterpTensor3d()
503 inline __device__ void InterpTransposeTensor3d(SharedData_Cuda &data, const CeedScalar *__restrict_… in InterpTransposeTensor3d() argument
508 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D], c_B, r_t1); in InterpTransposeTensor3d()
509 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); in InterpTransposeTensor3d()
510 ContractTransposeX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); in InterpTransposeTensor3d()
518 inline __device__ void InterpTensorCollocatedNodes3d(SharedData_Cuda &data, const CeedScalar *__res… in InterpTensorCollocatedNodes3d() argument
531 inline __device__ void InterpTransposeTensorCollocatedNodes3d(SharedData_Cuda &data, const CeedScal… in InterpTransposeTensorCollocatedNodes3d() argument
544 inline __device__ void GradTensor3d(SharedData_Cuda &data, const CeedScalar *__restrict__ r_U, cons… in GradTensor3d() argument
549 ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, r_t1); in GradTensor3d()
550 ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); in GradTensor3d()
551 … ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * Q_1D + 0 * NUM_COMP * Q_1D]); in GradTensor3d()
552 ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_B, r_t1); in GradTensor3d()
553 ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_G, r_t2); in GradTensor3d()
554 … ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * Q_1D + 1 * NUM_COMP * Q_1D]); in GradTensor3d()
555 ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_B, r_t1); in GradTensor3d()
556 ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); in GradTensor3d()
557 … ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_G, &r_V[comp * Q_1D + 2 * NUM_COMP * Q_1D]); in GradTensor3d()
565 inline __device__ void GradTransposeTensor3d(SharedData_Cuda &data, const CeedScalar *__restrict__ … in GradTransposeTensor3d() argument
570 …ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D], c_… in GradTransposeTensor3d()
571 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); in GradTransposeTensor3d()
572 ContractTransposeX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_G, &r_V[comp * P_1D]); in GradTransposeTensor3d()
573 …ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D], c_… in GradTransposeTensor3d()
574 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_G, r_t2); in GradTransposeTensor3d()
575 ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); in GradTransposeTensor3d()
576 …ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_… in GradTransposeTensor3d()
577 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); in GradTransposeTensor3d()
578 ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); in GradTransposeTensor3d()
586 inline __device__ void GradTensorCollocated3d(SharedData_Cuda &data, const CeedScalar *__restrict__… in GradTensorCollocated3d() argument
591 ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_B, r_t1); in GradTensorCollocated3d()
592 ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); in GradTensorCollocated3d()
593 ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, r_t1); in GradTensorCollocated3d()
594 … ContractX3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, r_t1, c_G, &r_V[comp * Q_1D + 0 * NUM_COMP * Q_1D]); in GradTensorCollocated3d()
595 … ContractY3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, r_t1, c_G, &r_V[comp * Q_1D + 1 * NUM_COMP * Q_1D]); in GradTensorCollocated3d()
596 … ContractZ3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, r_t1, c_G, &r_V[comp * Q_1D + 2 * NUM_COMP * Q_1D]); in GradTensorCollocated3d()
604 inline __device__ void GradTransposeTensorCollocated3d(SharedData_Cuda &data, const CeedScalar *__r… in GradTransposeTensorCollocated3d() argument
609 …ContractTransposeZ3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_… in GradTransposeTensorCollocated3d()
610 …ContractTransposeAddY3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D],… in GradTransposeTensorCollocated3d()
611 …ContractTransposeAddX3d<NUM_COMP, Q_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D],… in GradTransposeTensorCollocated3d()
612 ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, r_t1); in GradTransposeTensorCollocated3d()
613 ContractTransposeY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t1, c_B, r_t2); in GradTransposeTensorCollocated3d()
614 ContractTransposeX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, r_t2, c_B, &r_V[comp * P_1D]); in GradTransposeTensorCollocated3d()
622 inline __device__ void GradTensorCollocatedNodes3d(SharedData_Cuda &data, const CeedScalar *__restr… in GradTensorCollocatedNodes3d() argument
625 …ContractX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 0 * NUM_C… in GradTensorCollocatedNodes3d()
626 …ContractY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 1 * NUM_C… in GradTensorCollocatedNodes3d()
627 …ContractZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * P_1D], c_G, &r_V[comp * Q_1D + 2 * NUM_C… in GradTensorCollocatedNodes3d()
635 inline __device__ void GradTransposeTensorCollocatedNodes3d(SharedData_Cuda &data, const CeedScalar… in GradTransposeTensorCollocatedNodes3d() argument
638 …ContractTransposeZ3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 2 * NUM_COMP * Q_1D], c_… in GradTransposeTensorCollocatedNodes3d()
639 …ContractTransposeAddY3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 1 * NUM_COMP * Q_1D],… in GradTransposeTensorCollocatedNodes3d()
640 …ContractTransposeAddX3d<NUM_COMP, P_1D, Q_1D, T_1D>(data, &r_U[comp * Q_1D + 0 * NUM_COMP * Q_1D],… in GradTransposeTensorCollocatedNodes3d()
648 inline __device__ void WeightTensor3d(SharedData_Cuda &data, const CeedScalar *__restrict__ q_weigh… in WeightTensor3d() argument
649 const bool quad = (data.t_id_x < Q_1D && data.t_id_y < Q_1D); in WeightTensor3d()
650 const CeedScalar pw = quad ? q_weight_1d[data.t_id_x] * q_weight_1d[data.t_id_y] : 0.0; in WeightTensor3d()