Lines Matching refs:tx
19 …d_device(const T *sT, T rU[DIM_U][NUM_COMP][rU_SIZE], T rV[DIM_V][NUM_COMP][rV_SIZE], const int tx, in magma_interp_3d_device() argument
34 if (tx < (P * P)) { in magma_interp_3d_device()
35 const int batchid = tx; in magma_interp_3d_device()
49 if (tx < (P * Q)) { in magma_interp_3d_device()
50 const int batchid = tx / Q; in magma_interp_3d_device()
51 const int tx_ = tx % Q; in magma_interp_3d_device()
64 if (tx < (P * Q)) { in magma_interp_3d_device()
65 const int batchid = tx / Q; in magma_interp_3d_device()
66 const int tx_ = tx % Q; in magma_interp_3d_device()
76 if (tx < (Q * Q)) { in magma_interp_3d_device()
84 rTmp[0] += sTmp(tx, i, sld) * sT(i, j); in magma_interp_3d_device()
99 const int tx = threadIdx.x; in __launch_bounds__()
120 read_T_notrans_gm2sm<BASIS_P, BASIS_Q>(tx, dT, sT); in __launch_bounds__()
124 read_U_3d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dU, cstrdU, rU, sTmp, tx); in __launch_bounds__()
127 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_P, BASIS_Q, BASIS_P, BASIS_Q>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__()
131 write_V_3d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dV, cstrdV, rV, tx); in __launch_bounds__()
140 const int tx = threadIdx.x; in __launch_bounds__()
161 read_T_trans_gm2sm<BASIS_Q, BASIS_P>(tx, dT, sT); in __launch_bounds__()
165 read_U_3d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dU, cstrdU, rU, sTmp, tx); in __launch_bounds__()
168 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_Q, BASIS_P, BASIS_Q, BASIS_P>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__()
172 write_V_3d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dV, cstrdV, rV, tx); in __launch_bounds__()
181 const int tx = threadIdx.x; in __launch_bounds__()
202 read_T_trans_gm2sm<BASIS_Q, BASIS_P>(tx, dT, sT); in __launch_bounds__()
206 read_U_3d<CeedScalar, BASIS_Q, 1, BASIS_NUM_COMP, BASIS_Q, 0>(dU, cstrdU, rU, sTmp, tx); in __launch_bounds__()
209 …<CeedScalar, 1, 1, BASIS_NUM_COMP, BASIS_Q, BASIS_P, BASIS_Q, BASIS_P>(sT, rU, rV, tx, rTmp, sTmp); in __launch_bounds__()
213 sum_V_3d<CeedScalar, BASIS_P, 1, BASIS_NUM_COMP, BASIS_P, 0>(dV, cstrdV, rV, tx); in __launch_bounds__()