Lines Matching refs:Q

19 template <typename T, int P, int Q, int BY>
20 …line__ void read_A_notrans_g2r_1D_nosync(const int tx, const int ty, const T *dA, T *sA, T rA[Q]) { in read_A_notrans_g2r_1D_nosync() argument
25 for (i = 0; i < P * Q - P * BY; i += P * BY) { in read_A_notrans_g2r_1D_nosync()
28 if (i + tid < P * Q) { in read_A_notrans_g2r_1D_nosync()
34 for (int j = 0; j < Q; j++) { in read_A_notrans_g2r_1D_nosync()
44 template <typename T, int P, int Q, int BY>
45 …inline__ void read_A_trans_g2r_1D_nosync(const int tx, const int ty, const T *dA, T *sA, T rA[Q]) { in read_A_trans_g2r_1D_nosync() argument
50 for (i = 0; i < P * Q - P * BY; i += P * BY) { in read_A_trans_g2r_1D_nosync()
53 if (i + tid < P * Q) { in read_A_trans_g2r_1D_nosync()
59 for (int j = 0; j < Q; j++) { in read_A_trans_g2r_1D_nosync()
60 rA[j] = sA[tx * Q + j]; in read_A_trans_g2r_1D_nosync()
69 template <typename T, int P, int Q, int NB>
74 for (i = 0; i < Q * n - P; i += P) { in read_B_g2s_1D_nosync()
79 for (i = 0; i < Q * NB - P; i += P) { in read_B_g2s_1D_nosync()
83 if (i + tx < Q * n) { in read_B_g2s_1D_nosync()
93 template <typename T, int P, int Q, int NB>
112 template <typename T, int P, int Q, int NB>
132 template <typename T, int P, int Q, int NB>
133 static __device__ __inline__ void mul_rAsBrC_1D_nosync(T rA[Q], T *sB, T rC[NB]) { in mul_rAsBrC_1D_nosync() argument
134 T rB[Q]; in mul_rAsBrC_1D_nosync()
139 for (int j = 0; j < Q; j++) { in mul_rAsBrC_1D_nosync()
140 rB[j] = sB[i * Q + j]; in mul_rAsBrC_1D_nosync()
144 for (int j = 0; j < Q; j++) { in mul_rAsBrC_1D_nosync()
156 template <typename T, int P, int Q, int NB>
157 static __device__ __inline__ void addmul_rAsBrC_1D_nosync(T rA[Q], T *sB, T rC[NB]) { in addmul_rAsBrC_1D_nosync() argument
158 T rB[Q]; in addmul_rAsBrC_1D_nosync()
163 for (int j = 0; j < Q; j++) { in addmul_rAsBrC_1D_nosync()
164 rB[j] = sB[i * Q + j]; in addmul_rAsBrC_1D_nosync()
167 for (int j = 0; j < Q; j++) { in addmul_rAsBrC_1D_nosync()