Lines Matching refs:P
19 template <typename T, int P, int Q, int BY>
21 const int tid = ty * P + tx; in read_A_notrans_g2r_1D_nosync()
25 for (i = 0; i < P * Q - P * BY; i += P * BY) { in read_A_notrans_g2r_1D_nosync()
28 if (i + tid < P * Q) { in read_A_notrans_g2r_1D_nosync()
35 rA[j] = sA[j * P + tx]; in read_A_notrans_g2r_1D_nosync()
44 template <typename T, int P, int Q, int BY>
46 const int tid = ty * P + tx; in read_A_trans_g2r_1D_nosync()
50 for (i = 0; i < P * Q - P * BY; i += P * BY) { in read_A_trans_g2r_1D_nosync()
53 if (i + tid < P * Q) { in read_A_trans_g2r_1D_nosync()
69 template <typename T, int P, int Q, int NB>
74 for (i = 0; i < Q * n - P; i += P) { in read_B_g2s_1D_nosync()
79 for (i = 0; i < Q * NB - P; i += P) { in read_B_g2s_1D_nosync()
93 template <typename T, int P, int Q, int NB>
97 dC[i * P + tx] = rC[i]; in write_C_r2g_1D_nosync()
102 dC[i * P + tx] = rC[i]; in write_C_r2g_1D_nosync()
112 template <typename T, int P, int Q, int NB>
116 dC[i * P + tx] += rC[i]; in sum_C_r2g_1D_nosync()
121 dC[i * P + tx] += rC[i]; in sum_C_r2g_1D_nosync()
132 template <typename T, int P, int Q, int NB>
156 template <typename T, int P, int Q, int NB>