Lines Matching refs:NB
13 template <typename T, int Q_COMP, int P, int Q, int NB>
19 const int nblocks = (n + NB - 1) / NB; in magma_basis_nontensor_device_n()
20 const int myn = min(NB, n - id * NB); in magma_basis_nontensor_device_n()
22 dB += id * P * NB; in magma_basis_nontensor_device_n()
23 dC += id * Q * NB; in magma_basis_nontensor_device_n()
26 CeedScalar *sB = shared_data + ty * P * NB; in magma_basis_nontensor_device_n()
27 CeedScalar *sA = shared_data + blockDim.y * P * NB; in magma_basis_nontensor_device_n()
31 read_B_g2s_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_n()
40 CeedScalar rC[NB]; in magma_basis_nontensor_device_n()
41 mul_rAsBrC_1D_nosync<CeedScalar, Q, P, NB>(rA, sB, rC); in magma_basis_nontensor_device_n()
45 write_C_r2g_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_n()
56 template <typename T, int Q_COMP, int P, int Q, int NB>
62 const int nblocks = (n + NB - 1) / NB; in magma_basis_nontensor_device_t()
63 const int myn = min(NB, n - id * NB); in magma_basis_nontensor_device_t()
65 dB += id * Q * NB; in magma_basis_nontensor_device_t()
66 dC += id * P * NB; in magma_basis_nontensor_device_t()
70 CeedScalar *sB = shared_data + ty * Q * NB; in magma_basis_nontensor_device_t()
72 CeedScalar rC[NB] = {0.0}; in magma_basis_nontensor_device_t()
83 read_B_g2s_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_t()
87 addmul_rAsBrC_1D_nosync<CeedScalar, P, Q, NB>(rA, sB, rC); in magma_basis_nontensor_device_t()
97 write_C_r2g_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_t()
102 template <typename T, int Q_COMP, int P, int Q, int NB>
108 const int nblocks = (n + NB - 1) / NB; in magma_basis_nontensor_device_ta()
109 const int myn = min(NB, n - id * NB); in magma_basis_nontensor_device_ta()
111 dB += id * Q * NB; in magma_basis_nontensor_device_ta()
112 dC += id * P * NB; in magma_basis_nontensor_device_ta()
116 CeedScalar *sB = shared_data + ty * Q * NB; in magma_basis_nontensor_device_ta()
118 CeedScalar rC[NB] = {0.0}; in magma_basis_nontensor_device_ta()
129 read_B_g2s_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_ta()
133 addmul_rAsBrC_1D_nosync<CeedScalar, P, Q, NB>(rA, sB, rC); in magma_basis_nontensor_device_ta()
143 sum_C_r2g_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_ta()
148 template <typename T, int P, int Q, int NB>
154 const int nblocks = (n + NB - 1) / NB; in magma_basis_nontensor_device_n1()
155 const int myn = min(NB, n - id * NB); in magma_basis_nontensor_device_n1()
157 dB += id * P * NB; in magma_basis_nontensor_device_n1()
158 dC += id * Q * NB; in magma_basis_nontensor_device_n1()
162 CeedScalar *sB = shared_data + ty * P * NB; in magma_basis_nontensor_device_n1()
173 read_B_g2s_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_n1()
176 CeedScalar rC[NB]; in magma_basis_nontensor_device_n1()
177 mul_rAsBrC_1D_nosync<CeedScalar, Q, P, NB>(rA, sB, rC); in magma_basis_nontensor_device_n1()
180 write_C_r2g_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_n1()
184 template <typename T, int P, int Q, int NB>
190 const int nblocks = (n + NB - 1) / NB; in magma_basis_nontensor_device_t1()
191 const int myn = min(NB, n - id * NB); in magma_basis_nontensor_device_t1()
193 dB += id * Q * NB; in magma_basis_nontensor_device_t1()
194 dC += id * P * NB; in magma_basis_nontensor_device_t1()
198 CeedScalar *sB = shared_data + ty * Q * NB; in magma_basis_nontensor_device_t1()
209 read_B_g2s_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_t1()
212 CeedScalar rC[NB]; in magma_basis_nontensor_device_t1()
213 mul_rAsBrC_1D_nosync<CeedScalar, P, Q, NB>(rA, sB, rC); in magma_basis_nontensor_device_t1()
216 write_C_r2g_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_t1()
220 template <typename T, int P, int Q, int NB>
226 const int nblocks = (n + NB - 1) / NB; in magma_basis_nontensor_device_ta1()
227 const int myn = min(NB, n - id * NB); in magma_basis_nontensor_device_ta1()
229 dB += id * Q * NB; in magma_basis_nontensor_device_ta1()
230 dC += id * P * NB; in magma_basis_nontensor_device_ta1()
234 CeedScalar *sB = shared_data + ty * Q * NB; in magma_basis_nontensor_device_ta1()
245 read_B_g2s_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_ta1()
248 CeedScalar rC[NB]; in magma_basis_nontensor_device_ta1()
249 mul_rAsBrC_1D_nosync<CeedScalar, P, Q, NB>(rA, sB, rC); in magma_basis_nontensor_device_ta1()
252 sum_C_r2g_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_ta1()