Lines Matching refs:CeedScalar
14 …void magma_basis_nontensor_device_n(const int n, CeedScalar const *dA, CeedScalar const *dB, CeedS… in magma_basis_nontensor_device_n()
15 CeedScalar *shared_data) { in magma_basis_nontensor_device_n()
26 CeedScalar *sB = shared_data + ty * P * NB; in magma_basis_nontensor_device_n()
27 CeedScalar *sA = shared_data + blockDim.y * P * NB; in magma_basis_nontensor_device_n()
31 read_B_g2s_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_n()
37 CeedScalar rA[P]; in magma_basis_nontensor_device_n()
38 …read_A_trans_g2r_1D_nosync<CeedScalar, Q, P, MAGMA_BASIS_NTCOL(Q, MAGMA_MAXTHREADS_1D)>(tx, ty, dA… in magma_basis_nontensor_device_n()
40 CeedScalar rC[NB]; in magma_basis_nontensor_device_n()
41 mul_rAsBrC_1D_nosync<CeedScalar, Q, P, NB>(rA, sB, rC); in magma_basis_nontensor_device_n()
45 write_C_r2g_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_n()
57 …void magma_basis_nontensor_device_t(const int n, CeedScalar const *dA, CeedScalar const *dB, CeedS… in magma_basis_nontensor_device_t()
58 CeedScalar *shared_data) { in magma_basis_nontensor_device_t()
69 CeedScalar *sA = shared_data; in magma_basis_nontensor_device_t()
70 CeedScalar *sB = shared_data + ty * Q * NB; in magma_basis_nontensor_device_t()
72 CeedScalar rC[NB] = {0.0}; in magma_basis_nontensor_device_t()
77 CeedScalar rA[Q]; in magma_basis_nontensor_device_t()
78 …read_A_notrans_g2r_1D_nosync<CeedScalar, P, Q, MAGMA_BASIS_NTCOL(P, MAGMA_MAXTHREADS_1D)>(tx, ty, … in magma_basis_nontensor_device_t()
83 read_B_g2s_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_t()
87 addmul_rAsBrC_1D_nosync<CeedScalar, P, Q, NB>(rA, sB, rC); in magma_basis_nontensor_device_t()
97 write_C_r2g_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_t()
103 … magma_basis_nontensor_device_ta(const int n, const CeedScalar *dA, const CeedScalar *dB, CeedScal… in magma_basis_nontensor_device_ta()
104 CeedScalar *shared_data) { in magma_basis_nontensor_device_ta()
115 CeedScalar *sA = shared_data; in magma_basis_nontensor_device_ta()
116 CeedScalar *sB = shared_data + ty * Q * NB; in magma_basis_nontensor_device_ta()
118 CeedScalar rC[NB] = {0.0}; in magma_basis_nontensor_device_ta()
123 CeedScalar rA[Q]; in magma_basis_nontensor_device_ta()
124 …read_A_notrans_g2r_1D_nosync<CeedScalar, P, Q, MAGMA_BASIS_NTCOL(P, MAGMA_MAXTHREADS_1D)>(tx, ty, … in magma_basis_nontensor_device_ta()
129 read_B_g2s_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_ta()
133 addmul_rAsBrC_1D_nosync<CeedScalar, P, Q, NB>(rA, sB, rC); in magma_basis_nontensor_device_ta()
143 sum_C_r2g_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_ta()
149 …oid magma_basis_nontensor_device_n1(const int n, CeedScalar const *dA, CeedScalar const *dB, CeedS… in magma_basis_nontensor_device_n1()
150 CeedScalar *shared_data) { in magma_basis_nontensor_device_n1()
161 CeedScalar *sA = shared_data; in magma_basis_nontensor_device_n1()
162 CeedScalar *sB = shared_data + ty * P * NB; in magma_basis_nontensor_device_n1()
165 CeedScalar rA[P]; in magma_basis_nontensor_device_n1()
166 …read_A_trans_g2r_1D_nosync<CeedScalar, Q, P, MAGMA_BASIS_NTCOL(Q, MAGMA_MAXTHREADS_1D)>(tx, ty, dA… in magma_basis_nontensor_device_n1()
173 read_B_g2s_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_n1()
176 CeedScalar rC[NB]; in magma_basis_nontensor_device_n1()
177 mul_rAsBrC_1D_nosync<CeedScalar, Q, P, NB>(rA, sB, rC); in magma_basis_nontensor_device_n1()
180 write_C_r2g_1D_nosync<CeedScalar, Q, P, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_n1()
185 …oid magma_basis_nontensor_device_t1(const int n, CeedScalar const *dA, CeedScalar const *dB, CeedS… in magma_basis_nontensor_device_t1()
186 CeedScalar *shared_data) { in magma_basis_nontensor_device_t1()
197 CeedScalar *sA = shared_data; in magma_basis_nontensor_device_t1()
198 CeedScalar *sB = shared_data + ty * Q * NB; in magma_basis_nontensor_device_t1()
201 CeedScalar rA[Q]; in magma_basis_nontensor_device_t1()
202 …read_A_notrans_g2r_1D_nosync<CeedScalar, P, Q, MAGMA_BASIS_NTCOL(P, MAGMA_MAXTHREADS_1D)>(tx, ty, … in magma_basis_nontensor_device_t1()
209 read_B_g2s_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_t1()
212 CeedScalar rC[NB]; in magma_basis_nontensor_device_t1()
213 mul_rAsBrC_1D_nosync<CeedScalar, P, Q, NB>(rA, sB, rC); in magma_basis_nontensor_device_t1()
216 write_C_r2g_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_t1()
221 …id magma_basis_nontensor_device_ta1(const int n, CeedScalar const *dA, CeedScalar const *dB, CeedS… in magma_basis_nontensor_device_ta1()
222 CeedScalar *shared_data) { in magma_basis_nontensor_device_ta1()
233 CeedScalar *sA = shared_data; in magma_basis_nontensor_device_ta1()
234 CeedScalar *sB = shared_data + ty * Q * NB; in magma_basis_nontensor_device_ta1()
237 CeedScalar rA[Q]; in magma_basis_nontensor_device_ta1()
238 …read_A_notrans_g2r_1D_nosync<CeedScalar, P, Q, MAGMA_BASIS_NTCOL(P, MAGMA_MAXTHREADS_1D)>(tx, ty, … in magma_basis_nontensor_device_ta1()
245 read_B_g2s_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, dB, sB); in magma_basis_nontensor_device_ta1()
248 CeedScalar rC[NB]; in magma_basis_nontensor_device_ta1()
249 mul_rAsBrC_1D_nosync<CeedScalar, P, Q, NB>(rA, sB, rC); in magma_basis_nontensor_device_ta1()
252 sum_C_r2g_1D_nosync<CeedScalar, P, Q, NB>(tx, myn, rC, dC); in magma_basis_nontensor_device_ta1()
257 …gma_interp_nontensor_n(const int n, CeedScalar const *__restrict__ dA, CeedScalar const *__restric… in __launch_bounds__()
258 MAGMA_DEVICE_SHARED(CeedScalar, shared_data); in __launch_bounds__()
261 …magma_basis_nontensor_device_n1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_INTERP_N>(n, dA, dB, dC, (C… in __launch_bounds__()
263 …ma_basis_nontensor_device_n<CeedScalar, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q, BASIS_NB_INTERP_N>(… in __launch_bounds__()
269 …gma_interp_nontensor_t(const int n, CeedScalar const *__restrict__ dA, CeedScalar const *__restric… in __launch_bounds__()
270 MAGMA_DEVICE_SHARED(CeedScalar, shared_data); in __launch_bounds__()
273 …magma_basis_nontensor_device_t1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_INTERP_T>(n, dA, dB, dC, (C… in __launch_bounds__()
275 …ma_basis_nontensor_device_t<CeedScalar, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q, BASIS_NB_INTERP_T>(… in __launch_bounds__()
281 …ma_interp_nontensor_ta(const int n, CeedScalar const *__restrict__ dA, CeedScalar const *__restric… in __launch_bounds__()
282 MAGMA_DEVICE_SHARED(CeedScalar, shared_data); in __launch_bounds__()
285 …magma_basis_nontensor_device_ta1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_INTERP_T>(n, dA, dB, dC, (… in __launch_bounds__()
287 …a_basis_nontensor_device_ta<CeedScalar, BASIS_Q_COMP_INTERP, BASIS_P, BASIS_Q, BASIS_NB_INTERP_T>(… in __launch_bounds__()
293 …agma_deriv_nontensor_n(const int n, CeedScalar const *__restrict__ dA, CeedScalar const *__restric… in __launch_bounds__()
294 MAGMA_DEVICE_SHARED(CeedScalar, shared_data); in __launch_bounds__()
297 …magma_basis_nontensor_device_n1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_DERIV_N>(n, dA, dB, dC, (Ce… in __launch_bounds__()
299 …gma_basis_nontensor_device_n<CeedScalar, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q, BASIS_NB_DERIV_N>(n… in __launch_bounds__()
305 …agma_deriv_nontensor_t(const int n, CeedScalar const *__restrict__ dA, CeedScalar const *__restric… in __launch_bounds__()
306 MAGMA_DEVICE_SHARED(CeedScalar, shared_data); in __launch_bounds__()
309 …magma_basis_nontensor_device_t1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_DERIV_T>(n, dA, dB, dC, (Ce… in __launch_bounds__()
311 …gma_basis_nontensor_device_t<CeedScalar, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q, BASIS_NB_DERIV_T>(n… in __launch_bounds__()
317 …gma_deriv_nontensor_ta(const int n, CeedScalar const *__restrict__ dA, CeedScalar const *__restric… in __launch_bounds__()
318 MAGMA_DEVICE_SHARED(CeedScalar, shared_data); in __launch_bounds__()
321 …magma_basis_nontensor_device_ta1<CeedScalar, BASIS_P, BASIS_Q, BASIS_NB_DERIV_T>(n, dA, dB, dC, (C… in __launch_bounds__()
323 …ma_basis_nontensor_device_ta<CeedScalar, BASIS_Q_COMP_DERIV, BASIS_P, BASIS_Q, BASIS_NB_DERIV_T>(n… in __launch_bounds__()