Lines Matching +full:- +full:t

1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
4 // SPDX-License-Identifier: BSD-2-Clause
9 /// Internal header for MAGMA backend common non-tensor basis definitions
12 #include "magma-common-defs.h"
15 // read A (no-trans) from global to reg.
19 template <typename T, int P, int Q, int BY>
20 …line__ void read_A_notrans_g2r_1D_nosync(const int tx, const int ty, const T *dA, T *sA, T rA[Q]) { in read_A_notrans_g2r_1D_nosync()
25 for (i = 0; i < P * Q - P * BY; i += P * BY) { in read_A_notrans_g2r_1D_nosync()
44 template <typename T, int P, int Q, int BY>
45 …inline__ void read_A_trans_g2r_1D_nosync(const int tx, const int ty, const T *dA, T *sA, T rA[Q]) { in read_A_trans_g2r_1D_nosync()
50 for (i = 0; i < P * Q - P * BY; i += P * BY) { in read_A_trans_g2r_1D_nosync()
69 template <typename T, int P, int Q, int NB>
70 static __device__ __inline__ void read_B_g2s_1D_nosync(const int tx, const int n, const T *dB, T *s… in read_B_g2s_1D_nosync()
74 for (i = 0; i < Q * n - P; i += P) { in read_B_g2s_1D_nosync()
79 for (i = 0; i < Q * NB - P; i += P) { in read_B_g2s_1D_nosync()
93 template <typename T, int P, int Q, int NB>
94 static __device__ __inline__ void write_C_r2g_1D_nosync(const int tx, const int n, T rC[NB], T *dC)… in write_C_r2g_1D_nosync()
112 template <typename T, int P, int Q, int NB>
113 static __device__ __inline__ void sum_C_r2g_1D_nosync(const int tx, const int n, T rC[NB], T *dC) { in sum_C_r2g_1D_nosync()
130 // C in registers -- one row per thread
132 template <typename T, int P, int Q, int NB>
133 static __device__ __inline__ void mul_rAsBrC_1D_nosync(T rA[Q], T *sB, T rC[NB]) { in mul_rAsBrC_1D_nosync()
134 T rB[Q]; in mul_rAsBrC_1D_nosync()
154 // C in registers -- one row per thread
156 template <typename T, int P, int Q, int NB>
157 static __device__ __inline__ void addmul_rAsBrC_1D_nosync(T rA[Q], T *sB, T rC[NB]) { in addmul_rAsBrC_1D_nosync()
158 T rB[Q]; in addmul_rAsBrC_1D_nosync()