Lines Matching +full:- +full:t

1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
4 // SPDX-License-Identifier: BSD-2-Clause
12 #include "magma-common-defs.h"
15 // read U or V of a 1D element into shared memory sU[][] or sV[][] -- for all components
18 template <typename T, int LENGTH, int NUM_COMP>
19 static __device__ __inline__ void read_1d(const T *devptr, const int compstride, T *sBuffer[NUM_COM… in read_1d()
28 // write V of a 1D element into global memory from sV[][] -- for all components
30 template <typename T, int LENGTH, int NUM_COMP>
31 static __device__ __inline__ void write_1d(T *sBuffer[NUM_COMP], T *devptr, const int compstride, c… in write_1d()
40 // sum sV[][] of a 1D element into V in global memory -- for all components
42 template <typename T, int LENGTH, int NUM_COMP>
43 static __device__ __inline__ void sum_1d(T *sBuffer[NUM_COMP], T *devptr, const int compstride, con… in sum_1d()
52 // read U of a 2D element into registers rU[][][] -- for all components of a single dim
53 // dU is assumed to be offset by elem-stride and dim-stride
58 template <typename T, int P, int DIM_U, int NUM_COMP, int rU_SIZE, int i_DIM>
59 …c __device__ __inline__ void read_U_2d(const T *dU, const int compstride, T rU[DIM_U][NUM_COMP][rU… in read_U_2d()
61 // vec 0 : [u0, u1, u2, ... u_(P-1)] -- contiguous in memory in read_U_2d()
62 // vec 1 : [u0, u1, u2, ... u_(P-1)] -- contiguous in memory in read_U_2d()
64 // vec P-1: [u0, u1, u2, ... u_(P-1)] -- contiguous in memory in read_U_2d()
89 // read V of a 2D element into registers rV[][][] -- for all components of a single dim
90 // dV is assumed to be offset by elem-stride and dim-stride
94 template <typename T, int Q, int DIM_V, int NUM_COMP, int rV_SIZE, int i_DIM>
95 static __device__ __inline__ void read_V_2d(const T *dV, const int compstride, T rV[DIM_V][NUM_COMP… in read_V_2d()
106 // write V of a 2D element from registers rV[][][] to global memory -- for all components of a sin…
107 // dV is assumed to be offset by elem-stride and dim-stride
111 template <typename T, int Q, int DIM_V, int NUM_COMP, int rV_SIZE, int i_DIM>
112 static __device__ __inline__ void write_V_2d(T *dV, const int compstride, T rV[DIM_V][NUM_COMP][rV_… in write_V_2d()
123 // sum into V of a 2D element from registers rV[][][] to global memory -- for all components of a …
124 // dV is assumed to be offset by elem-stride and dim-stride
128 template <typename T, int Q, int DIM_V, int NUM_COMP, int rV_SIZE, int i_DIM>
129 static __device__ __inline__ void sum_V_2d(T *dV, const int compstride, T rV[DIM_V][NUM_COMP][rV_SI… in sum_V_2d()
140 // read U of a 3D element into registers rU[][][] -- for all components of a single dim
141 // dU is assumed to be offset by elem-stride and dim-stride
146 template <typename T, int P, int DIM_U, int NUM_COMP, int rU_SIZE, int i_DIM>
147 …c __device__ __inline__ void read_U_3d(const T *dU, const int compstride, T rU[DIM_U][NUM_COMP][rU… in read_U_3d()
149 // vec 0 : [u0, u1, u2, ... u_(P-1)] -- contiguous in memory in read_U_3d()
150 // vec 1 : [u0, u1, u2, ... u_(P-1)] -- contiguous in memory in read_U_3d()
152 // vec P^2-1: [u0, u1, u2, ... u_(P-1)] -- contiguous in memory in read_U_3d()
177 // read V of a 3D element into registers rV[][][] -- for all components of a single dim
178 // dV is assumed to be offset by elem-stride and dim-stride
182 template <typename T, int Q, int DIM_V, int NUM_COMP, int rV_SIZE, int i_DIM>
183 static __device__ __inline__ void read_V_3d(const T *dV, const int compstride, T rV[DIM_V][NUM_COMP… in read_V_3d()
194 // write V of a 3D element from registers rV[][][] to global memory -- for all components of a sin…
195 // dV is assumed to point directly to the element (i.e. already offset by elem-stride)
199 template <typename T, int Q, int DIM_V, int NUM_COMP, int rV_SIZE, int i_DIM>
200 static __device__ __inline__ void write_V_3d(T *dV, const int compstride, T rV[DIM_V][NUM_COMP][rV_… in write_V_3d()
211 // sum into V of a 3D element from registers rV[][][] to global memory -- for all components of a …
212 // dV is assumed to point directly to the element (i.e. already offset by elem-stride)
216 template <typename T, int Q, int DIM_V, int NUM_COMP, int rV_SIZE, int i_DIM>
217 static __device__ __inline__ void sum_V_3d(T *dV, const int compstride, T rV[DIM_V][NUM_COMP][rV_SI… in sum_V_3d()
228 // reads T (no-trans) into shared memory
229 // T is B x J
242 // reads T (trans) into shared memory
243 // T is J x B