Lines Matching +full:- +full:t

1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
4 // SPDX-License-Identifier: BSD-2-Clause
39 //------------------------------------------------------------------------------
41 //------------------------------------------------------------------------------
43 … const CeedScalar *restrict t, CeedTransposeMode t_mode, const CeedInt add, in CeedTensorContract_Avx_Blocked() argument
62 rtype tqv = set1(t[(j + jj) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Blocked()
80 for (CeedInt jj = 0; jj < J - j; jj++) { in CeedTensorContract_Avx_Blocked()
84 for (CeedInt jj = 0; jj < J - j; jj++) { // doesn't unroll in CeedTensorContract_Avx_Blocked()
85 rtype tqv = set1(t[(j + jj) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Blocked()
92 for (CeedInt jj = 0; jj < J - j; jj++) { in CeedTensorContract_Avx_Blocked()
101 //------------------------------------------------------------------------------
103 //------------------------------------------------------------------------------
105 … const CeedScalar *restrict t, CeedTransposeMode t_mode, const CeedInt add, in CeedTensorContract_Avx_Remainder() argument
114 const CeedInt J_break = J % JJ ? (J / JJ) * JJ : (J / JJ - 1) * JJ; in CeedTensorContract_Avx_Remainder()
127 if (C - c == 1) tqu = set(0.0, 0.0, 0.0, u[(a * B + b) * C + c + 0]); in CeedTensorContract_Avx_Remainder()
128 … else if (C - c == 2) tqu = set(0.0, 0.0, u[(a * B + b) * C + c + 1], u[(a * B + b) * C + c + 0]); in CeedTensorContract_Avx_Remainder()
129 …else if (C - c == 3) tqu = set(0.0, u[(a * B + b) * C + c + 2], u[(a * B + b) * C + c + 1], u[(a *… in CeedTensorContract_Avx_Remainder()
132 fmadd(vv[jj], tqu, set1(t[(j + jj) * t_stride_0 + b * t_stride_1])); in CeedTensorContract_Avx_Remainder()
141 const CeedScalar tq = t[j * t_stride_0 + b * t_stride_1]; in CeedTensorContract_Avx_Remainder()
150 //------------------------------------------------------------------------------
152 //------------------------------------------------------------------------------
153 …dTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const CeedScalar *restrict t, in CeedTensorContract_Avx_Single() argument
173 …rtype tqv = set(t[(j + jj * 4 + 3) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 2) * t_stride_0… in CeedTensorContract_Avx_Single()
174t[(j + jj * 4 + 1) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 0) * t_stride_0 + b * t_stride_… in CeedTensorContract_Avx_Single()
192 for (CeedInt aa = 0; aa < A - a; aa++) { in CeedTensorContract_Avx_Single()
197 …rtype tqv = set(t[(j + jj * 4 + 3) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 2) * t_stride_0… in CeedTensorContract_Avx_Single()
198t[(j + jj * 4 + 1) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 0) * t_stride_0 + b * t_stride_… in CeedTensorContract_Avx_Single()
200 for (CeedInt aa = 0; aa < A - a; aa++) { // unroll in CeedTensorContract_Avx_Single()
205 for (CeedInt aa = 0; aa < A - a; aa++) { in CeedTensorContract_Avx_Single()
210 const CeedInt A_break = A % AA ? (A / AA) * AA : (A / AA - 1) * AA; in CeedTensorContract_Avx_Single()
222 if (J - j == 1) { in CeedTensorContract_Avx_Single()
223 tqv = set(0.0, 0.0, 0.0, t[(j + 0) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Single()
224 } else if (J - j == 2) { in CeedTensorContract_Avx_Single()
225 …tqv = set(0.0, 0.0, t[(j + 1) * t_stride_0 + b * t_stride_1], t[(j + 0) * t_stride_0 + b * t_strid… in CeedTensorContract_Avx_Single()
226 } else if (J - 3 == j) { in CeedTensorContract_Avx_Single()
228 …set(0.0, t[(j + 2) * t_stride_0 + b * t_stride_1], t[(j + 1) * t_stride_0 + b * t_stride_1], t[(j … in CeedTensorContract_Avx_Single()
230 …tqv = set(t[(j + 3) * t_stride_0 + b * t_stride_1], t[(j + 2) * t_stride_0 + b * t_stride_1], t[(j… in CeedTensorContract_Avx_Single()
231 t[(j + 0) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Single()
243 const CeedScalar tq = t[j * t_stride_0 + b * t_stride_1]; in CeedTensorContract_Avx_Single()
251 //------------------------------------------------------------------------------
252 // Tensor Contract - Common Sizes
253 //------------------------------------------------------------------------------
254 …dTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const CeedScalar *restrict t, in CeedTensorContract_Avx_Blocked_4_8() argument
256 return CeedTensorContract_Avx_Blocked(contract, A, B, C, J, t, t_mode, add, u, v, 4, 8); in CeedTensorContract_Avx_Blocked_4_8()
258 …dTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const CeedScalar *restrict t, in CeedTensorContract_Avx_Remainder_8_8() argument
260 return CeedTensorContract_Avx_Remainder(contract, A, B, C, J, t, t_mode, add, u, v, 8, 8); in CeedTensorContract_Avx_Remainder_8_8()
262 …dTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const CeedScalar *restrict t, in CeedTensorContract_Avx_Single_4_8() argument
264 return CeedTensorContract_Avx_Single(contract, A, B, C, J, t, t_mode, add, u, v, 4, 8); in CeedTensorContract_Avx_Single_4_8()
267 //------------------------------------------------------------------------------
269 //------------------------------------------------------------------------------
270 …dTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const CeedScalar *restrict t, in CeedTensorContractApply_Avx() argument
280 CeedTensorContract_Avx_Single_4_8(contract, A, B, C, J, t, t_mode, true, u, v); in CeedTensorContractApply_Avx()
283 …if (C >= blk_size) CeedTensorContract_Avx_Blocked_4_8(contract, A, B, C, J, t, t_mode, true, u, v); in CeedTensorContractApply_Avx()
285 …if (C % blk_size) CeedTensorContract_Avx_Remainder_8_8(contract, A, B, C, J, t, t_mode, true, u, v… in CeedTensorContractApply_Avx()
290 //------------------------------------------------------------------------------
292 //------------------------------------------------------------------------------
298 //------------------------------------------------------------------------------