Lines Matching refs:v

44 …        const CeedScalar *restrict u, CeedScalar *restrict v, const CeedInt JJ, const CeedInt CC) {  in CeedTensorContract_Avx_Blocked()  argument
58 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()
69 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked()
81 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()
93 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked()
106 … const CeedScalar *restrict u, CeedScalar *restrict v, const CeedInt JJ, const CeedInt CC) { in CeedTensorContract_Avx_Remainder() argument
123 for (CeedInt jj = 0; jj < JJ; jj++) vv[jj] = loadu(&v[(a * J + j + jj) * C + c]); in CeedTensorContract_Avx_Remainder()
135 for (CeedInt jj = 0; jj < JJ; jj++) storeu(&v[(a * J + j + jj) * C + c], vv[jj]); in CeedTensorContract_Avx_Remainder()
143 … for (CeedInt c = (C / CC) * CC; c < C; c++) v[(a * J + j) * C + c] += tq * u[(a * B + b) * C + c]; in CeedTensorContract_Avx_Remainder()
154 … CeedTransposeMode t_mode, const CeedInt add, const CeedScalar *restrict u, CeedScalar *restrict v, in CeedTensorContract_Avx_Single() argument
169 for (CeedInt jj = 0; jj < JJ / 4; jj++) vv[aa][jj] = loadu(&v[(a + aa) * J + j + jj * 4]); in CeedTensorContract_Avx_Single()
182 for (CeedInt jj = 0; jj < JJ / 4; jj++) storeu(&v[(a + aa) * J + j + jj * 4], vv[aa][jj]); in CeedTensorContract_Avx_Single()
193 for (CeedInt jj = 0; jj < JJ / 4; jj++) vv[aa][jj] = loadu(&v[(a + aa) * J + j + jj * 4]); in CeedTensorContract_Avx_Single()
206 for (CeedInt jj = 0; jj < JJ / 4; jj++) storeu(&v[(a + aa) * J + j + jj * 4], vv[aa][jj]); in CeedTensorContract_Avx_Single()
218 for (CeedInt aa = 0; aa < AA; aa++) vv[aa] = loadu(&v[(a + aa) * J + j]); in CeedTensorContract_Avx_Single()
237 for (CeedInt aa = 0; aa < AA; aa++) storeu(&v[(a + aa) * J + j], vv[aa]); in CeedTensorContract_Avx_Single()
245 for (CeedInt a = A_break; a < A; a++) v[a * J + j] += tq * u[a * B + b]; in CeedTensorContract_Avx_Single()
255 …eedTransposeMode t_mode, const CeedInt add, const CeedScalar *restrict u, CeedScalar *restrict v) { in CeedTensorContract_Avx_Blocked_4_8() argument
256 return CeedTensorContract_Avx_Blocked(contract, A, B, C, J, t, t_mode, add, u, v, 4, 8); in CeedTensorContract_Avx_Blocked_4_8()
259 …eedTransposeMode t_mode, const CeedInt add, const CeedScalar *restrict u, CeedScalar *restrict v) { in CeedTensorContract_Avx_Remainder_8_8() argument
260 return CeedTensorContract_Avx_Remainder(contract, A, B, C, J, t, t_mode, add, u, v, 8, 8); in CeedTensorContract_Avx_Remainder_8_8()
263 …eedTransposeMode t_mode, const CeedInt add, const CeedScalar *restrict u, CeedScalar *restrict v) { in CeedTensorContract_Avx_Single_4_8() argument
264 return CeedTensorContract_Avx_Single(contract, A, B, C, J, t, t_mode, add, u, v, 4, 8); in CeedTensorContract_Avx_Single_4_8()
271 …eedTransposeMode t_mode, const CeedInt add, const CeedScalar *restrict u, CeedScalar *restrict v) { in CeedTensorContractApply_Avx() argument
275 for (CeedInt q = 0; q < A * J * C; q++) v[q] = (CeedScalar)0.0; in CeedTensorContractApply_Avx()
280 CeedTensorContract_Avx_Single_4_8(contract, A, B, C, J, t, t_mode, true, u, v); in CeedTensorContractApply_Avx()
283 …if (C >= blk_size) CeedTensorContract_Avx_Blocked_4_8(contract, A, B, C, J, t, t_mode, true, u, v); in CeedTensorContractApply_Avx()
285 …f (C % blk_size) CeedTensorContract_Avx_Remainder_8_8(contract, A, B, C, J, t, t_mode, true, u, v); in CeedTensorContractApply_Avx()