Lines Matching refs:A
42 static inline int CeedTensorContract_Avx_Blocked(CeedTensorContract contract, CeedInt A, CeedInt B,… in CeedTensorContract_Avx_Blocked() argument
52 for (CeedInt a = 0; a < A; a++) { in CeedTensorContract_Avx_Blocked()
104 static inline int CeedTensorContract_Avx_Remainder(CeedTensorContract contract, CeedInt A, CeedInt … in CeedTensorContract_Avx_Remainder() argument
116 for (CeedInt a = 0; a < A; a++) { in CeedTensorContract_Avx_Remainder()
153 static inline int CeedTensorContract_Avx_Single(CeedTensorContract contract, CeedInt A, CeedInt B, … in CeedTensorContract_Avx_Single() argument
164 for (CeedInt a = 0; a < (A / AA) * AA; a += AA) { in CeedTensorContract_Avx_Single()
187 const CeedInt a = (A / AA) * AA; in CeedTensorContract_Avx_Single()
192 for (CeedInt aa = 0; aa < A - a; aa++) { in CeedTensorContract_Avx_Single()
200 for (CeedInt aa = 0; aa < A - a; aa++) { // unroll in CeedTensorContract_Avx_Single()
205 for (CeedInt aa = 0; aa < A - a; aa++) { in CeedTensorContract_Avx_Single()
210 const CeedInt A_break = A % AA ? (A / AA) * AA : (A / AA - 1) * AA; in CeedTensorContract_Avx_Single()
245 for (CeedInt a = A_break; a < A; a++) v[a * J + j] += tq * u[a * B + b]; in CeedTensorContract_Avx_Single()
254 static int CeedTensorContract_Avx_Blocked_4_8(CeedTensorContract contract, CeedInt A, CeedInt B, Ce… in CeedTensorContract_Avx_Blocked_4_8() argument
256 return CeedTensorContract_Avx_Blocked(contract, A, B, C, J, t, t_mode, add, u, v, 4, 8); in CeedTensorContract_Avx_Blocked_4_8()
258 static int CeedTensorContract_Avx_Remainder_8_8(CeedTensorContract contract, CeedInt A, CeedInt B, … in CeedTensorContract_Avx_Remainder_8_8() argument
260 return CeedTensorContract_Avx_Remainder(contract, A, B, C, J, t, t_mode, add, u, v, 8, 8); in CeedTensorContract_Avx_Remainder_8_8()
262 static int CeedTensorContract_Avx_Single_4_8(CeedTensorContract contract, CeedInt A, CeedInt B, Cee… in CeedTensorContract_Avx_Single_4_8() argument
264 return CeedTensorContract_Avx_Single(contract, A, B, C, J, t, t_mode, add, u, v, 4, 8); in CeedTensorContract_Avx_Single_4_8()
270 static int CeedTensorContractApply_Avx(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C… in CeedTensorContractApply_Avx() argument
275 for (CeedInt q = 0; q < A * J * C; q++) v[q] = (CeedScalar)0.0; in CeedTensorContractApply_Avx()
280 CeedTensorContract_Avx_Single_4_8(contract, A, B, C, J, t, t_mode, true, u, v); in CeedTensorContractApply_Avx()
283 …if (C >= blk_size) CeedTensorContract_Avx_Blocked_4_8(contract, A, B, C, J, t, t_mode, true, u, v); in CeedTensorContractApply_Avx()
285 …if (C % blk_size) CeedTensorContract_Avx_Remainder_8_8(contract, A, B, C, J, t, t_mode, true, u, v… in CeedTensorContractApply_Avx()