Lines Matching refs:c
21 #define fmadd(c, a, b) (c) = _mm256_fmadd_pd((a), (b), (c)) argument
23 #define fmadd(c, a, b) (c) += _mm256_mul_pd((a), (b)) argument
33 #define fmadd(c, a, b) (c) = _mm_fmadd_ps((a), (b), (c)) argument
35 #define fmadd(c, a, b) (c) += _mm_mul_ps((a), (b)) argument
55 for (CeedInt c = 0; c < (C / CC) * CC; c += CC) { in CeedTensorContract_Avx_Blocked() local
58 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()
64 fmadd(vv[jj][cc], tqv, loadu(&u[(a * B + b) * C + c + cc * 4])); in CeedTensorContract_Avx_Blocked()
69 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked()
77 for (CeedInt c = 0; c < (C / CC) * CC; c += CC) { in CeedTensorContract_Avx_Blocked() local
81 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()
88 fmadd(vv[jj][cc], tqv, loadu(&u[(a * B + b) * C + c + cc * 4])); in CeedTensorContract_Avx_Blocked()
93 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked()
118 for (CeedInt c = (C / CC) * CC; c < C; c += 4) { in CeedTensorContract_Avx_Remainder() local
123 for (CeedInt jj = 0; jj < JJ; jj++) vv[jj] = loadu(&v[(a * J + j + jj) * C + c]); in CeedTensorContract_Avx_Remainder()
127 if (C - c == 1) tqu = set(0.0, 0.0, 0.0, u[(a * B + b) * C + c + 0]); in CeedTensorContract_Avx_Remainder()
128 … else if (C - c == 2) tqu = set(0.0, 0.0, u[(a * B + b) * C + c + 1], u[(a * B + b) * C + c + 0]); in CeedTensorContract_Avx_Remainder()
129 …else if (C - c == 3) tqu = set(0.0, u[(a * B + b) * C + c + 2], u[(a * B + b) * C + c + 1], u[(a *… in CeedTensorContract_Avx_Remainder()
130 else tqu = loadu(&u[(a * B + b) * C + c]); in CeedTensorContract_Avx_Remainder()
135 for (CeedInt jj = 0; jj < JJ; jj++) storeu(&v[(a * J + j + jj) * C + c], vv[jj]); in CeedTensorContract_Avx_Remainder()
143 … for (CeedInt c = (C / CC) * CC; c < C; c++) v[(a * J + j) * C + c] += tq * u[(a * B + b) * C + c]; in CeedTensorContract_Avx_Remainder() local