Lines Matching refs:b

21 #define fmadd(c, a, b) (c) = _mm256_fmadd_pd((a), (b), (c))  argument
23 #define fmadd(c, a, b) (c) += _mm256_mul_pd((a), (b)) argument
33 #define fmadd(c, a, b) (c) = _mm_fmadd_ps((a), (b), (c)) argument
35 #define fmadd(c, a, b) (c) += _mm_mul_ps((a), (b)) argument
60 for (CeedInt b = 0; b < B; b++) { in CeedTensorContract_Avx_Blocked() local
62 rtype tqv = set1(t[(j + jj) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Blocked()
64 fmadd(vv[jj][cc], tqv, loadu(&u[(a * B + b) * C + c + cc * 4])); in CeedTensorContract_Avx_Blocked()
83 for (CeedInt b = 0; b < B; b++) { in CeedTensorContract_Avx_Blocked() local
85 rtype tqv = set1(t[(j + jj) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Blocked()
88 fmadd(vv[jj][cc], tqv, loadu(&u[(a * B + b) * C + c + cc * 4])); in CeedTensorContract_Avx_Blocked()
124 for (CeedInt b = 0; b < B; b++) { in CeedTensorContract_Avx_Remainder() local
127 if (C - c == 1) tqu = set(0.0, 0.0, 0.0, u[(a * B + b) * C + c + 0]); in CeedTensorContract_Avx_Remainder()
128 … else if (C - c == 2) tqu = set(0.0, 0.0, u[(a * B + b) * C + c + 1], u[(a * B + b) * C + c + 0]); in CeedTensorContract_Avx_Remainder()
129 …else if (C - c == 3) tqu = set(0.0, u[(a * B + b) * C + c + 2], u[(a * B + b) * C + c + 1], u[(a *… in CeedTensorContract_Avx_Remainder()
130 else tqu = loadu(&u[(a * B + b) * C + c]); in CeedTensorContract_Avx_Remainder()
132 fmadd(vv[jj], tqu, set1(t[(j + jj) * t_stride_0 + b * t_stride_1])); in CeedTensorContract_Avx_Remainder()
140 for (CeedInt b = 0; b < B; b++) { in CeedTensorContract_Avx_Remainder() local
141 const CeedScalar tq = t[j * t_stride_0 + b * t_stride_1]; in CeedTensorContract_Avx_Remainder()
143 … for (CeedInt c = (C / CC) * CC; c < C; c++) v[(a * J + j) * C + c] += tq * u[(a * B + b) * C + c]; in CeedTensorContract_Avx_Remainder()
171 for (CeedInt b = 0; b < B; b++) { in CeedTensorContract_Avx_Single() local
173 …rtype tqv = set(t[(j + jj * 4 + 3) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 2) * t_stride_0… in CeedTensorContract_Avx_Single()
174 …t[(j + jj * 4 + 1) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 0) * t_stride_0 + b * t_stride_… in CeedTensorContract_Avx_Single()
177 fmadd(vv[aa][jj], tqv, set1(u[(a + aa) * B + b])); in CeedTensorContract_Avx_Single()
195 for (CeedInt b = 0; b < B; b++) { in CeedTensorContract_Avx_Single() local
197 …rtype tqv = set(t[(j + jj * 4 + 3) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 2) * t_stride_0… in CeedTensorContract_Avx_Single()
198 …t[(j + jj * 4 + 1) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 0) * t_stride_0 + b * t_stride_… in CeedTensorContract_Avx_Single()
201 fmadd(vv[aa][jj], tqv, set1(u[(a + aa) * B + b])); in CeedTensorContract_Avx_Single()
219 for (CeedInt b = 0; b < B; b++) { in CeedTensorContract_Avx_Single() local
223 tqv = set(0.0, 0.0, 0.0, t[(j + 0) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Single()
225 …tqv = set(0.0, 0.0, t[(j + 1) * t_stride_0 + b * t_stride_1], t[(j + 0) * t_stride_0 + b * t_strid… in CeedTensorContract_Avx_Single()
228 …set(0.0, t[(j + 2) * t_stride_0 + b * t_stride_1], t[(j + 1) * t_stride_0 + b * t_stride_1], t[(j … in CeedTensorContract_Avx_Single()
230 …qv = set(t[(j + 3) * t_stride_0 + b * t_stride_1], t[(j + 2) * t_stride_0 + b * t_stride_1], t[(j … in CeedTensorContract_Avx_Single()
231 t[(j + 0) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Single()
234 fmadd(vv[aa], tqv, set1(u[(a + aa) * B + b])); in CeedTensorContract_Avx_Single()
241 for (CeedInt b = 0; b < B; b++) { in CeedTensorContract_Avx_Single() local
243 const CeedScalar tq = t[j * t_stride_0 + b * t_stride_1]; in CeedTensorContract_Avx_Single()
245 for (CeedInt a = A_break; a < A; a++) v[a * J + j] += tq * u[a * B + b]; in CeedTensorContract_Avx_Single()