Lines Matching full:cc
44 … const CeedScalar *restrict u, CeedScalar *restrict v, const CeedInt JJ, const CeedInt CC) { in CeedTensorContract_Avx_Blocked() argument
55 for (CeedInt c = 0; c < (C / CC) * CC; c += CC) { in CeedTensorContract_Avx_Blocked()
56 rtype vv[JJ][CC / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Blocked()
58 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked() local
63 for (CeedInt cc = 0; cc < CC / 4; cc++) { // unroll in CeedTensorContract_Avx_Blocked() local
64 fmadd(vv[jj][cc], tqv, loadu(&u[(a * B + b) * C + c + cc * 4])); in CeedTensorContract_Avx_Blocked()
69 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked() local
77 for (CeedInt c = 0; c < (C / CC) * CC; c += CC) { in CeedTensorContract_Avx_Blocked()
78 rtype vv[JJ][CC / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Blocked()
81 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked() local
87 for (CeedInt cc = 0; cc < CC / 4; cc++) { // unroll in CeedTensorContract_Avx_Blocked() local
88 fmadd(vv[jj][cc], tqv, loadu(&u[(a * B + b) * C + c + cc * 4])); in CeedTensorContract_Avx_Blocked()
93 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked() local
106 … const CeedScalar *restrict u, CeedScalar *restrict v, const CeedInt JJ, const CeedInt CC) { in CeedTensorContract_Avx_Remainder() argument
118 for (CeedInt c = (C / CC) * CC; c < C; c += 4) { in CeedTensorContract_Avx_Remainder()
143 … for (CeedInt c = (C / CC) * CC; c < C; c++) v[(a * J + j) * C + c] += tq * u[(a * B + b) * C + c]; in CeedTensorContract_Avx_Remainder()