Lines Matching refs:CC

44 …        const CeedScalar *restrict u, CeedScalar *restrict v, const CeedInt JJ, const CeedInt CC) {  in CeedTensorContract_Avx_Blocked()  argument
55 for (CeedInt c = 0; c < (C / CC) * CC; c += CC) { in CeedTensorContract_Avx_Blocked()
56 rtype vv[JJ][CC / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Blocked()
58 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()
63 for (CeedInt cc = 0; cc < CC / 4; cc++) { // unroll in CeedTensorContract_Avx_Blocked()
69 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked()
77 for (CeedInt c = 0; c < (C / CC) * CC; c += CC) { in CeedTensorContract_Avx_Blocked()
78 rtype vv[JJ][CC / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Blocked()
81 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()
87 for (CeedInt cc = 0; cc < CC / 4; cc++) { // unroll in CeedTensorContract_Avx_Blocked()
93 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked()
106 … const CeedScalar *restrict u, CeedScalar *restrict v, const CeedInt JJ, const CeedInt CC) { in CeedTensorContract_Avx_Remainder() argument
118 for (CeedInt c = (C / CC) * CC; c < C; c += 4) { in CeedTensorContract_Avx_Remainder()
143 … for (CeedInt c = (C / CC) * CC; c < C; c++) v[(a * J + j) * C + c] += tq * u[(a * B + b) * C + c]; in CeedTensorContract_Avx_Remainder()