Searched refs:loadu (Results 1 – 1 of 1) sorted by relevance
15 #define loadu _mm256_loadu_pd macro27 #define loadu _mm_loadu_ps macro58 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()64 fmadd(vv[jj][cc], tqv, loadu(&u[(a * B + b) * C + c + cc * 4])); in CeedTensorContract_Avx_Blocked()81 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()88 fmadd(vv[jj][cc], tqv, loadu(&u[(a * B + b) * C + c + cc * 4])); in CeedTensorContract_Avx_Blocked()123 for (CeedInt jj = 0; jj < JJ; jj++) vv[jj] = loadu(&v[(a * J + j + jj) * C + c]); in CeedTensorContract_Avx_Remainder()130 else tqu = loadu(&u[(a * B + b) * C + c]); in CeedTensorContract_Avx_Remainder()169 for (CeedInt jj = 0; jj < JJ / 4; jj++) vv[aa][jj] = loadu(&v[(a + aa) * J + j + jj * 4]); in CeedTensorContract_Avx_Single()193 for (CeedInt jj = 0; jj < JJ / 4; jj++) vv[aa][jj] = loadu(&v[(a + aa) * J + j + jj * 4]); in CeedTensorContract_Avx_Single()[all …]