Lines Matching refs:j

54     for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) {  in CeedTensorContract_Avx_Blocked()  local
58 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()
62 rtype tqv = set1(t[(j + jj) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Blocked()
69 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked()
74 const CeedInt j = (J / JJ) * JJ; in CeedTensorContract_Avx_Blocked() local
76 if (j < J) { in CeedTensorContract_Avx_Blocked()
80 for (CeedInt jj = 0; jj < J - j; jj++) { in CeedTensorContract_Avx_Blocked()
81 … for (CeedInt cc = 0; cc < CC / 4; cc++) vv[jj][cc] = loadu(&v[(a * J + j + jj) * C + c + cc * 4]); in CeedTensorContract_Avx_Blocked()
84 for (CeedInt jj = 0; jj < J - j; jj++) { // doesn't unroll in CeedTensorContract_Avx_Blocked()
85 rtype tqv = set1(t[(j + jj) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Blocked()
92 for (CeedInt jj = 0; jj < J - j; jj++) { in CeedTensorContract_Avx_Blocked()
93 … for (CeedInt cc = 0; cc < CC / 4; cc++) storeu(&v[(a * J + j + jj) * C + c + cc * 4], vv[jj][cc]); in CeedTensorContract_Avx_Blocked()
120 for (CeedInt j = 0; j < J_break; j += JJ) { in CeedTensorContract_Avx_Remainder() local
123 for (CeedInt jj = 0; jj < JJ; jj++) vv[jj] = loadu(&v[(a * J + j + jj) * C + c]); in CeedTensorContract_Avx_Remainder()
132 fmadd(vv[jj], tqu, set1(t[(j + jj) * t_stride_0 + b * t_stride_1])); in CeedTensorContract_Avx_Remainder()
135 for (CeedInt jj = 0; jj < JJ; jj++) storeu(&v[(a * J + j + jj) * C + c], vv[jj]); in CeedTensorContract_Avx_Remainder()
139 for (CeedInt j = J_break; j < J; j++) { in CeedTensorContract_Avx_Remainder() local
141 const CeedScalar tq = t[j * t_stride_0 + b * t_stride_1]; in CeedTensorContract_Avx_Remainder()
143 … for (CeedInt c = (C / CC) * CC; c < C; c++) v[(a * J + j) * C + c] += tq * u[(a * B + b) * C + c]; in CeedTensorContract_Avx_Remainder()
165 for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) { in CeedTensorContract_Avx_Single() local
169 for (CeedInt jj = 0; jj < JJ / 4; jj++) vv[aa][jj] = loadu(&v[(a + aa) * J + j + jj * 4]); in CeedTensorContract_Avx_Single()
173 …rtype tqv = set(t[(j + jj * 4 + 3) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 2) * t_stride_0… in CeedTensorContract_Avx_Single()
174 …t[(j + jj * 4 + 1) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 0) * t_stride_0 + b * t_stride_… in CeedTensorContract_Avx_Single()
182 for (CeedInt jj = 0; jj < JJ / 4; jj++) storeu(&v[(a + aa) * J + j + jj * 4], vv[aa][jj]); in CeedTensorContract_Avx_Single()
189 for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) { in CeedTensorContract_Avx_Single() local
193 for (CeedInt jj = 0; jj < JJ / 4; jj++) vv[aa][jj] = loadu(&v[(a + aa) * J + j + jj * 4]); in CeedTensorContract_Avx_Single()
197 …rtype tqv = set(t[(j + jj * 4 + 3) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 2) * t_stride_0… in CeedTensorContract_Avx_Single()
198 …t[(j + jj * 4 + 1) * t_stride_0 + b * t_stride_1], t[(j + jj * 4 + 0) * t_stride_0 + b * t_stride_… in CeedTensorContract_Avx_Single()
206 for (CeedInt jj = 0; jj < JJ / 4; jj++) storeu(&v[(a + aa) * J + j + jj * 4], vv[aa][jj]); in CeedTensorContract_Avx_Single()
213 for (CeedInt j = (J / JJ) * JJ; j < J; j += 4) { in CeedTensorContract_Avx_Single() local
218 for (CeedInt aa = 0; aa < AA; aa++) vv[aa] = loadu(&v[(a + aa) * J + j]); in CeedTensorContract_Avx_Single()
222 if (J - j == 1) { in CeedTensorContract_Avx_Single()
223 tqv = set(0.0, 0.0, 0.0, t[(j + 0) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Single()
224 } else if (J - j == 2) { in CeedTensorContract_Avx_Single()
225 …tqv = set(0.0, 0.0, t[(j + 1) * t_stride_0 + b * t_stride_1], t[(j + 0) * t_stride_0 + b * t_strid… in CeedTensorContract_Avx_Single()
226 } else if (J - 3 == j) { in CeedTensorContract_Avx_Single()
228 …set(0.0, t[(j + 2) * t_stride_0 + b * t_stride_1], t[(j + 1) * t_stride_0 + b * t_stride_1], t[(j in CeedTensorContract_Avx_Single()
230 …tqv = set(t[(j + 3) * t_stride_0 + b * t_stride_1], t[(j + 2) * t_stride_0 + b * t_stride_1], t[(j in CeedTensorContract_Avx_Single()
231 t[(j + 0) * t_stride_0 + b * t_stride_1]); in CeedTensorContract_Avx_Single()
237 for (CeedInt aa = 0; aa < AA; aa++) storeu(&v[(a + aa) * J + j], vv[aa]); in CeedTensorContract_Avx_Single()
242 for (CeedInt j = (J / JJ) * JJ; j < J; j++) { in CeedTensorContract_Avx_Single() local
243 const CeedScalar tq = t[j * t_stride_0 + b * t_stride_1]; in CeedTensorContract_Avx_Single()
245 for (CeedInt a = A_break; a < A; a++) v[a * J + j] += tq * u[a * B + b]; in CeedTensorContract_Avx_Single()