Lines Matching refs:JJ

44 …        const CeedScalar *restrict u, CeedScalar *restrict v, const CeedInt JJ, const CeedInt CC) {  in CeedTensorContract_Avx_Blocked()  argument
54 for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) { in CeedTensorContract_Avx_Blocked()
56 rtype vv[JJ][CC / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Blocked()
57 for (CeedInt jj = 0; jj < JJ; jj++) { in CeedTensorContract_Avx_Blocked()
61 for (CeedInt jj = 0; jj < JJ; jj++) { // unroll in CeedTensorContract_Avx_Blocked()
68 for (CeedInt jj = 0; jj < JJ; jj++) { in CeedTensorContract_Avx_Blocked()
74 const CeedInt j = (J / JJ) * JJ; in CeedTensorContract_Avx_Blocked()
78 rtype vv[JJ][CC / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Blocked()
106 … const CeedScalar *restrict u, CeedScalar *restrict v, const CeedInt JJ, const CeedInt CC) { in CeedTensorContract_Avx_Remainder() argument
114 const CeedInt J_break = J % JJ ? (J / JJ) * JJ : (J / JJ - 1) * JJ; in CeedTensorContract_Avx_Remainder()
120 for (CeedInt j = 0; j < J_break; j += JJ) { in CeedTensorContract_Avx_Remainder()
121 rtype vv[JJ]; // Output tile to be held in registers in CeedTensorContract_Avx_Remainder()
123 for (CeedInt jj = 0; jj < JJ; jj++) vv[jj] = loadu(&v[(a * J + j + jj) * C + c]); in CeedTensorContract_Avx_Remainder()
131 for (CeedInt jj = 0; jj < JJ; jj++) { // unroll in CeedTensorContract_Avx_Remainder()
135 for (CeedInt jj = 0; jj < JJ; jj++) storeu(&v[(a * J + j + jj) * C + c], vv[jj]); in CeedTensorContract_Avx_Remainder()
155 const CeedInt AA, const CeedInt JJ) { in CeedTensorContract_Avx_Single() argument
165 for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) { in CeedTensorContract_Avx_Single()
166 rtype vv[AA][JJ / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Single()
169 for (CeedInt jj = 0; jj < JJ / 4; jj++) vv[aa][jj] = loadu(&v[(a + aa) * J + j + jj * 4]); in CeedTensorContract_Avx_Single()
172 for (CeedInt jj = 0; jj < JJ / 4; jj++) { // unroll in CeedTensorContract_Avx_Single()
182 for (CeedInt jj = 0; jj < JJ / 4; jj++) storeu(&v[(a + aa) * J + j + jj * 4], vv[aa][jj]); in CeedTensorContract_Avx_Single()
189 for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) { in CeedTensorContract_Avx_Single()
190 rtype vv[AA][JJ / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Single()
193 for (CeedInt jj = 0; jj < JJ / 4; jj++) vv[aa][jj] = loadu(&v[(a + aa) * J + j + jj * 4]); in CeedTensorContract_Avx_Single()
196 for (CeedInt jj = 0; jj < JJ / 4; jj++) { // unroll in CeedTensorContract_Avx_Single()
206 for (CeedInt jj = 0; jj < JJ / 4; jj++) storeu(&v[(a + aa) * J + j + jj * 4], vv[aa][jj]); in CeedTensorContract_Avx_Single()
213 for (CeedInt j = (J / JJ) * JJ; j < J; j += 4) { in CeedTensorContract_Avx_Single()
242 for (CeedInt j = (J / JJ) * JJ; j < J; j++) { in CeedTensorContract_Avx_Single()