Lines Matching refs:AA
155 const CeedInt AA, const CeedInt JJ) { in CeedTensorContract_Avx_Single() argument
164 for (CeedInt a = 0; a < (A / AA) * AA; a += AA) { in CeedTensorContract_Avx_Single()
166 rtype vv[AA][JJ / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Single()
168 for (CeedInt aa = 0; aa < AA; aa++) { in CeedTensorContract_Avx_Single()
176 for (CeedInt aa = 0; aa < AA; aa++) { // unroll in CeedTensorContract_Avx_Single()
181 for (CeedInt aa = 0; aa < AA; aa++) { in CeedTensorContract_Avx_Single()
187 const CeedInt a = (A / AA) * AA; in CeedTensorContract_Avx_Single()
190 rtype vv[AA][JJ / 4]; // Output tile to be held in registers in CeedTensorContract_Avx_Single()
210 const CeedInt A_break = A % AA ? (A / AA) * AA : (A / AA - 1) * AA; in CeedTensorContract_Avx_Single()
215 for (CeedInt a = 0; a < A_break; a += AA) { in CeedTensorContract_Avx_Single()
216 rtype vv[AA]; // Output tile to be held in registers in CeedTensorContract_Avx_Single()
218 for (CeedInt aa = 0; aa < AA; aa++) vv[aa] = loadu(&v[(a + aa) * J + j]); in CeedTensorContract_Avx_Single()
233 for (CeedInt aa = 0; aa < AA; aa++) { // unroll in CeedTensorContract_Avx_Single()
237 for (CeedInt aa = 0; aa < AA; aa++) storeu(&v[(a + aa) * J + j], vv[aa]); in CeedTensorContract_Avx_Single()