// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. // All Rights reserved. See files LICENSE and NOTICE for details. // // This file is part of CEED, a collection of benchmarks, miniapps, software // libraries and APIs for efficient high-order finite element and spectral // element discretizations for exascale applications. For more information and // source code availability see http://github.com/ceed. // // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, // a collaborative effort of two U.S. Department of Energy organizations (Office // of Science and the National Nuclear Security Administration) responsible for // the planning and preparation of a capable exascale ecosystem, including // software, applications, hardware, advanced system engineering and early // testbed platforms, in support of the nation's exascale computing imperative. #include "ceed-avx.h" // c += a * b #ifdef __FMA__ # define fmadd(c,a,b) (c) = _mm256_fmadd_pd((a), (b), (c)) #else # define fmadd(c,a,b) (c) += _mm256_mul_pd((a), (b)) #endif //------------------------------------------------------------------------------ // Blocked Tensor Contract //------------------------------------------------------------------------------ static inline int CeedTensorContract_Avx_Blocked(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const CeedScalar *restrict t, CeedTransposeMode tmode, const CeedInt Add, const CeedScalar *restrict u, CeedScalar *restrict v, const CeedInt JJ, const CeedInt CC) { CeedInt tstride0 = B, tstride1 = 1; if (tmode == CEED_TRANSPOSE) { tstride0 = 1; tstride1 = J; } for (CeedInt a=0; a= blksize) CeedTensorContract_Avx_Blocked_4_8(contract, A, B, C, J, t, tmode, true, u, v); // Remainder of columns if (C % blksize) CeedTensorContract_Avx_Remainder_8_8(contract, A, B, C, J, t, tmode, true, u, v); } return 0; } //------------------------------------------------------------------------------ // Tensor Contract Destroy //------------------------------------------------------------------------------ static int CeedTensorContractDestroy_Avx(CeedTensorContract contract) { return 0; } //------------------------------------------------------------------------------ // Tensor Contract Create //------------------------------------------------------------------------------ int CeedTensorContractCreate_Avx(CeedBasis basis, CeedTensorContract contract) { int ierr; Ceed ceed; ierr = CeedTensorContractGetCeed(contract, &ceed); CeedChk(ierr); ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Apply", CeedTensorContractApply_Avx); CeedChk(ierr); ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Destroy", CeedTensorContractDestroy_Avx); CeedChk(ierr); return 0; } //------------------------------------------------------------------------------