// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16*9b2a10adSJeremy L Thompson 17*9b2a10adSJeremy L Thompson #include <ceed/ceed.h> 18*9b2a10adSJeremy L Thompson #include <ceed/backend.h> 19*9b2a10adSJeremy L Thompson #include "ceed-opt.h" 20*9b2a10adSJeremy L Thompson 21*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------ 22*9b2a10adSJeremy L Thompson // Tensor Contract Core loop 23*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------ 24*9b2a10adSJeremy L Thompson static inline int CeedTensorContractApply_Core_Opt(CeedTensorContract contract, 25*9b2a10adSJeremy L Thompson CeedInt A, CeedInt B, CeedInt C, CeedInt J, const CeedScalar *restrict t, 26*9b2a10adSJeremy L Thompson CeedTransposeMode t_mode, const CeedInt add, const CeedScalar *restrict u, 27*9b2a10adSJeremy L Thompson CeedScalar *restrict v) { 28*9b2a10adSJeremy L Thompson CeedInt t_stride_0 = B, t_stride_1 = 1; 29*9b2a10adSJeremy L Thompson if (t_mode == CEED_TRANSPOSE) { 30*9b2a10adSJeremy L Thompson t_stride_0 = 1; t_stride_1 = J; 31*9b2a10adSJeremy L Thompson } 32*9b2a10adSJeremy L Thompson 33*9b2a10adSJeremy L Thompson for (CeedInt a=0; a<A; a++) 34*9b2a10adSJeremy L Thompson for (CeedInt b=0; b<B; b++) 35*9b2a10adSJeremy L Thompson for (CeedInt j=0; j<J; j++) { 36*9b2a10adSJeremy L Thompson CeedScalar tq = t[j*t_stride_0 + b*t_stride_1]; 37*9b2a10adSJeremy L Thompson for (CeedInt c=0; c<C; c++) 38*9b2a10adSJeremy L Thompson v[(a*J+j)*C+c] += tq * u[(a*B+b)*C+c]; 39*9b2a10adSJeremy L Thompson } 40*9b2a10adSJeremy L Thompson 41*9b2a10adSJeremy L Thompson return CEED_ERROR_SUCCESS; 42*9b2a10adSJeremy L Thompson } 43*9b2a10adSJeremy L Thompson 44*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------ 45*9b2a10adSJeremy L Thompson // Tensor Contract Apply 46*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------ 47*9b2a10adSJeremy L 
Thompson static int CeedTensorContractApply_Opt(CeedTensorContract contract, CeedInt A, 48*9b2a10adSJeremy L Thompson CeedInt B, CeedInt C, CeedInt J, 49*9b2a10adSJeremy L Thompson const CeedScalar *restrict t, 50*9b2a10adSJeremy L Thompson CeedTransposeMode t_mode, const CeedInt add, 51*9b2a10adSJeremy L Thompson const CeedScalar *restrict u, 52*9b2a10adSJeremy L Thompson CeedScalar *restrict v) { 53*9b2a10adSJeremy L Thompson if (!add) 54*9b2a10adSJeremy L Thompson for (CeedInt q=0; q<A*J*C; q++) 55*9b2a10adSJeremy L Thompson v[q] = (CeedScalar) 0.0; 56*9b2a10adSJeremy L Thompson 57*9b2a10adSJeremy L Thompson if (C == 1) 58*9b2a10adSJeremy L Thompson return CeedTensorContractApply_Core_Opt(contract, A, B, 1, J, t, t_mode, 59*9b2a10adSJeremy L Thompson add, u, v); 60*9b2a10adSJeremy L Thompson else 61*9b2a10adSJeremy L Thompson return CeedTensorContractApply_Core_Opt(contract, A, B, C, J, t, t_mode, 62*9b2a10adSJeremy L Thompson add, u, v); 63*9b2a10adSJeremy L Thompson 64*9b2a10adSJeremy L Thompson return CEED_ERROR_SUCCESS; 65*9b2a10adSJeremy L Thompson } 66*9b2a10adSJeremy L Thompson 67*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------ 68*9b2a10adSJeremy L Thompson // Tensor Contract Destroy 69*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------ 70*9b2a10adSJeremy L Thompson static int CeedTensorContractDestroy_Opt(CeedTensorContract contract) { 71*9b2a10adSJeremy L Thompson return CEED_ERROR_SUCCESS; 72*9b2a10adSJeremy L Thompson } 73*9b2a10adSJeremy L Thompson 74*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------ 75*9b2a10adSJeremy L Thompson // Tensor Contract Create 76*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------ 77*9b2a10adSJeremy L Thompson int CeedTensorContractCreate_Opt(CeedBasis basis, CeedTensorContract contract) { 
78*9b2a10adSJeremy L Thompson int ierr; 79*9b2a10adSJeremy L Thompson Ceed ceed; 80*9b2a10adSJeremy L Thompson ierr = CeedTensorContractGetCeed(contract, &ceed); CeedChkBackend(ierr); 81*9b2a10adSJeremy L Thompson 82*9b2a10adSJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Apply", 83*9b2a10adSJeremy L Thompson CeedTensorContractApply_Opt); CeedChkBackend(ierr); 84*9b2a10adSJeremy L Thompson ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Destroy", 85*9b2a10adSJeremy L Thompson CeedTensorContractDestroy_Opt); CeedChkBackend(ierr); 86*9b2a10adSJeremy L Thompson 87*9b2a10adSJeremy L Thompson return CEED_ERROR_SUCCESS; 88*9b2a10adSJeremy L Thompson } 89*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------ 90