xref: /libCEED/backends/opt/ceed-opt-tensor.c (revision 9b2a10adca6d745eeaf97f6468bfab9f8937faaf)
// Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
// All Rights reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.
16*9b2a10adSJeremy L Thompson 
17*9b2a10adSJeremy L Thompson #include <ceed/ceed.h>
18*9b2a10adSJeremy L Thompson #include <ceed/backend.h>
19*9b2a10adSJeremy L Thompson #include "ceed-opt.h"
20*9b2a10adSJeremy L Thompson 
21*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------
22*9b2a10adSJeremy L Thompson // Tensor Contract Core loop
23*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------
24*9b2a10adSJeremy L Thompson static inline int CeedTensorContractApply_Core_Opt(CeedTensorContract contract,
25*9b2a10adSJeremy L Thompson     CeedInt A, CeedInt B, CeedInt C, CeedInt J, const CeedScalar *restrict t,
26*9b2a10adSJeremy L Thompson     CeedTransposeMode t_mode, const CeedInt add, const CeedScalar *restrict u,
27*9b2a10adSJeremy L Thompson     CeedScalar *restrict v) {
28*9b2a10adSJeremy L Thompson   CeedInt t_stride_0 = B, t_stride_1 = 1;
29*9b2a10adSJeremy L Thompson   if (t_mode == CEED_TRANSPOSE) {
30*9b2a10adSJeremy L Thompson     t_stride_0 = 1; t_stride_1 = J;
31*9b2a10adSJeremy L Thompson   }
32*9b2a10adSJeremy L Thompson 
33*9b2a10adSJeremy L Thompson   for (CeedInt a=0; a<A; a++)
34*9b2a10adSJeremy L Thompson     for (CeedInt b=0; b<B; b++)
35*9b2a10adSJeremy L Thompson       for (CeedInt j=0; j<J; j++) {
36*9b2a10adSJeremy L Thompson         CeedScalar tq = t[j*t_stride_0 + b*t_stride_1];
37*9b2a10adSJeremy L Thompson         for (CeedInt c=0; c<C; c++)
38*9b2a10adSJeremy L Thompson           v[(a*J+j)*C+c] += tq * u[(a*B+b)*C+c];
39*9b2a10adSJeremy L Thompson       }
40*9b2a10adSJeremy L Thompson 
41*9b2a10adSJeremy L Thompson   return CEED_ERROR_SUCCESS;
42*9b2a10adSJeremy L Thompson }
43*9b2a10adSJeremy L Thompson 
44*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------
45*9b2a10adSJeremy L Thompson // Tensor Contract Apply
46*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------
47*9b2a10adSJeremy L Thompson static int CeedTensorContractApply_Opt(CeedTensorContract contract, CeedInt A,
48*9b2a10adSJeremy L Thompson                                        CeedInt B, CeedInt C, CeedInt J,
49*9b2a10adSJeremy L Thompson                                        const CeedScalar *restrict t,
50*9b2a10adSJeremy L Thompson                                        CeedTransposeMode t_mode, const CeedInt add,
51*9b2a10adSJeremy L Thompson                                        const CeedScalar *restrict u,
52*9b2a10adSJeremy L Thompson                                        CeedScalar *restrict v) {
53*9b2a10adSJeremy L Thompson   if (!add)
54*9b2a10adSJeremy L Thompson     for (CeedInt q=0; q<A*J*C; q++)
55*9b2a10adSJeremy L Thompson       v[q] = (CeedScalar) 0.0;
56*9b2a10adSJeremy L Thompson 
57*9b2a10adSJeremy L Thompson   if (C == 1)
58*9b2a10adSJeremy L Thompson     return CeedTensorContractApply_Core_Opt(contract, A, B, 1, J, t, t_mode,
59*9b2a10adSJeremy L Thompson                                             add, u, v);
60*9b2a10adSJeremy L Thompson   else
61*9b2a10adSJeremy L Thompson     return CeedTensorContractApply_Core_Opt(contract, A, B, C, J, t, t_mode,
62*9b2a10adSJeremy L Thompson                                             add, u, v);
63*9b2a10adSJeremy L Thompson 
64*9b2a10adSJeremy L Thompson   return CEED_ERROR_SUCCESS;
65*9b2a10adSJeremy L Thompson }
66*9b2a10adSJeremy L Thompson 
67*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------
68*9b2a10adSJeremy L Thompson // Tensor Contract Destroy
69*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------
70*9b2a10adSJeremy L Thompson static int CeedTensorContractDestroy_Opt(CeedTensorContract contract) {
71*9b2a10adSJeremy L Thompson   return CEED_ERROR_SUCCESS;
72*9b2a10adSJeremy L Thompson }
73*9b2a10adSJeremy L Thompson 
74*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------
75*9b2a10adSJeremy L Thompson // Tensor Contract Create
76*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------
77*9b2a10adSJeremy L Thompson int CeedTensorContractCreate_Opt(CeedBasis basis, CeedTensorContract contract) {
78*9b2a10adSJeremy L Thompson   int ierr;
79*9b2a10adSJeremy L Thompson   Ceed ceed;
80*9b2a10adSJeremy L Thompson   ierr = CeedTensorContractGetCeed(contract, &ceed); CeedChkBackend(ierr);
81*9b2a10adSJeremy L Thompson 
82*9b2a10adSJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Apply",
83*9b2a10adSJeremy L Thompson                                 CeedTensorContractApply_Opt); CeedChkBackend(ierr);
84*9b2a10adSJeremy L Thompson   ierr = CeedSetBackendFunction(ceed, "TensorContract", contract, "Destroy",
85*9b2a10adSJeremy L Thompson                                 CeedTensorContractDestroy_Opt); CeedChkBackend(ierr);
86*9b2a10adSJeremy L Thompson 
87*9b2a10adSJeremy L Thompson   return CEED_ERROR_SUCCESS;
88*9b2a10adSJeremy L Thompson }
89*9b2a10adSJeremy L Thompson //------------------------------------------------------------------------------
90