xref: /libCEED/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp (revision 0d0321e0e600f17fbb9528732fcb5c1d5c63fc0f)
1*0d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2*0d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3*0d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details.
4*0d0321e0SJeremy L Thompson //
5*0d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software
6*0d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral
7*0d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and
8*0d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed.
9*0d0321e0SJeremy L Thompson //
10*0d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11*0d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office
12*0d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for
13*0d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including
14*0d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early
15*0d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative.
16*0d0321e0SJeremy L Thompson 
17*0d0321e0SJeremy L Thompson #include <ceed/ceed.h>
18*0d0321e0SJeremy L Thompson #include <ceed/backend.h>
19*0d0321e0SJeremy L Thompson #include <iostream>
20*0d0321e0SJeremy L Thompson #include <sstream>
21*0d0321e0SJeremy L Thompson #include <string.h>
22*0d0321e0SJeremy L Thompson #include "ceed-hip-ref.h"
23*0d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h"
24*0d0321e0SJeremy L Thompson 
25*0d0321e0SJeremy L Thompson static const char *qReadWrite = QUOTE(
26*0d0321e0SJeremy L Thompson template <int SIZE>
27*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
28*0d0321e0SJeremy L Thompson // Read from quadrature points
29*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
30*0d0321e0SJeremy L Thompson inline __device__ void readQuads(const CeedInt quad, const CeedInt nquads, const CeedScalar* d_u, CeedScalar* r_u) {
31*0d0321e0SJeremy L Thompson   for(CeedInt comp = 0; comp < SIZE; ++comp) {
32*0d0321e0SJeremy L Thompson     r_u[comp] = d_u[quad + nquads * comp];
33*0d0321e0SJeremy L Thompson   }
34*0d0321e0SJeremy L Thompson }
35*0d0321e0SJeremy L Thompson 
36*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
37*0d0321e0SJeremy L Thompson // Write at quadrature points
38*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
39*0d0321e0SJeremy L Thompson template <int SIZE>
40*0d0321e0SJeremy L Thompson inline __device__ void writeQuads(const CeedInt quad, const CeedInt nquads, const CeedScalar* r_v, CeedScalar* d_v) {
41*0d0321e0SJeremy L Thompson   for(CeedInt comp = 0; comp < SIZE; ++comp) {
42*0d0321e0SJeremy L Thompson     d_v[quad + nquads * comp] = r_v[comp];
43*0d0321e0SJeremy L Thompson   }
44*0d0321e0SJeremy L Thompson }
45*0d0321e0SJeremy L Thompson );
46*0d0321e0SJeremy L Thompson 
47*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
48*0d0321e0SJeremy L Thompson // Build QFunction kernel
49*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
50*0d0321e0SJeremy L Thompson extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
51*0d0321e0SJeremy L Thompson   CeedInt ierr;
52*0d0321e0SJeremy L Thompson   using std::ostringstream;
53*0d0321e0SJeremy L Thompson   using std::string;
54*0d0321e0SJeremy L Thompson   CeedQFunction_Hip *data;
55*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetData(qf, (void **)&data); CeedChkBackend(ierr);
56*0d0321e0SJeremy L Thompson   // QFunction is built
57*0d0321e0SJeremy L Thompson   if (!data->qFunctionSource)
58*0d0321e0SJeremy L Thompson     return CEED_ERROR_SUCCESS;
59*0d0321e0SJeremy L Thompson 
60*0d0321e0SJeremy L Thompson   // QFunction kernel generation
61*0d0321e0SJeremy L Thompson   CeedInt numinputfields, numoutputfields, size;
62*0d0321e0SJeremy L Thompson   CeedQFunctionField *qfinputfields, *qfoutputfields;
63*0d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, &numinputfields, &qfinputfields, &numoutputfields, &qfoutputfields);
64*0d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
65*0d0321e0SJeremy L Thompson 
66*0d0321e0SJeremy L Thompson   // Build strings for final kernel
67*0d0321e0SJeremy L Thompson   string qFunction(data->qFunctionSource);
68*0d0321e0SJeremy L Thompson   string qReadWriteS(qReadWrite);
69*0d0321e0SJeremy L Thompson   ostringstream code;
70*0d0321e0SJeremy L Thompson   string qFunctionName(data->qFunctionName);
71*0d0321e0SJeremy L Thompson   string kernelName;
72*0d0321e0SJeremy L Thompson   kernelName = "CeedKernel_Hip_ref_" + qFunctionName;
73*0d0321e0SJeremy L Thompson 
74*0d0321e0SJeremy L Thompson   // Defintions
75*0d0321e0SJeremy L Thompson   code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
76*0d0321e0SJeremy L Thompson   code << "#define CEED_QFUNCTION_HELPER inline __device__ __forceinline__\n";
77*0d0321e0SJeremy L Thompson   code << "#define CeedPragmaSIMD\n";
78*0d0321e0SJeremy L Thompson   code << "#define CEED_ERROR_SUCCESS 0\n";
79*0d0321e0SJeremy L Thompson   code << "#define CEED_Q_VLA 1\n\n";
80*0d0321e0SJeremy L Thompson   code << "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Hip;\n";
81*0d0321e0SJeremy L Thompson   code << qReadWriteS;
82*0d0321e0SJeremy L Thompson   code << qFunction;
83*0d0321e0SJeremy L Thompson   code << "extern \"C\" __global__ void " << kernelName << "(void *ctx, CeedInt Q, Fields_Hip fields) {\n";
84*0d0321e0SJeremy L Thompson 
85*0d0321e0SJeremy L Thompson   // Inputs
86*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numinputfields; i++) {
87*0d0321e0SJeremy L Thompson     code << "// Input field "<<i<<"\n";
88*0d0321e0SJeremy L Thompson     ierr = CeedQFunctionFieldGetSize(qfinputfields[i], &size); CeedChkBackend(ierr);
89*0d0321e0SJeremy L Thompson     code << "  const CeedInt size_in_"<<i<<" = "<<size<<";\n";
90*0d0321e0SJeremy L Thompson     code << "  CeedScalar r_q"<<i<<"[size_in_"<<i<<"];\n";
91*0d0321e0SJeremy L Thompson   }
92*0d0321e0SJeremy L Thompson 
93*0d0321e0SJeremy L Thompson   // Outputs
94*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numoutputfields; i++) {
95*0d0321e0SJeremy L Thompson     code << "// Output field "<<i<<"\n";
96*0d0321e0SJeremy L Thompson     ierr = CeedQFunctionFieldGetSize(qfoutputfields[i], &size); CeedChkBackend(ierr);
97*0d0321e0SJeremy L Thompson     code << "  const CeedInt size_out_"<<i<<" = "<<size<<";\n";
98*0d0321e0SJeremy L Thompson     code << "  CeedScalar r_qq"<<i<<"[size_out_"<<i<<"];\n";
99*0d0321e0SJeremy L Thompson   }
100*0d0321e0SJeremy L Thompson 
101*0d0321e0SJeremy L Thompson   // Setup input/output arrays
102*0d0321e0SJeremy L Thompson   code << "  const CeedScalar* in["<<numinputfields<<"];\n";
103*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numinputfields; i++) {
104*0d0321e0SJeremy L Thompson     code << "    in["<<i<<"] = r_q"<<i<<";\n";
105*0d0321e0SJeremy L Thompson   }
106*0d0321e0SJeremy L Thompson   code << "  CeedScalar* out["<<numoutputfields<<"];\n";
107*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numoutputfields; i++) {
108*0d0321e0SJeremy L Thompson     code << "    out["<<i<<"] = r_qq"<<i<<";\n";
109*0d0321e0SJeremy L Thompson   }
110*0d0321e0SJeremy L Thompson 
111*0d0321e0SJeremy L Thompson   // Loop over quadrature points
112*0d0321e0SJeremy L Thompson   code << "  for (CeedInt q = blockIdx.x * blockDim.x + threadIdx.x; q < Q; q += blockDim.x * gridDim.x) {\n";
113*0d0321e0SJeremy L Thompson 
114*0d0321e0SJeremy L Thompson   // Load inputs
115*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numinputfields; i++) {
116*0d0321e0SJeremy L Thompson     code << "// Input field "<<i<<"\n";
117*0d0321e0SJeremy L Thompson     code << "  readQuads<size_in_"<<i<<">(q, Q, fields.inputs["<<i<<"], r_q"<<i<<");\n";
118*0d0321e0SJeremy L Thompson   }
119*0d0321e0SJeremy L Thompson   // QFunction
120*0d0321e0SJeremy L Thompson   code << "// QFunction\n";
121*0d0321e0SJeremy L Thompson   code << "    "<<qFunctionName<<"(ctx, 1, in, out);\n";
122*0d0321e0SJeremy L Thompson 
123*0d0321e0SJeremy L Thompson   // Write outputs
124*0d0321e0SJeremy L Thompson   for (CeedInt i = 0; i < numoutputfields; i++) {
125*0d0321e0SJeremy L Thompson     code << "// Output field "<<i<<"\n";
126*0d0321e0SJeremy L Thompson     code << "  writeQuads<size_out_"<<i<<">(q, Q, r_qq"<<i<<", fields.outputs["<<i<<"]);\n";
127*0d0321e0SJeremy L Thompson   }
128*0d0321e0SJeremy L Thompson   code << "  }\n";
129*0d0321e0SJeremy L Thompson   code << "}\n";
130*0d0321e0SJeremy L Thompson 
131*0d0321e0SJeremy L Thompson   // View kernel for debugging
132*0d0321e0SJeremy L Thompson   Ceed ceed;
133*0d0321e0SJeremy L Thompson   CeedQFunctionGetCeed(qf, &ceed);
134*0d0321e0SJeremy L Thompson   CeedDebug(ceed, code.str().c_str());
135*0d0321e0SJeremy L Thompson 
136*0d0321e0SJeremy L Thompson   // Compile kernel
137*0d0321e0SJeremy L Thompson   ierr = CeedCompileHip(ceed, code.str().c_str(), &data->module, 0);
138*0d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
139*0d0321e0SJeremy L Thompson   ierr = CeedGetKernelHip(ceed, data->module, kernelName.c_str(), &data->qFunction);
140*0d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
141*0d0321e0SJeremy L Thompson 
142*0d0321e0SJeremy L Thompson   // Cleanup
143*0d0321e0SJeremy L Thompson   ierr = CeedFree(&data->qFunctionSource); CeedChkBackend(ierr);
144*0d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
145*0d0321e0SJeremy L Thompson }
146*0d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
147