xref: /libCEED/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp (revision 1f9221fe62e2635d1a8dba99f0dac116b51a901d)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed/jit-tools.h>
20 #include <iostream>
21 #include <sstream>
22 #include <string.h>
23 #include "ceed-hip-ref.h"
24 #include "../hip/ceed-hip-compile.h"
25 
26 //------------------------------------------------------------------------------
27 // Build QFunction kernel
28 //------------------------------------------------------------------------------
29 extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
30   CeedInt ierr;
31   using std::ostringstream;
32   using std::string;
33   Ceed ceed;
34   CeedQFunctionGetCeed(qf, &ceed);
35   Ceed_Hip *ceed_Hip;
36   ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr);
37   CeedQFunction_Hip *data;
38   ierr = CeedQFunctionGetData(qf, (void **)&data); CeedChkBackend(ierr);
39 
40   // QFunction is built
41   if (data->QFunction)
42     return CEED_ERROR_SUCCESS;
43 
44   if (!data->qfunction_source)
45     // LCOV_EXCL_START
46     return CeedError(ceed, CEED_ERROR_BACKEND,
47                      "No QFunction source or hipFunction_t provided.");
48   // LCOV_EXCL_STOP
49 
50   // QFunction kernel generation
51   CeedInt num_input_fields, num_output_fields, size;
52   CeedQFunctionField *input_fields, *output_fields;
53   ierr = CeedQFunctionGetFields(qf, &num_input_fields, &input_fields,
54                                 &num_output_fields, &output_fields);
55   CeedChkBackend(ierr);
56 
57   // Build strings for final kernel
58   char *read_write_kernel_path, *read_write_kernel_source;
59   ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-qfunction.h",
60                              &read_write_kernel_path); CeedChkBackend(ierr);
61   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
62   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
63   CeedChkBackend(ierr);
64   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source Complete! -----\n");
65   string qfunction_source(data->qfunction_source);
66   string qfunction_name(data->qfunction_name);
67   string read_write(read_write_kernel_source);
68   string kernel_name = "CeedKernel_Hip_ref_" + qfunction_name;
69   ostringstream code;
70 
71   // Defintions
72   code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
73   code << "#define CEED_QFUNCTION_HELPER inline __device__ __forceinline__\n";
74   code << "#define CeedPragmaSIMD\n";
75   code << "#define CEED_ERROR_SUCCESS 0\n";
76   code << "#define CEED_Q_VLA 1\n\n";
77   code << "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Hip;\n";
78   code << read_write;
79   code << qfunction_source;
80   code << "\n";
81   code << "extern \"C\" __launch_bounds__(BLOCK_SIZE)\n";
82   code << "__global__ void " << kernel_name << "(void *ctx, CeedInt Q, Fields_Hip fields) {\n";
83 
84   // Inputs
85   code << "  // Input fields\n";
86   for (CeedInt i = 0; i < num_input_fields; i++) {
87     ierr = CeedQFunctionFieldGetSize(input_fields[i], &size); CeedChkBackend(ierr);
88     code << "  const CeedInt size_input_" << i << " = " << size << ";\n";
89     code << "  CeedScalar input_" << i << "[size_input_" << i << "];\n";
90   }
91   code << "  const CeedScalar* inputs[" << num_input_fields << "];\n";
92   for (CeedInt i = 0; i < num_input_fields; i++) {
93     code << "  inputs[" << i << "] = input_" << i << ";\n";
94   }
95   code << "\n";
96 
97   // Outputs
98   code << "  // Output fields\n";
99   for (CeedInt i = 0; i < num_output_fields; i++) {
100     ierr = CeedQFunctionFieldGetSize(output_fields[i], &size); CeedChkBackend(ierr);
101     code << "  const CeedInt size_output_" << i << " = " << size << ";\n";
102     code << "  CeedScalar output_" << i << "[size_output_" << i << "];\n";
103   }
104   code << "  CeedScalar* outputs[" << num_output_fields << "];\n";
105   for (CeedInt i = 0; i < num_output_fields; i++) {
106     code << "  outputs[" << i << "] = output_" << i << ";\n";
107   }
108   code << "\n";
109 
110   // Loop over quadrature points
111   code << "  // Loop over quadrature points\n";
112   code << "  for (CeedInt q = blockIdx.x * blockDim.x + threadIdx.x; q < Q; q += blockDim.x * gridDim.x) {\n";
113 
114   // Load inputs
115   code << "    // -- Load inputs\n";
116   for (CeedInt i = 0; i < num_input_fields; i++) {
117     code << "    readQuads<size_input_" << i << ">(q, Q, fields.inputs[" << i << "], input_" << i << ");\n";
118   }
119   code << "\n";
120 
121   // QFunction
122   code << "    // -- Call QFunction\n";
123   code << "    " << qfunction_name << "(ctx, 1, inputs, outputs);\n\n";
124 
125   // Write outputs
126   code << "    // -- Write outputs\n";
127   for (CeedInt i = 0; i < num_output_fields; i++) {
128     code << "    writeQuads<size_output_" << i << ">(q, Q, output_" << i << ", fields.outputs[" << i << "]);\n";
129   }
130   code << "  }\n";
131   code << "}\n";
132 
133   // View kernel for debugging
134   CeedDebug256(ceed, 2, "Generated QFunction Kernels:\n");
135   CeedDebug(ceed, code.str().c_str());
136 
137   // Compile kernel
138   ierr = CeedCompileHip(ceed, code.str().c_str(), &data->module,
139 		        1, "BLOCK_SIZE", ceed_Hip->opt_block_size);
140   CeedChkBackend(ierr);
141   ierr = CeedGetKernelHip(ceed, data->module, kernel_name.c_str(), &data->QFunction);
142   CeedChkBackend(ierr);
143 
144   // Cleanup
145   ierr = CeedFree(&data->qfunction_source); CeedChkBackend(ierr);
146   ierr = CeedFree(&read_write_kernel_path); CeedChkBackend(ierr);
147   ierr = CeedFree(&read_write_kernel_source); CeedChkBackend(ierr);
148 
149   return CEED_ERROR_SUCCESS;
150 }
151 //------------------------------------------------------------------------------
152