xref: /libCEED/backends/cuda-ref/ceed-cuda-ref-qfunction-load.cpp (revision 7bfe0f0e497883534890a072c2a8b865352898b0)
1 // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
2 // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
3 // All Rights reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 #include <ceed/ceed.h>
18 #include <ceed/backend.h>
19 #include <ceed/jit-tools.h>
20 #include <iostream>
21 #include <sstream>
22 #include <string.h>
23 #include "ceed-cuda-ref.h"
24 #include "../cuda/ceed-cuda-compile.h"
25 
26 //------------------------------------------------------------------------------
27 // Build QFunction kernel
28 //------------------------------------------------------------------------------
29 extern "C" int CeedCudaBuildQFunction(CeedQFunction qf) {
30   CeedInt ierr;
31   using std::ostringstream;
32   using std::string;
33   Ceed ceed;
34   CeedQFunctionGetCeed(qf, &ceed);
35   CeedQFunction_Cuda *data;
36   ierr = CeedQFunctionGetData(qf, (void **)&data); CeedChkBackend(ierr);
37 
38   // QFunction is built
39   if (data->QFunction)
40     return CEED_ERROR_SUCCESS;
41 
42   if (!data->qfunction_source)
43     // LCOV_EXCL_START
44     return CeedError(ceed, CEED_ERROR_BACKEND,
45                      "No QFunction source or CUfunction provided.");
46   // LCOV_EXCL_STOP
47 
48   // QFunction kernel generation
49   CeedInt num_input_fields, num_output_fields, size;
50   CeedQFunctionField *input_fields, *output_fields;
51   ierr = CeedQFunctionGetFields(qf, &num_input_fields, &input_fields,
52                                 &num_output_fields, &output_fields);
53   CeedChkBackend(ierr);
54 
55   // Build strings for final kernel
56   char *read_write_kernel_path, *read_write_kernel_source;
57   ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/cuda-ref-qfunction.h",
58                              &read_write_kernel_path); CeedChkBackend(ierr);
59   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
60   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
61   CeedChkBackend(ierr);
62   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source Complete! -----\n");
63   string qfunction_source(data->qfunction_source);
64   string qfunction_name(data->qfunction_name);
65   string read_write(read_write_kernel_source);
66   string kernel_name = "CeedKernel_Cuda_ref_" + qfunction_name;
67   ostringstream code;
68 
69   // Defintions
70   code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
71   code << "#define CEED_QFUNCTION_HELPER inline __device__\n";
72   code << "#define CeedPragmaSIMD\n";
73   code << "#define CEED_ERROR_SUCCESS 0\n";
74   code << "#define CEED_Q_VLA 1\n\n";
75   code << "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Cuda;\n";
76   code << read_write;
77   code << qfunction_source;
78   code << "\n";
79   code << "extern \"C\" __global__ void " << kernel_name << "(void *ctx, CeedInt Q, Fields_Cuda fields) {\n";
80 
81   // Inputs
82   code << "  // Input fields\n";
83   for (CeedInt i = 0; i < num_input_fields; i++) {
84     ierr = CeedQFunctionFieldGetSize(input_fields[i], &size); CeedChkBackend(ierr);
85     code << "  const CeedInt size_input_" << i << " = " << size << ";\n";
86     code << "  CeedScalar input_" << i << "[size_input_" << i << "];\n";
87   }
88   code << "  const CeedScalar* inputs[" << num_input_fields << "];\n";
89   for (CeedInt i = 0; i < num_input_fields; i++) {
90     code << "  inputs[" << i << "] = input_" << i << ";\n";
91   }
92   code << "\n";
93 
94   // Outputs
95   code << "  // Output fields\n";
96   for (CeedInt i = 0; i < num_output_fields; i++) {
97     ierr = CeedQFunctionFieldGetSize(output_fields[i], &size); CeedChkBackend(ierr);
98     code << "  const CeedInt size_output_" << i << " = " << size << ";\n";
99     code << "  CeedScalar output_" << i << "[size_output_" << i << "];\n";
100   }
101   code << "  CeedScalar* outputs[" << num_output_fields << "];\n";
102   for (CeedInt i = 0; i < num_output_fields; i++) {
103     code << "  outputs[" << i << "] = output_" << i << ";\n";
104   }
105   code << "\n";
106 
107   // Loop over quadrature points
108   code << "  // Loop over quadrature points\n";
109   code << "  for (CeedInt q = blockIdx.x * blockDim.x + threadIdx.x; q < Q; q += blockDim.x * gridDim.x) {\n";
110 
111   // Load inputs
112   code << "    // -- Load inputs\n";
113   for (CeedInt i = 0; i < num_input_fields; i++) {
114     code << "    readQuads<size_input_" << i << ">(q, Q, fields.inputs[" << i << "], input_" << i << ");\n";
115   }
116   code << "\n";
117 
118   // QFunction
119   code << "    // -- Call QFunction\n";
120   code << "    " << qfunction_name << "(ctx, 1, inputs, outputs);\n\n";
121 
122   // Write outputs
123   code << "    // -- Write outputs\n";
124   for (CeedInt i = 0; i < num_output_fields; i++) {
125     code << "    writeQuads<size_output_" << i << ">(q, Q, output_" << i << ", fields.outputs[" << i << "]);\n";
126   }
127   code << "  }\n";
128   code << "}\n";
129 
130   // View kernel for debugging
131   CeedDebug256(ceed, 2, "Generated QFunction Kernels:\n");
132   CeedDebug(ceed, code.str().c_str());
133 
134   // Compile kernel
135   ierr = CeedCompileCuda(ceed, code.str().c_str(), &data->module, 0);
136   CeedChkBackend(ierr);
137   ierr = CeedGetKernelCuda(ceed, data->module, kernel_name.c_str(), &data->QFunction);
138   CeedChkBackend(ierr);
139 
140   // Cleanup
141   ierr = CeedFree(&data->qfunction_source); CeedChkBackend(ierr);
142   ierr = CeedFree(&read_write_kernel_path); CeedChkBackend(ierr);
143   ierr = CeedFree(&read_write_kernel_source); CeedChkBackend(ierr);
144 
145   return CEED_ERROR_SUCCESS;
146 }
147 //------------------------------------------------------------------------------
148