xref: /libCEED/backends/hip-ref/ceed-hip-ref-qfunction-load.cpp (revision 86e1ed65013ccad5b26f17713749c9f7d6be2d31)
10d0321e0SJeremy L Thompson // Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
20d0321e0SJeremy L Thompson // Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
30d0321e0SJeremy L Thompson // All Rights reserved. See files LICENSE and NOTICE for details.
40d0321e0SJeremy L Thompson //
50d0321e0SJeremy L Thompson // This file is part of CEED, a collection of benchmarks, miniapps, software
60d0321e0SJeremy L Thompson // libraries and APIs for efficient high-order finite element and spectral
70d0321e0SJeremy L Thompson // element discretizations for exascale applications. For more information and
80d0321e0SJeremy L Thompson // source code availability see http://github.com/ceed.
90d0321e0SJeremy L Thompson //
100d0321e0SJeremy L Thompson // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
110d0321e0SJeremy L Thompson // a collaborative effort of two U.S. Department of Energy organizations (Office
120d0321e0SJeremy L Thompson // of Science and the National Nuclear Security Administration) responsible for
130d0321e0SJeremy L Thompson // the planning and preparation of a capable exascale ecosystem, including
140d0321e0SJeremy L Thompson // software, applications, hardware, advanced system engineering and early
150d0321e0SJeremy L Thompson // testbed platforms, in support of the nation's exascale computing imperative.
160d0321e0SJeremy L Thompson 
170d0321e0SJeremy L Thompson #include <ceed/ceed.h>
180d0321e0SJeremy L Thompson #include <ceed/backend.h>
19437930d1SJeremy L Thompson #include <ceed/jit-tools.h>
200d0321e0SJeremy L Thompson #include <iostream>
210d0321e0SJeremy L Thompson #include <sstream>
220d0321e0SJeremy L Thompson #include <string.h>
230d0321e0SJeremy L Thompson #include "ceed-hip-ref.h"
240d0321e0SJeremy L Thompson #include "../hip/ceed-hip-compile.h"
250d0321e0SJeremy L Thompson 
260d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
270d0321e0SJeremy L Thompson // Build QFunction kernel
280d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
290d0321e0SJeremy L Thompson extern "C" int CeedHipBuildQFunction(CeedQFunction qf) {
300d0321e0SJeremy L Thompson   CeedInt ierr;
310d0321e0SJeremy L Thompson   using std::ostringstream;
320d0321e0SJeremy L Thompson   using std::string;
33437930d1SJeremy L Thompson   Ceed ceed;
34437930d1SJeremy L Thompson   CeedQFunctionGetCeed(qf, &ceed);
35*86e1ed65Snbeams   Ceed_Hip *ceed_Hip;
36*86e1ed65Snbeams   ierr = CeedGetData(ceed, &ceed_Hip); CeedChkBackend(ierr);
370d0321e0SJeremy L Thompson   CeedQFunction_Hip *data;
380d0321e0SJeremy L Thompson   ierr = CeedQFunctionGetData(qf, (void **)&data); CeedChkBackend(ierr);
39437930d1SJeremy L Thompson 
400d0321e0SJeremy L Thompson   // QFunction is built
41437930d1SJeremy L Thompson   if (data->QFunction)
420d0321e0SJeremy L Thompson     return CEED_ERROR_SUCCESS;
430d0321e0SJeremy L Thompson 
44437930d1SJeremy L Thompson   if (!data->qfunction_source)
45437930d1SJeremy L Thompson     // LCOV_EXCL_START
46437930d1SJeremy L Thompson     return CeedError(ceed, CEED_ERROR_BACKEND,
47437930d1SJeremy L Thompson                      "No QFunction source or hipFunction_t provided.");
48437930d1SJeremy L Thompson   // LCOV_EXCL_STOP
49437930d1SJeremy L Thompson 
500d0321e0SJeremy L Thompson   // QFunction kernel generation
51437930d1SJeremy L Thompson   CeedInt num_input_fields, num_output_fields, size;
52437930d1SJeremy L Thompson   CeedQFunctionField *input_fields, *output_fields;
53437930d1SJeremy L Thompson   ierr = CeedQFunctionGetFields(qf, &num_input_fields, &input_fields,
54437930d1SJeremy L Thompson                                 &num_output_fields, &output_fields);
550d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
560d0321e0SJeremy L Thompson 
570d0321e0SJeremy L Thompson   // Build strings for final kernel
58437930d1SJeremy L Thompson   char *read_write_kernel_path, *read_write_kernel_source;
59437930d1SJeremy L Thompson   ierr = CeedPathConcatenate(ceed, __FILE__, "kernels/hip-ref-qfunction.h",
60437930d1SJeremy L Thompson                              &read_write_kernel_path); CeedChkBackend(ierr);
6146dc0734SJeremy L Thompson   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source -----\n");
62437930d1SJeremy L Thompson   ierr = CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &read_write_kernel_source);
63437930d1SJeremy L Thompson   CeedChkBackend(ierr);
6446dc0734SJeremy L Thompson   CeedDebug256(ceed, 2, "----- Loading QFunction Read/Write Kernel Source Complete! -----\n");
65437930d1SJeremy L Thompson   string qfunction_source(data->qfunction_source);
66437930d1SJeremy L Thompson   string qfunction_name(data->qfunction_name);
67437930d1SJeremy L Thompson   string read_write(read_write_kernel_source);
68437930d1SJeremy L Thompson   string kernel_name = "CeedKernel_Hip_ref_" + qfunction_name;
690d0321e0SJeremy L Thompson   ostringstream code;
700d0321e0SJeremy L Thompson 
710d0321e0SJeremy L Thompson   // Defintions
720d0321e0SJeremy L Thompson   code << "\n#define CEED_QFUNCTION(name) inline __device__ int name\n";
730d0321e0SJeremy L Thompson   code << "#define CEED_QFUNCTION_HELPER inline __device__ __forceinline__\n";
740d0321e0SJeremy L Thompson   code << "#define CeedPragmaSIMD\n";
750d0321e0SJeremy L Thompson   code << "#define CEED_ERROR_SUCCESS 0\n";
760d0321e0SJeremy L Thompson   code << "#define CEED_Q_VLA 1\n\n";
770d0321e0SJeremy L Thompson   code << "typedef struct { const CeedScalar* inputs[16]; CeedScalar* outputs[16]; } Fields_Hip;\n";
78437930d1SJeremy L Thompson   code << read_write;
79437930d1SJeremy L Thompson   code << qfunction_source;
8046dc0734SJeremy L Thompson   code << "\n";
81*86e1ed65Snbeams   code << "extern \"C\" __launch_bounds__(BLOCK_SIZE)\n";
82*86e1ed65Snbeams   code << "__global__ void " << kernel_name << "(void *ctx, CeedInt Q, Fields_Hip fields) {\n";
830d0321e0SJeremy L Thompson 
840d0321e0SJeremy L Thompson   // Inputs
8546dc0734SJeremy L Thompson   code << "  // Input fields\n";
86437930d1SJeremy L Thompson   for (CeedInt i = 0; i < num_input_fields; i++) {
87437930d1SJeremy L Thompson     ierr = CeedQFunctionFieldGetSize(input_fields[i], &size); CeedChkBackend(ierr);
8846dc0734SJeremy L Thompson     code << "  const CeedInt size_input_" << i << " = " << size << ";\n";
8946dc0734SJeremy L Thompson     code << "  CeedScalar input_" << i << "[size_input_" << i << "];\n";
9046dc0734SJeremy L Thompson   }
9146dc0734SJeremy L Thompson   code << "  const CeedScalar* inputs[" << num_input_fields << "];\n";
9246dc0734SJeremy L Thompson   for (CeedInt i = 0; i < num_input_fields; i++) {
9346dc0734SJeremy L Thompson     code << "  inputs[" << i << "] = input_" << i << ";\n";
940d0321e0SJeremy L Thompson   }
95437930d1SJeremy L Thompson   code << "\n";
960d0321e0SJeremy L Thompson 
970d0321e0SJeremy L Thompson   // Outputs
9846dc0734SJeremy L Thompson   code << "  // Output fields\n";
99437930d1SJeremy L Thompson   for (CeedInt i = 0; i < num_output_fields; i++) {
100437930d1SJeremy L Thompson     ierr = CeedQFunctionFieldGetSize(output_fields[i], &size); CeedChkBackend(ierr);
10146dc0734SJeremy L Thompson     code << "  const CeedInt size_output_" << i << " = " << size << ";\n";
10246dc0734SJeremy L Thompson     code << "  CeedScalar output_" << i << "[size_output_" << i << "];\n";
1030d0321e0SJeremy L Thompson   }
10446dc0734SJeremy L Thompson   code << "  CeedScalar* outputs[" << num_output_fields << "];\n";
105437930d1SJeremy L Thompson   for (CeedInt i = 0; i < num_output_fields; i++) {
10646dc0734SJeremy L Thompson     code << "  outputs[" << i << "] = output_" << i << ";\n";
1070d0321e0SJeremy L Thompson   }
108437930d1SJeremy L Thompson   code << "\n";
1090d0321e0SJeremy L Thompson 
1100d0321e0SJeremy L Thompson   // Loop over quadrature points
11146dc0734SJeremy L Thompson   code << "  // Loop over quadrature points\n";
1120d0321e0SJeremy L Thompson   code << "  for (CeedInt q = blockIdx.x * blockDim.x + threadIdx.x; q < Q; q += blockDim.x * gridDim.x) {\n";
1130d0321e0SJeremy L Thompson 
1140d0321e0SJeremy L Thompson   // Load inputs
11546dc0734SJeremy L Thompson   code << "    // -- Load inputs\n";
116437930d1SJeremy L Thompson   for (CeedInt i = 0; i < num_input_fields; i++) {
11746dc0734SJeremy L Thompson     code << "    readQuads<size_input_" << i << ">(q, Q, fields.inputs[" << i << "], input_" << i << ");\n";
1180d0321e0SJeremy L Thompson   }
11946dc0734SJeremy L Thompson   code << "\n";
12046dc0734SJeremy L Thompson 
1210d0321e0SJeremy L Thompson   // QFunction
12246dc0734SJeremy L Thompson   code << "    // -- Call QFunction\n";
12346dc0734SJeremy L Thompson   code << "    " << qfunction_name << "(ctx, 1, inputs, outputs);\n\n";
1240d0321e0SJeremy L Thompson 
1250d0321e0SJeremy L Thompson   // Write outputs
12646dc0734SJeremy L Thompson   code << "    // -- Write outputs\n";
127437930d1SJeremy L Thompson   for (CeedInt i = 0; i < num_output_fields; i++) {
12846dc0734SJeremy L Thompson     code << "    writeQuads<size_output_" << i << ">(q, Q, output_" << i << ", fields.outputs[" << i << "]);\n";
1290d0321e0SJeremy L Thompson   }
1300d0321e0SJeremy L Thompson   code << "  }\n";
1310d0321e0SJeremy L Thompson   code << "}\n";
1320d0321e0SJeremy L Thompson 
1330d0321e0SJeremy L Thompson   // View kernel for debugging
13446dc0734SJeremy L Thompson   CeedDebug256(ceed, 2, "Generated QFunction Kernels:\n");
1350d0321e0SJeremy L Thompson   CeedDebug(ceed, code.str().c_str());
1360d0321e0SJeremy L Thompson 
1370d0321e0SJeremy L Thompson   // Compile kernel
138*86e1ed65Snbeams   ierr = CeedCompileHip(ceed, code.str().c_str(), &data->module,
139*86e1ed65Snbeams 		        1, "BLOCK_SIZE", ceed_Hip->opt_block_size);
1400d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
141437930d1SJeremy L Thompson   ierr = CeedGetKernelHip(ceed, data->module, kernel_name.c_str(), &data->QFunction);
1420d0321e0SJeremy L Thompson   CeedChkBackend(ierr);
1430d0321e0SJeremy L Thompson 
1440d0321e0SJeremy L Thompson   // Cleanup
145437930d1SJeremy L Thompson   ierr = CeedFree(&data->qfunction_source); CeedChkBackend(ierr);
146437930d1SJeremy L Thompson   ierr = CeedFree(&read_write_kernel_path); CeedChkBackend(ierr);
147437930d1SJeremy L Thompson   ierr = CeedFree(&read_write_kernel_source); CeedChkBackend(ierr);
148437930d1SJeremy L Thompson 
1490d0321e0SJeremy L Thompson   return CEED_ERROR_SUCCESS;
1500d0321e0SJeremy L Thompson }
1510d0321e0SJeremy L Thompson //------------------------------------------------------------------------------
152