1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2bd882c8aSJames Wright // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3bd882c8aSJames Wright //
4bd882c8aSJames Wright // SPDX-License-Identifier: BSD-2-Clause
5bd882c8aSJames Wright //
6bd882c8aSJames Wright // This file is part of CEED: http://github.com/ceed
7bd882c8aSJames Wright
8bd882c8aSJames Wright #include <ceed/backend.h>
9bd882c8aSJames Wright #include <ceed/ceed.h>
10bd882c8aSJames Wright #include <ceed/jit-tools.h>
11bd882c8aSJames Wright
12bd882c8aSJames Wright #include <iostream>
13bd882c8aSJames Wright #include <sstream>
14bd882c8aSJames Wright #include <string>
15bd882c8aSJames Wright #include <string_view>
16bd882c8aSJames Wright #include <sycl/sycl.hpp>
17bd882c8aSJames Wright #include <vector>
18bd882c8aSJames Wright
19bd882c8aSJames Wright #include "../sycl/ceed-sycl-compile.hpp"
20bd882c8aSJames Wright #include "ceed-sycl-ref.hpp"
21bd882c8aSJames Wright
22bd882c8aSJames Wright #define SUB_GROUP_SIZE_QF 16
23bd882c8aSJames Wright
24bd882c8aSJames Wright //------------------------------------------------------------------------------
25bd882c8aSJames Wright // Build QFunction kernel
26bd882c8aSJames Wright //
27bd882c8aSJames Wright // TODO: Refactor
28bd882c8aSJames Wright //------------------------------------------------------------------------------
CeedQFunctionBuildKernel_Sycl(CeedQFunction qf)29eb7e6cafSJeremy L Thompson extern "C" int CeedQFunctionBuildKernel_Sycl(CeedQFunction qf) {
30dd64fc84SJeremy L Thompson Ceed ceed;
31dd64fc84SJeremy L Thompson Ceed_Sycl *data;
32f8d308faSJed Brown const char *read_write_kernel_path, *read_write_kernel_source;
337d023984SJeremy L Thompson const char *qfunction_name, *qfunction_source;
34dd64fc84SJeremy L Thompson CeedInt num_input_fields, num_output_fields;
35dd64fc84SJeremy L Thompson CeedQFunctionField *input_fields, *output_fields;
36bd882c8aSJames Wright CeedQFunction_Sycl *impl;
37dd64fc84SJeremy L Thompson
38bd882c8aSJames Wright // QFunction is built
399bc66399SJeremy L Thompson CeedCallBackend(CeedQFunctionGetData(qf, (void **)&impl));
40bd882c8aSJames Wright if (impl->QFunction) return CEED_ERROR_SUCCESS;
41bd882c8aSJames Wright
426e536b99SJeremy L Thompson CeedCallBackend(CeedQFunctionGetCeed(qf, &ceed));
43bd882c8aSJames Wright CeedCallBackend(CeedGetData(ceed, &data));
44bd882c8aSJames Wright
45bd882c8aSJames Wright // QFunction kernel generation
46bd882c8aSJames Wright CeedCallBackend(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields));
47bd882c8aSJames Wright
48bd882c8aSJames Wright std::vector<CeedInt> input_sizes(num_input_fields);
49bd882c8aSJames Wright CeedQFunctionField *input_i = input_fields;
50dd64fc84SJeremy L Thompson
51bd882c8aSJames Wright for (auto &size_i : input_sizes) {
52bd882c8aSJames Wright CeedCallBackend(CeedQFunctionFieldGetSize(*input_i, &size_i));
53bd882c8aSJames Wright ++input_i;
54bd882c8aSJames Wright }
55bd882c8aSJames Wright
56bd882c8aSJames Wright std::vector<CeedInt> output_sizes(num_output_fields);
57bd882c8aSJames Wright CeedQFunctionField *output_i = output_fields;
58dd64fc84SJeremy L Thompson
59bd882c8aSJames Wright for (auto &size_i : output_sizes) {
60bd882c8aSJames Wright CeedCallBackend(CeedQFunctionFieldGetSize(*output_i, &size_i));
61bd882c8aSJames Wright ++output_i;
62bd882c8aSJames Wright }
63bd882c8aSJames Wright
64bd882c8aSJames Wright CeedCallBackend(CeedQFunctionGetKernelName(qf, &qfunction_name));
65bd882c8aSJames Wright
6623d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source -----\n");
67bd882c8aSJames Wright CeedCallBackend(CeedQFunctionLoadSourceToBuffer(qf, &qfunction_source));
6823d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source Complete! -----\n");
69bd882c8aSJames Wright
70bd882c8aSJames Wright CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/sycl/sycl-ref-qfunction.h", &read_write_kernel_path));
71bd882c8aSJames Wright
7223d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction Read/Write Kernel Source -----\n");
73f8608ea8SJed Brown {
7422070f95SJeremy L Thompson char *source;
7522070f95SJeremy L Thompson
7622070f95SJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &source));
7722070f95SJeremy L Thompson read_write_kernel_source = source;
78f8608ea8SJed Brown }
7923d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction Read/Write Kernel Source Complete! -----\n");
80bd882c8aSJames Wright
81bd882c8aSJames Wright std::string_view qf_name_view(qfunction_name);
82bd882c8aSJames Wright std::string_view qf_source_view(qfunction_source);
83bd882c8aSJames Wright std::string_view rw_source_view(read_write_kernel_source);
84bd882c8aSJames Wright const std::string kernel_name = "CeedKernelSyclRefQFunction_" + std::string(qf_name_view);
85bd882c8aSJames Wright
86bd882c8aSJames Wright // Defintions
87bd882c8aSJames Wright std::ostringstream code;
88bd882c8aSJames Wright code << rw_source_view;
89bd882c8aSJames Wright code << qf_source_view;
90bd882c8aSJames Wright code << "\n";
91bd882c8aSJames Wright
92bd882c8aSJames Wright // Kernel function
93bd882c8aSJames Wright // Here we are fixing a lower sub-group size value to avoid register spills
94bd882c8aSJames Wright // This needs to be revisited if all qfunctions require this.
95bd882c8aSJames Wright code << "__attribute__((intel_reqd_sub_group_size(" << SUB_GROUP_SIZE_QF << "))) __kernel void " << kernel_name
96bd882c8aSJames Wright << "(__global void *ctx, CeedInt Q,\n";
97bd882c8aSJames Wright
98bd882c8aSJames Wright // OpenCL doesn't allow for structs with pointers.
99bd882c8aSJames Wright // We will need to pass all of the arguments individually.
100bd882c8aSJames Wright // Input parameters
101bd882c8aSJames Wright for (CeedInt i = 0; i < num_input_fields; ++i) {
102bd882c8aSJames Wright code << " "
103bd882c8aSJames Wright << "__global const CeedScalar *in_" << i << ",\n";
104bd882c8aSJames Wright }
105bd882c8aSJames Wright
106bd882c8aSJames Wright // Output parameters
107bd882c8aSJames Wright code << " "
108bd882c8aSJames Wright << "__global CeedScalar *out_0";
109bd882c8aSJames Wright for (CeedInt i = 1; i < num_output_fields; ++i) {
110bd882c8aSJames Wright code << "\n, "
111bd882c8aSJames Wright << "__global CeedScalar *out_" << i;
112bd882c8aSJames Wright }
113bd882c8aSJames Wright // Begin kernel function body
114bd882c8aSJames Wright code << ") {\n\n";
115bd882c8aSJames Wright
116bd882c8aSJames Wright // Inputs
117bd882c8aSJames Wright code << " // Input fields\n";
118bd882c8aSJames Wright for (CeedInt i = 0; i < num_input_fields; ++i) {
119bd882c8aSJames Wright code << " CeedScalar U_" << i << "[" << input_sizes[i] << "];\n";
120bd882c8aSJames Wright }
1219b443e3bSJeremy L Thompson code << " const CeedScalar *inputs[" << CeedIntMax(num_input_fields, 1) << "] = {U_0";
122bd882c8aSJames Wright for (CeedInt i = 1; i < num_input_fields; i++) {
123bd882c8aSJames Wright code << ", U_" << i << "\n";
124bd882c8aSJames Wright }
125bd882c8aSJames Wright code << "};\n\n";
126bd882c8aSJames Wright
127bd882c8aSJames Wright // Outputs
128bd882c8aSJames Wright code << " // Output fields\n";
129bd882c8aSJames Wright for (CeedInt i = 0; i < num_output_fields; i++) {
130bd882c8aSJames Wright code << " CeedScalar V_" << i << "[" << output_sizes[i] << "];\n";
131bd882c8aSJames Wright }
1329b443e3bSJeremy L Thompson code << " CeedScalar *outputs[" << CeedIntMax(num_output_fields, 1) << "] = {V_0";
133bd882c8aSJames Wright for (CeedInt i = 1; i < num_output_fields; i++) {
134bd882c8aSJames Wright code << ", V_" << i << "\n";
135bd882c8aSJames Wright }
136bd882c8aSJames Wright code << "};\n\n";
137bd882c8aSJames Wright
138bd882c8aSJames Wright code << " const CeedInt q = get_global_linear_id();\n\n";
139bd882c8aSJames Wright
140bd882c8aSJames Wright code << "if(q < Q){ \n\n";
141bd882c8aSJames Wright
142bd882c8aSJames Wright // Load inputs
143bd882c8aSJames Wright code << " // -- Load inputs\n";
144bd882c8aSJames Wright for (CeedInt i = 0; i < num_input_fields; i++) {
145bd882c8aSJames Wright code << " readQuads(" << input_sizes[i] << ", Q, q, "
146bd882c8aSJames Wright << "in_" << i << ", U_" << i << ");\n";
147bd882c8aSJames Wright }
148bd882c8aSJames Wright code << "\n";
149bd882c8aSJames Wright
150bd882c8aSJames Wright // QFunction
151bd882c8aSJames Wright code << " // -- Call QFunction\n";
152bd882c8aSJames Wright code << " " << qf_name_view << "(ctx, 1, inputs, outputs);\n\n";
153bd882c8aSJames Wright
154bd882c8aSJames Wright // Write outputs
155bd882c8aSJames Wright code << " // -- Write outputs\n";
156bd882c8aSJames Wright for (CeedInt i = 0; i < num_output_fields; i++) {
157bd882c8aSJames Wright code << " writeQuads(" << output_sizes[i] << ", Q, q, "
158bd882c8aSJames Wright << "V_" << i << ", out_" << i << ");\n";
159bd882c8aSJames Wright }
160bd882c8aSJames Wright code << "\n";
161bd882c8aSJames Wright
162bd882c8aSJames Wright // End kernel function body
163bd882c8aSJames Wright code << "}\n";
164bd882c8aSJames Wright code << "}\n";
165bd882c8aSJames Wright
166bd882c8aSJames Wright // View kernel for debugging
16723d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Generated QFunction Kernels:\n");
168bd882c8aSJames Wright CeedDebug(ceed, code.str().c_str());
169bd882c8aSJames Wright
170bd882c8aSJames Wright // Compile kernel
171eb7e6cafSJeremy L Thompson CeedCallBackend(CeedBuildModule_Sycl(ceed, code.str(), &impl->sycl_module));
172eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Sycl(ceed, impl->sycl_module, kernel_name, &impl->QFunction));
173bd882c8aSJames Wright
174bd882c8aSJames Wright // Cleanup
175bd882c8aSJames Wright CeedCallBackend(CeedFree(&qfunction_source));
176bd882c8aSJames Wright CeedCallBackend(CeedFree(&read_write_kernel_path));
177bd882c8aSJames Wright CeedCallBackend(CeedFree(&read_write_kernel_source));
1789bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed));
179bd882c8aSJames Wright return CEED_ERROR_SUCCESS;
180bd882c8aSJames Wright }
181ff1e7120SSebastian Grimberg
182bd882c8aSJames Wright //------------------------------------------------------------------------------
183