1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors. 2bd882c8aSJames Wright // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3bd882c8aSJames Wright // 4bd882c8aSJames Wright // SPDX-License-Identifier: BSD-2-Clause 5bd882c8aSJames Wright // 6bd882c8aSJames Wright // This file is part of CEED: http://github.com/ceed 7bd882c8aSJames Wright 8bd882c8aSJames Wright #include <ceed/backend.h> 9bd882c8aSJames Wright #include <ceed/ceed.h> 10bd882c8aSJames Wright #include <ceed/jit-tools.h> 11bd882c8aSJames Wright 12bd882c8aSJames Wright #include <iostream> 13bd882c8aSJames Wright #include <sstream> 14bd882c8aSJames Wright #include <string> 15bd882c8aSJames Wright #include <string_view> 16bd882c8aSJames Wright #include <sycl/sycl.hpp> 17bd882c8aSJames Wright #include <vector> 18bd882c8aSJames Wright 19bd882c8aSJames Wright #include "../sycl/ceed-sycl-compile.hpp" 20bd882c8aSJames Wright #include "ceed-sycl-ref.hpp" 21bd882c8aSJames Wright 22bd882c8aSJames Wright #define SUB_GROUP_SIZE_QF 16 23bd882c8aSJames Wright 24bd882c8aSJames Wright //------------------------------------------------------------------------------ 25bd882c8aSJames Wright // Build QFunction kernel 26bd882c8aSJames Wright // 27bd882c8aSJames Wright // TODO: Refactor 28bd882c8aSJames Wright //------------------------------------------------------------------------------ 29eb7e6cafSJeremy L Thompson extern "C" int CeedQFunctionBuildKernel_Sycl(CeedQFunction qf) { 30dd64fc84SJeremy L Thompson Ceed ceed; 31dd64fc84SJeremy L Thompson Ceed_Sycl *data; 32f8d308faSJed Brown const char *read_write_kernel_path, *read_write_kernel_source; 337d023984SJeremy L Thompson const char *qfunction_name, *qfunction_source; 34dd64fc84SJeremy L Thompson CeedInt num_input_fields, num_output_fields; 35dd64fc84SJeremy L Thompson CeedQFunctionField *input_fields, *output_fields; 36bd882c8aSJames Wright CeedQFunction_Sycl *impl; 37dd64fc84SJeremy L Thompson 38bd882c8aSJames Wright // QFunction is built 399bc66399SJeremy L Thompson CeedCallBackend(CeedQFunctionGetData(qf, (void **)&impl)); 40bd882c8aSJames Wright if (impl->QFunction) return CEED_ERROR_SUCCESS; 41bd882c8aSJames Wright 426e536b99SJeremy L Thompson CeedCallBackend(CeedQFunctionGetCeed(qf, &ceed)); 43bd882c8aSJames Wright CeedCallBackend(CeedGetData(ceed, &data)); 44bd882c8aSJames Wright 45bd882c8aSJames Wright // QFunction kernel generation 46bd882c8aSJames Wright CeedCallBackend(CeedQFunctionGetFields(qf, &num_input_fields, &input_fields, &num_output_fields, &output_fields)); 47bd882c8aSJames Wright 48bd882c8aSJames Wright std::vector<CeedInt> input_sizes(num_input_fields); 49bd882c8aSJames Wright CeedQFunctionField *input_i = input_fields; 50dd64fc84SJeremy L Thompson 51bd882c8aSJames Wright for (auto &size_i : input_sizes) { 52bd882c8aSJames Wright CeedCallBackend(CeedQFunctionFieldGetSize(*input_i, &size_i)); 53bd882c8aSJames Wright ++input_i; 54bd882c8aSJames Wright } 55bd882c8aSJames Wright 56bd882c8aSJames Wright std::vector<CeedInt> output_sizes(num_output_fields); 57bd882c8aSJames Wright CeedQFunctionField *output_i = output_fields; 58dd64fc84SJeremy L Thompson 59bd882c8aSJames Wright for (auto &size_i : output_sizes) { 60bd882c8aSJames Wright CeedCallBackend(CeedQFunctionFieldGetSize(*output_i, &size_i)); 61bd882c8aSJames Wright ++output_i; 62bd882c8aSJames Wright } 63bd882c8aSJames Wright 64bd882c8aSJames Wright CeedCallBackend(CeedQFunctionGetKernelName(qf, &qfunction_name)); 65bd882c8aSJames Wright 6623d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source -----\n"); 67bd882c8aSJames Wright CeedCallBackend(CeedQFunctionLoadSourceToBuffer(qf, &qfunction_source)); 6823d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction User Source Complete! -----\n"); 69bd882c8aSJames Wright 70bd882c8aSJames Wright CeedCallBackend(CeedGetJitAbsolutePath(ceed, "ceed/jit-source/sycl/sycl-ref-qfunction.h", &read_write_kernel_path)); 71bd882c8aSJames Wright 7223d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction Read/Write Kernel Source -----\n"); 73f8608ea8SJed Brown { 7422070f95SJeremy L Thompson char *source; 7522070f95SJeremy L Thompson 7622070f95SJeremy L Thompson CeedCallBackend(CeedLoadSourceToBuffer(ceed, read_write_kernel_path, &source)); 7722070f95SJeremy L Thompson read_write_kernel_source = source; 78f8608ea8SJed Brown } 7923d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "----- Loading QFunction Read/Write Kernel Source Complete! -----\n"); 80bd882c8aSJames Wright 81bd882c8aSJames Wright std::string_view qf_name_view(qfunction_name); 82bd882c8aSJames Wright std::string_view qf_source_view(qfunction_source); 83bd882c8aSJames Wright std::string_view rw_source_view(read_write_kernel_source); 84bd882c8aSJames Wright const std::string kernel_name = "CeedKernelSyclRefQFunction_" + std::string(qf_name_view); 85bd882c8aSJames Wright 86bd882c8aSJames Wright // Defintions 87bd882c8aSJames Wright std::ostringstream code; 88bd882c8aSJames Wright code << rw_source_view; 89bd882c8aSJames Wright code << qf_source_view; 90bd882c8aSJames Wright code << "\n"; 91bd882c8aSJames Wright 92bd882c8aSJames Wright // Kernel function 93bd882c8aSJames Wright // Here we are fixing a lower sub-group size value to avoid register spills 94bd882c8aSJames Wright // This needs to be revisited if all qfunctions require this. 95bd882c8aSJames Wright code << "__attribute__((intel_reqd_sub_group_size(" << SUB_GROUP_SIZE_QF << "))) __kernel void " << kernel_name 96bd882c8aSJames Wright << "(__global void *ctx, CeedInt Q,\n"; 97bd882c8aSJames Wright 98bd882c8aSJames Wright // OpenCL doesn't allow for structs with pointers. 99bd882c8aSJames Wright // We will need to pass all of the arguments individually. 100bd882c8aSJames Wright // Input parameters 101bd882c8aSJames Wright for (CeedInt i = 0; i < num_input_fields; ++i) { 102bd882c8aSJames Wright code << " " 103bd882c8aSJames Wright << "__global const CeedScalar *in_" << i << ",\n"; 104bd882c8aSJames Wright } 105bd882c8aSJames Wright 106bd882c8aSJames Wright // Output parameters 107bd882c8aSJames Wright code << " " 108bd882c8aSJames Wright << "__global CeedScalar *out_0"; 109bd882c8aSJames Wright for (CeedInt i = 1; i < num_output_fields; ++i) { 110bd882c8aSJames Wright code << "\n, " 111bd882c8aSJames Wright << "__global CeedScalar *out_" << i; 112bd882c8aSJames Wright } 113bd882c8aSJames Wright // Begin kernel function body 114bd882c8aSJames Wright code << ") {\n\n"; 115bd882c8aSJames Wright 116bd882c8aSJames Wright // Inputs 117bd882c8aSJames Wright code << " // Input fields\n"; 118bd882c8aSJames Wright for (CeedInt i = 0; i < num_input_fields; ++i) { 119bd882c8aSJames Wright code << " CeedScalar U_" << i << "[" << input_sizes[i] << "];\n"; 120bd882c8aSJames Wright } 1219b443e3bSJeremy L Thompson code << " const CeedScalar *inputs[" << CeedIntMax(num_input_fields, 1) << "] = {U_0"; 122bd882c8aSJames Wright for (CeedInt i = 1; i < num_input_fields; i++) { 123bd882c8aSJames Wright code << ", U_" << i << "\n"; 124bd882c8aSJames Wright } 125bd882c8aSJames Wright code << "};\n\n"; 126bd882c8aSJames Wright 127bd882c8aSJames Wright // Outputs 128bd882c8aSJames Wright code << " // Output fields\n"; 129bd882c8aSJames Wright for (CeedInt i = 0; i < num_output_fields; i++) { 130bd882c8aSJames Wright code << " CeedScalar V_" << i << "[" << output_sizes[i] << "];\n"; 131bd882c8aSJames Wright } 1329b443e3bSJeremy L Thompson code << " CeedScalar *outputs[" << CeedIntMax(num_output_fields, 1) << "] = {V_0"; 133bd882c8aSJames Wright for (CeedInt i = 1; i < num_output_fields; i++) { 134bd882c8aSJames Wright code << ", V_" << i << "\n"; 135bd882c8aSJames Wright } 136bd882c8aSJames Wright code << "};\n\n"; 137bd882c8aSJames Wright 138bd882c8aSJames Wright code << " const CeedInt q = get_global_linear_id();\n\n"; 139bd882c8aSJames Wright 140bd882c8aSJames Wright code << "if(q < Q){ \n\n"; 141bd882c8aSJames Wright 142bd882c8aSJames Wright // Load inputs 143bd882c8aSJames Wright code << " // -- Load inputs\n"; 144bd882c8aSJames Wright for (CeedInt i = 0; i < num_input_fields; i++) { 145bd882c8aSJames Wright code << " readQuads(" << input_sizes[i] << ", Q, q, " 146bd882c8aSJames Wright << "in_" << i << ", U_" << i << ");\n"; 147bd882c8aSJames Wright } 148bd882c8aSJames Wright code << "\n"; 149bd882c8aSJames Wright 150bd882c8aSJames Wright // QFunction 151bd882c8aSJames Wright code << " // -- Call QFunction\n"; 152bd882c8aSJames Wright code << " " << qf_name_view << "(ctx, 1, inputs, outputs);\n\n"; 153bd882c8aSJames Wright 154bd882c8aSJames Wright // Write outputs 155bd882c8aSJames Wright code << " // -- Write outputs\n"; 156bd882c8aSJames Wright for (CeedInt i = 0; i < num_output_fields; i++) { 157bd882c8aSJames Wright code << " writeQuads(" << output_sizes[i] << ", Q, q, " 158bd882c8aSJames Wright << "V_" << i << ", out_" << i << ");\n"; 159bd882c8aSJames Wright } 160bd882c8aSJames Wright code << "\n"; 161bd882c8aSJames Wright 162bd882c8aSJames Wright // End kernel function body 163bd882c8aSJames Wright code << "}\n"; 164bd882c8aSJames Wright code << "}\n"; 165bd882c8aSJames Wright 166bd882c8aSJames Wright // View kernel for debugging 16723d4529eSJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "Generated QFunction Kernels:\n"); 168bd882c8aSJames Wright CeedDebug(ceed, code.str().c_str()); 169bd882c8aSJames Wright 170bd882c8aSJames Wright // Compile kernel 171eb7e6cafSJeremy L Thompson CeedCallBackend(CeedBuildModule_Sycl(ceed, code.str(), &impl->sycl_module)); 172eb7e6cafSJeremy L Thompson CeedCallBackend(CeedGetKernel_Sycl(ceed, impl->sycl_module, kernel_name, &impl->QFunction)); 173bd882c8aSJames Wright 174bd882c8aSJames Wright // Cleanup 175bd882c8aSJames Wright CeedCallBackend(CeedFree(&qfunction_source)); 176bd882c8aSJames Wright CeedCallBackend(CeedFree(&read_write_kernel_path)); 177bd882c8aSJames Wright CeedCallBackend(CeedFree(&read_write_kernel_source)); 1789bc66399SJeremy L Thompson CeedCallBackend(CeedDestroy(&ceed)); 179bd882c8aSJames Wright return CEED_ERROR_SUCCESS; 180bd882c8aSJames Wright } 181ff1e7120SSebastian Grimberg 182bd882c8aSJames Wright //------------------------------------------------------------------------------ 183