ceed-cuda-compile.cpp (2027fb9d13fe34211738d8539f90542a9801ae2c) ceed-cuda-compile.cpp (9b5f41c81b637db3e5453a22df59a4f47deed499)
1// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors.
2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3//
4// SPDX-License-Identifier: BSD-2-Clause
5//
6// This file is part of CEED: http://github.com/ceed
7
8#include "ceed-cuda-compile.h"
9
10#include <ceed.h>
11#include <ceed/backend.h>
12#include <ceed/jit-tools.h>
13#include <cuda_runtime.h>
14#include <dirent.h>
15#include <nvrtc.h>
16#include <stdarg.h>
17#include <string.h>
1// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors.
2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3//
4// SPDX-License-Identifier: BSD-2-Clause
5//
6// This file is part of CEED: http://github.com/ceed
7
8#include "ceed-cuda-compile.h"
9
10#include <ceed.h>
11#include <ceed/backend.h>
12#include <ceed/jit-tools.h>
13#include <cuda_runtime.h>
14#include <dirent.h>
15#include <nvrtc.h>
16#include <stdarg.h>
17#include <string.h>
18#include <sys/stat.h>
18#include <sys/types.h>
19
20#include <cstdlib>
21#include <fstream>
22#include <iostream>
23#include <sstream>
24#include <string>
25

--- 23 unchanged lines hidden (view full) ---

49 CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s", message, command);
50
51 char output[4 * CEED_MAX_RESOURCE_LEN];
52
53 while (fgets(output, sizeof(output), output_stream) != nullptr) {
54 }
55 CeedDebug(ceed, "Command output:\n%s\n", output);
56
19#include <sys/types.h>
20
21#include <cstdlib>
22#include <fstream>
23#include <iostream>
24#include <sstream>
25#include <string>
26

--- 23 unchanged lines hidden (view full) ---

50 CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s", message, command);
51
52 char output[4 * CEED_MAX_RESOURCE_LEN];
53
54 while (fgets(output, sizeof(output), output_stream) != nullptr) {
55 }
56 CeedDebug(ceed, "Command output:\n%s\n", output);
57
57 CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to %s with error: %s", message, output);
58 CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s\nand error: %s", message, command, output);
58 return CEED_ERROR_SUCCESS;
59}
60
61//------------------------------------------------------------------------------
62// Compile CUDA kernel
63//------------------------------------------------------------------------------
64using std::ifstream;
65using std::ofstream;

--- 83 unchanged lines hidden (view full) ---

149 CeedCallBackend(CeedStringAllocCopy(define_arg.str().c_str(), (char **)&opts[num_opts + num_jit_source_dirs + i]));
150 }
151 CeedCallBackend(CeedRestoreJitDefines(ceed, &jit_defines));
152 }
153
154 // Add string source argument provided in call
155 code << source;
156
59 return CEED_ERROR_SUCCESS;
60}
61
62//------------------------------------------------------------------------------
63// Compile CUDA kernel
64//------------------------------------------------------------------------------
65using std::ifstream;
66using std::ofstream;

--- 83 unchanged lines hidden (view full) ---

150 CeedCallBackend(CeedStringAllocCopy(define_arg.str().c_str(), (char **)&opts[num_opts + num_jit_source_dirs + i]));
151 }
152 CeedCallBackend(CeedRestoreJitDefines(ceed, &jit_defines));
153 }
154
155 // Add string source argument provided in call
156 code << source;
157
157 // Create Program
158
159 // Compile kernel
160 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- ATTEMPTING TO COMPILE JIT SOURCE ----------\n");
161 CeedDebug(ceed, "Source:\n%s\n", code.str().c_str());
162 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JIT SOURCE ----------\n");
163
164 if (!using_clang) {
165 CeedCallNvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL));
166

--- 49 unchanged lines hidden (view full) ---

216 CeedCallNvrtc(ceed, nvrtcGetPTX(prog, ptx));
217#endif
218 CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog));
219
220 CeedCallCuda(ceed, cuModuleLoadData(module, ptx));
221 CeedCallBackend(CeedFree(&ptx));
222 return CEED_ERROR_SUCCESS;
223 } else {
158 // Compile kernel
159 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- ATTEMPTING TO COMPILE JIT SOURCE ----------\n");
160 CeedDebug(ceed, "Source:\n%s\n", code.str().c_str());
161 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JIT SOURCE ----------\n");
162
163 if (!using_clang) {
164 CeedCallNvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL));
165

--- 49 unchanged lines hidden (view full) ---

215 CeedCallNvrtc(ceed, nvrtcGetPTX(prog, ptx));
216#endif
217 CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog));
218
219 CeedCallCuda(ceed, cuModuleLoadData(module, ptx));
220 CeedCallBackend(CeedFree(&ptx));
221 return CEED_ERROR_SUCCESS;
222 } else {
224 const char *full_filename = "temp_kernel_source.cu";
225 FILE *file = fopen(full_filename, "w");
223 srand(time(NULL));
224 const int build_id = rand();
226
225
227 CeedCheck(file, ceed, CEED_ERROR_BACKEND, "Failed to create file. Write access is required for cuda-clang\n");
228 fputs(code.str().c_str(), file);
229 fclose(file);
226 // Create temp dir if needed
227 {
228 DIR *dir = opendir("temp");
230
229
231 // Get rust crate directories
230 if (dir) {
231 closedir(dir);
232 } else {
233 mkdir("temp", 0777);
234 chmod("temp", 0777);
235 }
236 }
237 // Write code to temp file
238 {
239 std::string filename = std::string("temp/kernel_") + std::to_string(build_id) + std::string("_0_source.cu");
240 FILE *file = fopen(filename.c_str(), "w");
232
241
242 CeedCheck(file, ceed, CEED_ERROR_BACKEND, "Failed to create file. Write access is required for cuda-clang");
243 fputs(code.str().c_str(), file);
244 fclose(file);
245 }
246
247 // Get rust crate directories
233 const char **rust_source_dirs = nullptr;
234 int num_rust_source_dirs = 0;
235
236 CeedCallBackend(CeedGetRustSourceRoots(ceed, &num_rust_source_dirs, &rust_source_dirs));
237
238 std::string rust_dirs[10];
239
240 if (num_rust_source_dirs > 0) {

--- 19 unchanged lines hidden (view full) ---

260 for (CeedInt i = 0; i < num_rust_source_dirs; i++) {
261 command = "cargo +" + std::string(rust_toolchain) + " build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] +
262 "/.cargo/config.toml --manifest-path " + rust_dirs[i] + "/Cargo.toml";
263 CeedCallSystem(ceed, command.c_str(), "build Rust crate");
264 }
265
266 // Compile wrapper kernel
267 command = "clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) +
248 const char **rust_source_dirs = nullptr;
249 int num_rust_source_dirs = 0;
250
251 CeedCallBackend(CeedGetRustSourceRoots(ceed, &num_rust_source_dirs, &rust_source_dirs));
252
253 std::string rust_dirs[10];
254
255 if (num_rust_source_dirs > 0) {

--- 19 unchanged lines hidden (view full) ---

275 for (CeedInt i = 0; i < num_rust_source_dirs; i++) {
276 command = "cargo +" + std::string(rust_toolchain) + " build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] +
277 "/.cargo/config.toml --manifest-path " + rust_dirs[i] + "/Cargo.toml";
278 CeedCallSystem(ceed, command.c_str(), "build Rust crate");
279 }
280
281 // Compile wrapper kernel
282 command = "clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) +
268 " --cuda-device-only -emit-llvm -S temp_kernel_source.cu -o temp_kernel.ll ";
283 " --cuda-device-only -emit-llvm -S temp/kernel_" + std::to_string(build_id) + "_0_source.cu -o temp/kernel_" +
284 std::to_string(build_id) + "_1_wrapped.ll ";
269 command += opts[4];
270 CeedCallSystem(ceed, command.c_str(), "JiT kernel source");
271
272 // the find command finds the rust-installed llvm-link tool and runs it
285 command += opts[4];
286 CeedCallSystem(ceed, command.c_str(), "JiT kernel source");
287
288 // the find command finds the rust-installed llvm-link tool and runs it
273 command = "$(find $(rustup run " + std::string(rust_toolchain) +
274 " rustc --print sysroot) -name llvm-link) temp_kernel.ll --ignore-non-bitcode --internalize --only-needed -S -o "
275 "temp_kernel_linked.ll ";
289 command = "$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llvm-link) temp/kernel_" +
290 std::to_string(build_id) +
291 "_1_wrapped.ll --ignore-non-bitcode --internalize --only-needed -S -o "
292 "temp/kernel_" +
293 std::to_string(build_id) + "_2_linked.ll ";
276
277 // Searches for .a files in rust directory
278 // Note: this is necessary because rust crate names may not match the folder they are in
279 for (CeedInt i = 0; i < num_rust_source_dirs; i++) {
280 std::string dir = rust_dirs[i] + "/target/nvptx64-nvidia-cuda/release";
281 DIR *dp = opendir(dir.c_str());
282
283 CeedCheck(dp != nullptr, ceed, CEED_ERROR_BACKEND, "Could not open directory: %s", dir.c_str());

--- 9 unchanged lines hidden (view full) ---

293 }
294 closedir(dp);
295 // TODO: when libCEED switches to c++17, switch to std::filesystem for the loop above
296 }
297
298 // Link, optimize, and compile final CUDA kernel
299 // note that the find command is used to find the rust-installed llvm tool
300 CeedCallSystem(ceed, command.c_str(), "link C and Rust source");
294
295 // Searches for .a files in rust directory
296 // Note: this is necessary because rust crate names may not match the folder they are in
297 for (CeedInt i = 0; i < num_rust_source_dirs; i++) {
298 std::string dir = rust_dirs[i] + "/target/nvptx64-nvidia-cuda/release";
299 DIR *dp = opendir(dir.c_str());
300
301 CeedCheck(dp != nullptr, ceed, CEED_ERROR_BACKEND, "Could not open directory: %s", dir.c_str());

--- 9 unchanged lines hidden (view full) ---

311 }
312 closedir(dp);
313 // TODO: when libCEED switches to c++17, switch to std::filesystem for the loop above
314 }
315
316 // Link, optimize, and compile final CUDA kernel
317 // note that the find command is used to find the rust-installed llvm tool
318 CeedCallSystem(ceed, command.c_str(), "link C and Rust source");
319 CeedCallSystem(
320 ceed,
321 ("$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name opt) --passes internalize,inline temp/kernel_" +
322 std::to_string(build_id) + "_2_linked.ll -o temp/kernel_" + std::to_string(build_id) + "_3_opt.bc")
323 .c_str(),
324 "optimize linked C and Rust source");
301 CeedCallSystem(ceed,
325 CeedCallSystem(ceed,
302 ("$(find $(rustup run " + std::string(rust_toolchain) +
303 " rustc --print sysroot) -name opt) --passes internalize,inline temp_kernel_linked.ll -o temp_kernel_opt.bc")
304 .c_str(),
305 "optimize linked C and Rust source");
306 CeedCallSystem(ceed,
307 ("$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llc) -O3 -mcpu=sm_" +
326 ("$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llc) -O3 -mcpu=sm_" +
308 std::to_string(prop.major) + std::to_string(prop.minor) + " temp_kernel_opt.bc -o temp_kernel_final.ptx")
327 std::to_string(prop.major) + std::to_string(prop.minor) + " temp/kernel_" + std::to_string(build_id) +
328 "_3_opt.bc -o temp/kernel_" + std::to_string(build_id) + "_4_final.ptx")
309 .c_str(),
310 "compile final CUDA kernel");
311
329 .c_str(),
330 "compile final CUDA kernel");
331
312 ifstream ptxfile("temp_kernel_final.ptx");
332 ifstream ptxfile("temp/kernel_" + std::to_string(build_id) + "_4_final.ptx");
313 ostringstream sstr;
314
315 sstr << ptxfile.rdbuf();
316
317 auto ptx_data = sstr.str();
318 ptx_size = ptx_data.length();
319
320 int result = cuModuleLoadData(module, ptx_data.c_str());

--- 130 unchanged lines hidden ---
333 ostringstream sstr;
334
335 sstr << ptxfile.rdbuf();
336
337 auto ptx_data = sstr.str();
338 ptx_size = ptx_data.length();
339
340 int result = cuModuleLoadData(module, ptx_data.c_str());

--- 130 unchanged lines hidden ---