| ceed-cuda-compile.cpp (33cc410d9e7c7c1ec2b31835f296422ec750fa8c) | ceed-cuda-compile.cpp (fa619ecc52f58ebd3ff3ef012ebe7a24b3c56483) |
|---|---|
| 1// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3// 4// SPDX-License-Identifier: BSD-2-Clause 5// 6// This file is part of CEED: http://github.com/ceed 7 8#include "ceed-cuda-compile.h" 9 10#include <ceed.h> 11#include <ceed/backend.h> 12#include <ceed/jit-tools.h> 13#include <cuda_runtime.h> 14#include <dirent.h> 15#include <nvrtc.h> 16#include <stdarg.h> | 1// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3// 4// SPDX-License-Identifier: BSD-2-Clause 5// 6// This file is part of CEED: http://github.com/ceed 7 8#include "ceed-cuda-compile.h" 9 10#include <ceed.h> 11#include <ceed/backend.h> 12#include <ceed/jit-tools.h> 13#include <cuda_runtime.h> 14#include <dirent.h> 15#include <nvrtc.h> 16#include <stdarg.h> |
| 17#include <stdio.h> |
|
| 17#include <string.h> 18#include <sys/stat.h> 19#include <sys/types.h> 20 21#include <cstdlib> 22#include <fstream> 23#include <iostream> 24#include <sstream> --- 14 unchanged lines hidden (view full) --- 39 } while (0) 40 41#define CeedCallSystem(ceed, command, message) CeedCallBackend(CeedCallSystem_Core(ceed, command, message)) 42 43//------------------------------------------------------------------------------ 44// Call system command and capture stdout + stderr 45//------------------------------------------------------------------------------ 46static int CeedCallSystem_Core(Ceed ceed, const char *command, const char *message) { | 18#include <string.h> 19#include <sys/stat.h> 20#include <sys/types.h> 21 22#include <cstdlib> 23#include <fstream> 24#include <iostream> 25#include <sstream> --- 14 unchanged lines hidden (view full) --- 40 } while (0) 41 42#define CeedCallSystem(ceed, command, message) CeedCallBackend(CeedCallSystem_Core(ceed, command, message)) 43 44//------------------------------------------------------------------------------ 45// Call system command and capture stdout + stderr 46//------------------------------------------------------------------------------ 47static int CeedCallSystem_Core(Ceed ceed, const char *command, const char *message) { |
| 47 CeedDebug(ceed, "Running command:\n$ %s\n", command); | 48 CeedDebug(ceed, "Running command:\n$ %s", command); |
| 48 FILE *output_stream = popen((command + std::string(" 2>&1")).c_str(), "r"); 49 | 49 FILE *output_stream = popen((command + std::string(" 2>&1")).c_str(), "r"); 50 |
| 50 CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s", message, command); | 51 CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to %s\ncommand:\n$ %s", message, command); |
| 51 | 52 |
| 52 char output[4 * CEED_MAX_RESOURCE_LEN]; | 53 char line[CEED_MAX_RESOURCE_LEN] = ""; 54 std::string output = ""; |
| 53 | 55 |
| 54 while (fgets(output, sizeof(output), output_stream) != nullptr) { | 56 while (fgets(line, sizeof(line), output_stream) != nullptr) { 57 output += line; |
| 55 } | 58 } |
| 56 CeedDebug(ceed, "Command output:\n%s\n", output); 57 58 CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s\nand error: %s", message, command, output); | 59 CeedDebug(ceed, "output:\n%s\n", output.c_str()); 60 CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to %s\ncommand:\n$ %s\nerror:\n%s", message, command, output.c_str()); |
| 59 return CEED_ERROR_SUCCESS; 60} 61 62//------------------------------------------------------------------------------ 63// Compile CUDA kernel 64//------------------------------------------------------------------------------ 65using std::ifstream; 66using std::ofstream; --- 208 unchanged lines hidden (view full) --- 275 std::string command; 276 277 for (CeedInt i = 0; i < num_rust_source_dirs; i++) { 278 command = "cargo +" + std::string(rust_toolchain) + " build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] + 279 "/.cargo/config.toml --manifest-path " + rust_dirs[i] + "/Cargo.toml"; 280 CeedCallSystem(ceed, command.c_str(), "build Rust crate"); 281 } 282 | 61 return CEED_ERROR_SUCCESS; 62} 63 64//------------------------------------------------------------------------------ 65// Compile CUDA kernel 66//------------------------------------------------------------------------------ 67using std::ifstream; 68using std::ofstream; --- 208 unchanged lines hidden (view full) --- 277 std::string command; 278 279 for (CeedInt i = 0; i < num_rust_source_dirs; i++) { 280 command = "cargo +" + std::string(rust_toolchain) + " build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] + 281 "/.cargo/config.toml --manifest-path " + rust_dirs[i] + "/Cargo.toml"; 282 CeedCallSystem(ceed, command.c_str(), "build Rust crate"); 283 } 284 |
| 285 // Get Clang version 286 bool use_llvm_version = ceed_data->use_llvm_version; 287 int llvm_version = ceed_data->llvm_version; 288 289 if (llvm_version == 0) { 290 command = "$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llvm-link) --version"; 291 CeedDebug(ceed, "Attempting to detect Rust LLVM version.\ncommand:\n$ %s", command.c_str()); 292 FILE *output_stream = popen((command + std::string(" 2>&1")).c_str(), "r"); 293 294 CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to detect Rust LLVM version"); 295 296 char line[CEED_MAX_RESOURCE_LEN] = ""; 297 std::string output = ""; 298 299 while (fgets(line, sizeof(line), output_stream) != nullptr) { 300 output += line; 301 } 302 CeedDebug(ceed, "output:\n%s", output.c_str()); 303 CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to detect Rust LLVM version\ncommand:\n$ %s\nerror:\n%s", 304 command.c_str(), output.c_str()); 305 306 const char *version_substring = strstr(output.c_str(), "LLVM version "); 307 308 version_substring += 13; 309 310 char *next_dot = strchr((char *)version_substring, '.'); 311 312 next_dot[0] = '\0'; 313 ceed_data->llvm_version = llvm_version = std::stoi(version_substring); 314 CeedDebug(ceed, "Rust LLVM version number: %d\n", llvm_version); 315 316 command = std::string("clang++-") + std::to_string(llvm_version); 317 output_stream = popen((command + std::string(" 2>&1")).c_str(), "r"); 318 ceed_data->use_llvm_version = use_llvm_version = pclose(output_stream) == 0; 319 } 320 |
|
| 283 // Compile wrapper kernel | 321 // Compile wrapper kernel |
| 284 command = "clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) + 285 " --cuda-device-only -emit-llvm -S temp/kernel_" + std::to_string(build_id) + "_0_source.cu -o temp/kernel_" + 286 std::to_string(build_id) + "_1_wrapped.ll "; | 322 command = "clang++" + (use_llvm_version ? (std::string("-") + std::to_string(llvm_version)) : "") + " -flto=thin --cuda-gpu-arch=sm_" + 323 std::to_string(prop.major) + std::to_string(prop.minor) + " --cuda-device-only -emit-llvm -S temp/kernel_" + std::to_string(build_id) + 324 "_0_source.cu -o temp/kernel_" + std::to_string(build_id) + "_1_wrapped.ll "; |
| 287 command += opts[4]; 288 CeedCallSystem(ceed, command.c_str(), "JiT kernel source"); 289 CeedCallSystem(ceed, ("chmod 0777 temp/kernel_" + std::to_string(build_id) + "_1_wrapped.ll").c_str(), "update JiT file permissions"); 290 | 325 command += opts[4]; 326 CeedCallSystem(ceed, command.c_str(), "JiT kernel source"); 327 CeedCallSystem(ceed, ("chmod 0777 temp/kernel_" + std::to_string(build_id) + "_1_wrapped.ll").c_str(), "update JiT file permissions"); 328 |
| 291 // the find command finds the rust-installed llvm-link tool and runs it | 329 // Find Rust's llvm-link tool and runs it |
| 292 command = "$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llvm-link) temp/kernel_" + 293 std::to_string(build_id) + 294 "_1_wrapped.ll --ignore-non-bitcode --internalize --only-needed -S -o " 295 "temp/kernel_" + 296 std::to_string(build_id) + "_2_linked.ll "; 297 298 // Searches for .a files in rust directoy | 330 command = "$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llvm-link) temp/kernel_" + 331 std::to_string(build_id) + 332 "_1_wrapped.ll --ignore-non-bitcode --internalize --only-needed -S -o " 333 "temp/kernel_" + 334 std::to_string(build_id) + "_2_linked.ll "; 335 336 // Searches for .a files in rust directoy |
| 299 // Note: this is necessary because rust crate names may not match the folder they are in | 337 // Note: this is necessary because Rust crate names may not match the folder they are in |
| 300 for (CeedInt i = 0; i < num_rust_source_dirs; i++) { 301 std::string dir = rust_dirs[i] + "/target/nvptx64-nvidia-cuda/release"; 302 DIR *dp = opendir(dir.c_str()); 303 304 CeedCheck(dp != nullptr, ceed, CEED_ERROR_BACKEND, "Could not open directory: %s", dir.c_str()); 305 struct dirent *entry; 306 | 338 for (CeedInt i = 0; i < num_rust_source_dirs; i++) { 339 std::string dir = rust_dirs[i] + "/target/nvptx64-nvidia-cuda/release"; 340 DIR *dp = opendir(dir.c_str()); 341 342 CeedCheck(dp != nullptr, ceed, CEED_ERROR_BACKEND, "Could not open directory: %s", dir.c_str()); 343 struct dirent *entry; 344 |
| 307 // finds files ending in .a | 345 // Find files ending in .a |
| 308 while ((entry = readdir(dp)) != nullptr) { 309 std::string filename(entry->d_name); 310 311 if (filename.size() >= 2 && filename.substr(filename.size() - 2) == ".a") { 312 command += dir + "/" + filename + " "; 313 } 314 } 315 closedir(dp); 316 // TODO: when libCEED switches to c++17, switch to std::filesystem for the loop above 317 } 318 319 // Link, optimize, and compile final CUDA kernel | 346 while ((entry = readdir(dp)) != nullptr) { 347 std::string filename(entry->d_name); 348 349 if (filename.size() >= 2 && filename.substr(filename.size() - 2) == ".a") { 350 command += dir + "/" + filename + " "; 351 } 352 } 353 closedir(dp); 354 // TODO: when libCEED switches to c++17, switch to std::filesystem for the loop above 355 } 356 357 // Link, optimize, and compile final CUDA kernel |
| 320 // note that the find command is used to find the rust-installed llvm tool | |
| 321 CeedCallSystem(ceed, command.c_str(), "link C and Rust source"); 322 CeedCallSystem( 323 ceed, 324 ("$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name opt) --passes internalize,inline temp/kernel_" + 325 std::to_string(build_id) + "_2_linked.ll -o temp/kernel_" + std::to_string(build_id) + "_3_opt.bc") 326 .c_str(), 327 "optimize linked C and Rust source"); 328 CeedCallSystem(ceed, ("chmod 0777 temp/kernel_" + std::to_string(build_id) + "_2_linked.ll").c_str(), "update JiT file permissions"); --- 147 unchanged lines hidden --- | 358 CeedCallSystem(ceed, command.c_str(), "link C and Rust source"); 359 CeedCallSystem( 360 ceed, 361 ("$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name opt) --passes internalize,inline temp/kernel_" + 362 std::to_string(build_id) + "_2_linked.ll -o temp/kernel_" + std::to_string(build_id) + "_3_opt.bc") 363 .c_str(), 364 "optimize linked C and Rust source"); 365 CeedCallSystem(ceed, ("chmod 0777 temp/kernel_" + std::to_string(build_id) + "_2_linked.ll").c_str(), "update JiT file permissions"); --- 147 unchanged lines hidden --- |