| ceed-cuda-compile.cpp (2027fb9d13fe34211738d8539f90542a9801ae2c) | ceed-cuda-compile.cpp (9b5f41c81b637db3e5453a22df59a4f47deed499) |
|---|---|
| 1// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3// 4// SPDX-License-Identifier: BSD-2-Clause 5// 6// This file is part of CEED: http://github.com/ceed 7 8#include "ceed-cuda-compile.h" 9 10#include <ceed.h> 11#include <ceed/backend.h> 12#include <ceed/jit-tools.h> 13#include <cuda_runtime.h> 14#include <dirent.h> 15#include <nvrtc.h> 16#include <stdarg.h> 17#include <string.h> | 1// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and other CEED contributors. 2// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 3// 4// SPDX-License-Identifier: BSD-2-Clause 5// 6// This file is part of CEED: http://github.com/ceed 7 8#include "ceed-cuda-compile.h" 9 10#include <ceed.h> 11#include <ceed/backend.h> 12#include <ceed/jit-tools.h> 13#include <cuda_runtime.h> 14#include <dirent.h> 15#include <nvrtc.h> 16#include <stdarg.h> 17#include <string.h> |
| 18#include <sys/stat.h> |
|
| 18#include <sys/types.h> 19 20#include <cstdlib> 21#include <fstream> 22#include <iostream> 23#include <sstream> 24#include <string> 25 --- 23 unchanged lines hidden (view full) --- 49 CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s", message, command); 50 51 char output[4 * CEED_MAX_RESOURCE_LEN]; 52 53 while (fgets(output, sizeof(output), output_stream) != nullptr) { 54 } 55 CeedDebug(ceed, "Command output:\n%s\n", output); 56 | 19#include <sys/types.h> 20 21#include <cstdlib> 22#include <fstream> 23#include <iostream> 24#include <sstream> 25#include <string> 26 --- 23 unchanged lines hidden (view full) --- 50 CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s", message, command); 51 52 char output[4 * CEED_MAX_RESOURCE_LEN]; 53 54 while (fgets(output, sizeof(output), output_stream) != nullptr) { 55 } 56 CeedDebug(ceed, "Command output:\n%s\n", output); 57 |
| 57 CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to %s with error: %s", message, output); | 58 CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to %s with command: %s\nand error: %s", message, command, output); |
| 58 return CEED_ERROR_SUCCESS; 59} 60 61//------------------------------------------------------------------------------ 62// Compile CUDA kernel 63//------------------------------------------------------------------------------ 64using std::ifstream; 65using std::ofstream; --- 83 unchanged lines hidden (view full) --- 149 CeedCallBackend(CeedStringAllocCopy(define_arg.str().c_str(), (char **)&opts[num_opts + num_jit_source_dirs + i])); 150 } 151 CeedCallBackend(CeedRestoreJitDefines(ceed, &jit_defines)); 152 } 153 154 // Add string source argument provided in call 155 code << source; 156 | 59 return CEED_ERROR_SUCCESS; 60} 61 62//------------------------------------------------------------------------------ 63// Compile CUDA kernel 64//------------------------------------------------------------------------------ 65using std::ifstream; 66using std::ofstream; --- 83 unchanged lines hidden (view full) --- 150 CeedCallBackend(CeedStringAllocCopy(define_arg.str().c_str(), (char **)&opts[num_opts + num_jit_source_dirs + i])); 151 } 152 CeedCallBackend(CeedRestoreJitDefines(ceed, &jit_defines)); 153 } 154 155 // Add string source argument provided in call 156 code << source; 157 |
| 157 // Create Program 158 | |
| 159 // Compile kernel 160 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- ATTEMPTING TO COMPILE JIT SOURCE ----------\n"); 161 CeedDebug(ceed, "Source:\n%s\n", code.str().c_str()); 162 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JIT SOURCE ----------\n"); 163 164 if (!using_clang) { 165 CeedCallNvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL)); 166 --- 49 unchanged lines hidden (view full) --- 216 CeedCallNvrtc(ceed, nvrtcGetPTX(prog, ptx)); 217#endif 218 CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog)); 219 220 CeedCallCuda(ceed, cuModuleLoadData(module, ptx)); 221 CeedCallBackend(CeedFree(&ptx)); 222 return CEED_ERROR_SUCCESS; 223 } else { | 158 // Compile kernel 159 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- ATTEMPTING TO COMPILE JIT SOURCE ----------\n"); 160 CeedDebug(ceed, "Source:\n%s\n", code.str().c_str()); 161 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JIT SOURCE ----------\n"); 162 163 if (!using_clang) { 164 CeedCallNvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL)); 165 --- 49 unchanged lines hidden (view full) --- 215 CeedCallNvrtc(ceed, nvrtcGetPTX(prog, ptx)); 216#endif 217 CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog)); 218 219 CeedCallCuda(ceed, cuModuleLoadData(module, ptx)); 220 CeedCallBackend(CeedFree(&ptx)); 221 return CEED_ERROR_SUCCESS; 222 } else { |
| 224 const char *full_filename = "temp_kernel_source.cu"; 225 FILE *file = fopen(full_filename, "w"); | 223 srand(time(NULL)); 224 const int build_id = rand(); |
| 226 | 225 |
| 227 CeedCheck(file, ceed, CEED_ERROR_BACKEND, "Failed to create file. Write access is required for cuda-clang\n"); 228 fputs(code.str().c_str(), file); 229 fclose(file); | 226 // Create temp dir if needed 227 { 228 DIR *dir = opendir("temp"); |
| 230 | 229 |
| 231 // Get rust crate directories | 230 if (dir) { 231 closedir(dir); 232 } else { 233 mkdir("temp", 0777); 234 chmod("temp", 0777); 235 } 236 } 237 // Write code to temp file 238 { 239 std::string filename = std::string("temp/kernel_") + std::to_string(build_id) + std::string("_0_source.cu"); 240 FILE *file = fopen(filename.c_str(), "w"); |
| 232 | 241 |
| 242 CeedCheck(file, ceed, CEED_ERROR_BACKEND, "Failed to create file. Write access is required for cuda-clang"); 243 fputs(code.str().c_str(), file); 244 fclose(file); 245 } 246 247 // Get rust crate directories |
|
| 233 const char **rust_source_dirs = nullptr; 234 int num_rust_source_dirs = 0; 235 236 CeedCallBackend(CeedGetRustSourceRoots(ceed, &num_rust_source_dirs, &rust_source_dirs)); 237 238 std::string rust_dirs[10]; 239 240 if (num_rust_source_dirs > 0) { --- 19 unchanged lines hidden (view full) --- 260 for (CeedInt i = 0; i < num_rust_source_dirs; i++) { 261 command = "cargo +" + std::string(rust_toolchain) + " build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] + 262 "/.cargo/config.toml --manifest-path " + rust_dirs[i] + "/Cargo.toml"; 263 CeedCallSystem(ceed, command.c_str(), "build Rust crate"); 264 } 265 266 // Compile wrapper kernel 267 command = "clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) + | 248 const char **rust_source_dirs = nullptr; 249 int num_rust_source_dirs = 0; 250 251 CeedCallBackend(CeedGetRustSourceRoots(ceed, &num_rust_source_dirs, &rust_source_dirs)); 252 253 std::string rust_dirs[10]; 254 255 if (num_rust_source_dirs > 0) { --- 19 unchanged lines hidden (view full) --- 275 for (CeedInt i = 0; i < num_rust_source_dirs; i++) { 276 command = "cargo +" + std::string(rust_toolchain) + " build --release --target nvptx64-nvidia-cuda --config " + rust_dirs[i] + 277 "/.cargo/config.toml --manifest-path " + rust_dirs[i] + "/Cargo.toml"; 278 CeedCallSystem(ceed, command.c_str(), "build Rust crate"); 279 } 280 281 // Compile wrapper kernel 282 command = "clang++ -flto=thin --cuda-gpu-arch=sm_" + std::to_string(prop.major) + std::to_string(prop.minor) + |
| 268 " --cuda-device-only -emit-llvm -S temp_kernel_source.cu -o temp_kernel.ll "; | 283 " --cuda-device-only -emit-llvm -S temp/kernel_" + std::to_string(build_id) + "_0_source.cu -o temp/kernel_" + 284 std::to_string(build_id) + "_1_wrapped.ll "; |
| 269 command += opts[4]; 270 CeedCallSystem(ceed, command.c_str(), "JiT kernel source"); 271 272 // the find command finds the rust-installed llvm-link tool and runs it | 285 command += opts[4]; 286 CeedCallSystem(ceed, command.c_str(), "JiT kernel source"); 287 288 // the find command finds the rust-installed llvm-link tool and runs it |
| 273 command = "$(find $(rustup run " + std::string(rust_toolchain) + 274 " rustc --print sysroot) -name llvm-link) temp_kernel.ll --ignore-non-bitcode --internalize --only-needed -S -o " 275 "temp_kernel_linked.ll "; | 289 command = "$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llvm-link) temp/kernel_" + 290 std::to_string(build_id) + 291 "_1_wrapped.ll --ignore-non-bitcode --internalize --only-needed -S -o " 292 "temp/kernel_" + 293 std::to_string(build_id) + "_2_linked.ll "; |
| 276 277 // Searches for .a files in rust directoy 278 // Note: this is necessary because rust crate names may not match the folder they are in 279 for (CeedInt i = 0; i < num_rust_source_dirs; i++) { 280 std::string dir = rust_dirs[i] + "/target/nvptx64-nvidia-cuda/release"; 281 DIR *dp = opendir(dir.c_str()); 282 283 CeedCheck(dp != nullptr, ceed, CEED_ERROR_BACKEND, "Could not open directory: %s", dir.c_str()); --- 9 unchanged lines hidden (view full) --- 293 } 294 closedir(dp); 295 // TODO: when libCEED switches to c++17, switch to std::filesystem for the loop above 296 } 297 298 // Link, optimize, and compile final CUDA kernel 299 // note that the find command is used to find the rust-installed llvm tool 300 CeedCallSystem(ceed, command.c_str(), "link C and Rust source"); | 294 295 // Searches for .a files in rust directoy 296 // Note: this is necessary because rust crate names may not match the folder they are in 297 for (CeedInt i = 0; i < num_rust_source_dirs; i++) { 298 std::string dir = rust_dirs[i] + "/target/nvptx64-nvidia-cuda/release"; 299 DIR *dp = opendir(dir.c_str()); 300 301 CeedCheck(dp != nullptr, ceed, CEED_ERROR_BACKEND, "Could not open directory: %s", dir.c_str()); --- 9 unchanged lines hidden (view full) --- 311 } 312 closedir(dp); 313 // TODO: when libCEED switches to c++17, switch to std::filesystem for the loop above 314 } 315 316 // Link, optimize, and compile final CUDA kernel 317 // note that the find command is used to find the rust-installed llvm tool 318 CeedCallSystem(ceed, command.c_str(), "link C and Rust source"); |
| 319 CeedCallSystem( 320 ceed, 321 ("$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name opt) --passes internalize,inline temp/kernel_" + 322 std::to_string(build_id) + "_2_linked.ll -o temp/kernel_" + std::to_string(build_id) + "_3_opt.bc") 323 .c_str(), 324 "optimize linked C and Rust source"); |
|
| 301 CeedCallSystem(ceed, | 325 CeedCallSystem(ceed, |
| 302 ("$(find $(rustup run " + std::string(rust_toolchain) + 303 " rustc --print sysroot) -name opt) --passes internalize,inline temp_kernel_linked.ll -o temp_kernel_opt.bc") 304 .c_str(), 305 "optimize linked C and Rust source"); 306 CeedCallSystem(ceed, | |
| 307 ("$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llc) -O3 -mcpu=sm_" + | 326 ("$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llc) -O3 -mcpu=sm_" + |
| 308 std::to_string(prop.major) + std::to_string(prop.minor) + " temp_kernel_opt.bc -o temp_kernel_final.ptx") | 327 std::to_string(prop.major) + std::to_string(prop.minor) + " temp/kernel_" + std::to_string(build_id) + 328 "_3_opt.bc -o temp/kernel_" + std::to_string(build_id) + "_4_final.ptx") |
| 309 .c_str(), 310 "compile final CUDA kernel"); 311 | 329 .c_str(), 330 "compile final CUDA kernel"); 331 |
| 312 ifstream ptxfile("temp_kernel_final.ptx"); | 332 ifstream ptxfile("temp/kernel_" + std::to_string(build_id) + "_4_final.ptx"); |
| 313 ostringstream sstr; 314 315 sstr << ptxfile.rdbuf(); 316 317 auto ptx_data = sstr.str(); 318 ptx_size = ptx_data.length(); 319 320 int result = cuModuleLoadData(module, ptx_data.c_str()); --- 130 unchanged lines hidden --- | 333 ostringstream sstr; 334 335 sstr << ptxfile.rdbuf(); 336 337 auto ptx_data = sstr.str(); 338 ptx_size = ptx_data.length(); 339 340 int result = cuModuleLoadData(module, ptx_data.c_str()); --- 130 unchanged lines hidden --- |