Lines Matching +full:- +full:- +full:ceed
1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
4 // SPDX-License-Identifier: BSD-2-Clause
6 // This file is part of CEED: http://github.com/ceed
8 #include "ceed-cuda-compile.h"
10 #include <ceed.h>
11 #include <ceed/backend.h>
12 #include <ceed/jit-tools.h>
28 #include "ceed-cuda-common.h"
30 #define CeedChk_Nvrtc(ceed, x) … argument
33 …if (result != NVRTC_SUCCESS) return CeedError((ceed), CEED_ERROR_BACKEND, nvrtcGetErrorString(resu…
36 #define CeedCallNvrtc(ceed, ...) \ argument
39 CeedChk_Nvrtc(ceed, ierr_q_); \
42 #define CeedCallSystem(ceed, command, message) CeedCallBackend(CeedCallSystem_Core(ceed, command, m… argument
44 //------------------------------------------------------------------------------
46 //------------------------------------------------------------------------------
47 static int CeedCallSystem_Core(Ceed ceed, const char *command, const char *message) { in CeedCallSystem_Core() argument
48 CeedDebug(ceed, "Running command:\n$ %s", command); in CeedCallSystem_Core()
51 …CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to %s\ncommand:\n$ %s", mess… in CeedCallSystem_Core()
59 CeedDebug(ceed, "output:\n%s\n", output.c_str()); in CeedCallSystem_Core()
60 …CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to %s\ncommand:\n$ %s\nerr… in CeedCallSystem_Core()
64 //------------------------------------------------------------------------------
66 //------------------------------------------------------------------------------
71 static int CeedCompileCore_Cuda(Ceed ceed, const char *source, const bool throw_error, bool *is_com… in CeedCompileCore_Cuda() argument
87 CeedCallBackend(CeedGetIsClang(ceed, &using_clang)); in CeedCompileCore_Cuda()
89 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, in CeedCompileCore_Cuda()
107 code << "#include <ceed/jit-source/cuda/cuda-jit.h>\n\n"; in CeedCompileCore_Cuda()
109 // Non-macro options in CeedCompileCore_Cuda()
111 opts[0] = "-default-device"; in CeedCompileCore_Cuda()
112 CeedCallBackend(CeedGetData(ceed, &ceed_data)); in CeedCompileCore_Cuda()
113 CeedCallCuda(ceed, cudaGetDeviceProperties(&prop, ceed_data->device_id)); in CeedCompileCore_Cuda()
117 // -arch, since it was only emitting PTX. It will now support actual in CeedCompileCore_Cuda()
119 // https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#dynamic-code-generation in CeedCompileCore_Cuda()
120 "-arch=sm_" in CeedCompileCore_Cuda()
122 "-arch=compute_" in CeedCompileCore_Cuda()
126 opts[2] = "-Dint32_t=int"; in CeedCompileCore_Cuda()
127 opts[3] = "-DCEED_RUNNING_JIT_PASS=1"; in CeedCompileCore_Cuda()
132 CeedCallBackend(CeedGetJitSourceRoots(ceed, &num_jit_source_dirs, &jit_source_dirs)); in CeedCompileCore_Cuda()
137 include_dir_arg << "-I" << jit_source_dirs[i]; in CeedCompileCore_Cuda()
140 CeedCallBackend(CeedRestoreJitSourceRoots(ceed, &jit_source_dirs)); in CeedCompileCore_Cuda()
146 CeedCallBackend(CeedGetJitDefines(ceed, &num_jit_defines, &jit_defines)); in CeedCompileCore_Cuda()
151 define_arg << "-D" << jit_defines[i]; in CeedCompileCore_Cuda()
154 CeedCallBackend(CeedRestoreJitDefines(ceed, &jit_defines)); in CeedCompileCore_Cuda()
161 …CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- ATTEMPTING TO COMPILE JIT SOURCE --------… in CeedCompileCore_Cuda()
162 CeedDebug(ceed, "Source:\n%s\n", code.str().c_str()); in CeedCompileCore_Cuda()
163 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JIT SOURCE ----------\n"); in CeedCompileCore_Cuda()
166 CeedCallNvrtc(ceed, nvrtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL)); in CeedCompileCore_Cuda()
168 if (CeedDebugFlag(ceed)) { in CeedCompileCore_Cuda()
170 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- JiT COMPILER OPTIONS ----------\n"); in CeedCompileCore_Cuda()
172 CeedDebug(ceed, "Option %d: %s", i, opts[i]); in CeedCompileCore_Cuda()
174 CeedDebug(ceed, ""); in CeedCompileCore_Cuda()
175 …CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JiT COMPILER OPTIONS ----------\n"… in CeedCompileCore_Cuda()
193 CeedCallNvrtc(ceed, nvrtcGetProgramLogSize(prog, &log_size)); in CeedCompileCore_Cuda()
195 CeedCallNvrtc(ceed, nvrtcGetProgramLog(prog, log)); in CeedCompileCore_Cuda()
197 return CeedError(ceed, CEED_ERROR_BACKEND, "%s\n%s", nvrtcGetErrorString(result), log); in CeedCompileCore_Cuda()
200 … CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- COMPILE ERROR DETECTED ----------\n"); in CeedCompileCore_Cuda()
201 CeedDebug(ceed, "Error: %s\nCompile log:\n%s\n", nvrtcGetErrorString(result), log); in CeedCompileCore_Cuda()
202 CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- BACKEND MAY FALLBACK ----------\n"); in CeedCompileCore_Cuda()
204 CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog)); in CeedCompileCore_Cuda()
211 CeedCallNvrtc(ceed, nvrtcGetCUBINSize(prog, &ptx_size)); in CeedCompileCore_Cuda()
213 CeedCallNvrtc(ceed, nvrtcGetCUBIN(prog, ptx)); in CeedCompileCore_Cuda()
215 CeedCallNvrtc(ceed, nvrtcGetPTXSize(prog, &ptx_size)); in CeedCompileCore_Cuda()
217 CeedCallNvrtc(ceed, nvrtcGetPTX(prog, ptx)); in CeedCompileCore_Cuda()
219 CeedCallNvrtc(ceed, nvrtcDestroyProgram(&prog)); in CeedCompileCore_Cuda()
221 CeedCallCuda(ceed, cuModuleLoadData(module, ptx)); in CeedCompileCore_Cuda()
246 …CeedCheck(file, ceed, CEED_ERROR_BACKEND, "Failed to create file. Write access is required for cud… in CeedCompileCore_Cuda()
255 CeedCallBackend(CeedGetRustSourceRoots(ceed, &num_rust_source_dirs, &rust_source_dirs)); in CeedCompileCore_Cuda()
260 …CeedDebug(ceed, "There are %d source dirs, including %s\n", num_rust_source_dirs, rust_source_dirs… in CeedCompileCore_Cuda()
267 CeedCallBackend(CeedRestoreRustSourceRoots(ceed, &rust_source_dirs)); in CeedCompileCore_Cuda()
280 …and = "cargo +" + std::string(rust_toolchain) + " build --release --target nvptx64-nvidia-cuda --c… in CeedCompileCore_Cuda()
281 "/.cargo/config.toml --manifest-path " + rust_dirs[i] + "/Cargo.toml"; in CeedCompileCore_Cuda()
282 CeedCallSystem(ceed, command.c_str(), "build Rust crate"); in CeedCompileCore_Cuda()
286 bool use_llvm_version = ceed_data->use_llvm_version; in CeedCompileCore_Cuda()
287 int llvm_version = ceed_data->llvm_version; in CeedCompileCore_Cuda()
290 …nd $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llvm-link) --vers… in CeedCompileCore_Cuda()
291 CeedDebug(ceed, "Attempting to detect Rust LLVM version.\ncommand:\n$ %s", command.c_str()); in CeedCompileCore_Cuda()
294 …CeedCheck(output_stream != nullptr, ceed, CEED_ERROR_BACKEND, "Failed to detect Rust LLVM version"… in CeedCompileCore_Cuda()
302 CeedDebug(ceed, "output:\n%s", output.c_str()); in CeedCompileCore_Cuda()
303 …CeedCheck(pclose(output_stream) == 0, ceed, CEED_ERROR_BACKEND, "Failed to detect Rust LLVM versio… in CeedCompileCore_Cuda()
314 ceed_data->llvm_version = llvm_version = std::stoi(version_substring); in CeedCompileCore_Cuda()
315 CeedDebug(ceed, "Rust LLVM version number: %d\n", llvm_version); in CeedCompileCore_Cuda()
317 command = std::string("clang++-") + std::to_string(llvm_version); in CeedCompileCore_Cuda()
319 ceed_data->use_llvm_version = use_llvm_version = pclose(output_stream) == 0; in CeedCompileCore_Cuda()
321 ceed_data->llvm_version = -1; in CeedCompileCore_Cuda()
322 ceed_data->use_llvm_version = use_llvm_version = false; in CeedCompileCore_Cuda()
327 …ng++" + (use_llvm_version ? (std::string("-") + std::to_string(llvm_version)) : "") + " -flto=thin… in CeedCompileCore_Cuda()
328 …std::to_string(prop.major) + std::to_string(prop.minor) + " --cuda-device-only -emit-llvm -S temp/… in CeedCompileCore_Cuda()
329 "_0_source.cu -o temp/kernel_" + std::to_string(build_id) + "_1_wrapped.ll "; in CeedCompileCore_Cuda()
331 CeedCallSystem(ceed, command.c_str(), "JiT kernel source"); in CeedCompileCore_Cuda()
332 …CeedCallSystem(ceed, ("chmod 0777 temp/kernel_" + std::to_string(build_id) + "_1_wrapped.ll").c_st… in CeedCompileCore_Cuda()
334 // Find Rust's llvm-link tool and run it in CeedCompileCore_Cuda()
335 …$(find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llvm-link) te… in CeedCompileCore_Cuda()
337 "_1_wrapped.ll --ignore-non-bitcode --internalize --only-needed -S -o " in CeedCompileCore_Cuda()
345 std::string dir = rust_dirs[i] + "/target/nvptx64-nvidia-cuda/release"; in CeedCompileCore_Cuda()
348 … CeedCheck(dp != nullptr, ceed, CEED_ERROR_BACKEND, "Could not open directory: %s", dir.c_str()); in CeedCompileCore_Cuda()
353 std::string filename(entry->d_name); in CeedCompileCore_Cuda()
355 if (filename.size() >= 2 && filename.substr(filename.size() - 2) == ".a") { in CeedCompileCore_Cuda()
363 CeedCallSystem(ceed, command.c_str(), "link C and Rust source"); in CeedCompileCore_Cuda()
364 CeedCallSystem(ceed, in CeedCompileCore_Cuda()
366 …" rustc --print sysroot) -name opt) --passes internalize,inline temp/kernel_" + std::to_string(bui… in CeedCompileCore_Cuda()
367 "_2_linked.ll -o temp/kernel_" + std::to_string(build_id) + "_3_opt.bc") in CeedCompileCore_Cuda()
370 …CeedCallSystem(ceed, ("chmod 0777 temp/kernel_" + std::to_string(build_id) + "_2_linked.ll").c_str… in CeedCompileCore_Cuda()
371 CeedCallSystem(ceed, in CeedCompileCore_Cuda()
372 …find $(rustup run " + std::string(rust_toolchain) + " rustc --print sysroot) -name llc) -O3 -mcpu=… in CeedCompileCore_Cuda()
374 "_3_opt.bc -o temp/kernel_" + std::to_string(build_id) + "_4_final.ptx") in CeedCompileCore_Cuda()
377 …CeedCallSystem(ceed, ("chmod 0777 temp/kernel_" + std::to_string(build_id) + "_4_final.ptx").c_str… in CeedCompileCore_Cuda()
393 return CeedError(ceed, CEED_ERROR_BACKEND, "Failed to load module data"); in CeedCompileCore_Cuda()
396 … CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- COMPILE ERROR DETECTED ----------\n"); in CeedCompileCore_Cuda()
397 CeedDebug(ceed, "Error: Failed to load module data"); in CeedCompileCore_Cuda()
398 CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- BACKEND MAY FALLBACK ----------\n"); in CeedCompileCore_Cuda()
407 int CeedCompile_Cuda(Ceed ceed, const char *source, CUmodule *module, const CeedInt num_defines, ..… in CeedCompile_Cuda() argument
412 …const CeedInt ierr = CeedCompileCore_Cuda(ceed, source, true, &is_compile_good, module, num_define… in CeedCompile_Cuda()
419 int CeedTryCompile_Cuda(Ceed ceed, const char *source, bool *is_compile_good, CUmodule *module, con… in CeedTryCompile_Cuda() argument
423 …const CeedInt ierr = CeedCompileCore_Cuda(ceed, source, false, is_compile_good, module, num_define… in CeedTryCompile_Cuda()
430 //------------------------------------------------------------------------------
432 //------------------------------------------------------------------------------
433 int CeedGetKernel_Cuda(Ceed ceed, CUmodule module, const char *name, CUfunction *kernel) { in CeedGetKernel_Cuda() argument
434 CeedCallCuda(ceed, cuModuleGetFunction(kernel, module, name)); in CeedGetKernel_Cuda()
438 //------------------------------------------------------------------------------
442 //------------------------------------------------------------------------------
443 int CeedRunKernelAutoblockCuda(Ceed ceed, CUfunction kernel, size_t points, void **args) { in CeedRunKernelAutoblockCuda() argument
446 …CeedCallCuda(ceed, cuOccupancyMaxPotentialBlockSize(&min_grid_size, &max_block_size, kernel, NULL,… in CeedRunKernelAutoblockCuda()
447 …CeedCallBackend(CeedRunKernel_Cuda(ceed, kernel, CeedDivUpInt(points, max_block_size), max_block_s… in CeedRunKernelAutoblockCuda()
451 //------------------------------------------------------------------------------
453 //------------------------------------------------------------------------------
454 int CeedRunKernel_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size, voi… in CeedRunKernel_Cuda() argument
455 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size, 1, 1, 0, ar… in CeedRunKernel_Cuda()
459 //------------------------------------------------------------------------------
461 //------------------------------------------------------------------------------
462 int CeedRunKernelDim_Cuda(Ceed ceed, CUfunction kernel, const int grid_size, const int block_size_x… in CeedRunKernelDim_Cuda() argument
464 …CeedCallBackend(CeedRunKernelDimShared_Cuda(ceed, kernel, NULL, grid_size, block_size_x, block_siz… in CeedRunKernelDim_Cuda()
468 //------------------------------------------------------------------------------
470 //------------------------------------------------------------------------------
471 static int CeedRunKernelDimSharedCore_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int… in CeedRunKernelDimSharedCore_Cuda() argument
486 return CeedError(ceed, CEED_ERROR_BACKEND, in CeedRunKernelDimSharedCore_Cuda()
491 CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- LAUNCH ERROR DETECTED ----------\n"); in CeedRunKernelDimSharedCore_Cuda()
492 …CeedDebug(ceed, "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: max_threads_per_block %d on block size (%d,%d… in CeedRunKernelDimSharedCore_Cuda()
494 CeedDebug256(ceed, CEED_DEBUG_COLOR_WARNING, "---------- BACKEND MAY FALLBACK ----------\n"); in CeedRunKernelDimSharedCore_Cuda()
498 } else CeedChk_Cu(ceed, result); in CeedRunKernelDimSharedCore_Cuda()
502 int CeedRunKernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_size,… in CeedRunKernelDimShared_Cuda() argument
506 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedRunKernelDimShared_Cuda()
511 int CeedTryRunKernelDimShared_Cuda(Ceed ceed, CUfunction kernel, CUstream stream, const int grid_si… in CeedTryRunKernelDimShared_Cuda() argument
513 …CeedCallBackend(CeedRunKernelDimSharedCore_Cuda(ceed, kernel, stream, grid_size, block_size_x, blo… in CeedTryRunKernelDimShared_Cuda()
518 //------------------------------------------------------------------------------