1*9ba83ac0SJeremy L Thompson // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
330f4f45fSnbeams //
43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause
530f4f45fSnbeams //
63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed
730f4f45fSnbeams
82b730f8bSJeremy L Thompson #include "ceed-hip-compile.h"
92b730f8bSJeremy L Thompson
1049aac155SJeremy L Thompson #include <ceed.h>
11ec3da8bcSJed Brown #include <ceed/backend.h>
12c9c2c079SJeremy L Thompson #include <ceed/jit-tools.h>
1330f4f45fSnbeams #include <stdarg.h>
143d576824SJeremy L Thompson #include <string.h>
15c85e8640SSebastian Grimberg #include <hip/hiprtc.h>
162b730f8bSJeremy L Thompson
172b730f8bSJeremy L Thompson #include <sstream>
182b730f8bSJeremy L Thompson
197fcac036SJeremy L Thompson #include "ceed-hip-common.h"
2030f4f45fSnbeams
2130f4f45fSnbeams #define CeedChk_hiprtc(ceed, x) \
2230f4f45fSnbeams do { \
2330f4f45fSnbeams hiprtcResult result = static_cast<hiprtcResult>(x); \
242b730f8bSJeremy L Thompson if (result != HIPRTC_SUCCESS) return CeedError((ceed), CEED_ERROR_BACKEND, hiprtcGetErrorString(result)); \
2530f4f45fSnbeams } while (0)
2630f4f45fSnbeams
272b730f8bSJeremy L Thompson #define CeedCallHiprtc(ceed, ...) \
282b730f8bSJeremy L Thompson do { \
292b730f8bSJeremy L Thompson int ierr_q_ = __VA_ARGS__; \
302b730f8bSJeremy L Thompson CeedChk_hiprtc(ceed, ierr_q_); \
316574a04fSJeremy L Thompson } while (0)
322b730f8bSJeremy L Thompson
3330f4f45fSnbeams //------------------------------------------------------------------------------
3430f4f45fSnbeams // Compile HIP kernel
3530f4f45fSnbeams //------------------------------------------------------------------------------
CeedCompileCore_Hip(Ceed ceed,const char * source,const bool throw_error,bool * is_compile_good,hipModule_t * module,const CeedInt num_defines,va_list args)368d12f40eSJeremy L Thompson static int CeedCompileCore_Hip(Ceed ceed, const char *source, const bool throw_error, bool *is_compile_good, hipModule_t *module,
378d12f40eSJeremy L Thompson const CeedInt num_defines, va_list args) {
38b7453713SJeremy L Thompson size_t ptx_size;
3991adc9c8SJeremy L Thompson char *ptx;
40a491a57eSJeremy L Thompson const int num_opts = 4;
414753b775SJeremy L Thompson CeedInt num_jit_source_dirs = 0, num_jit_defines = 0;
42b13efd58SJeremy L Thompson const char **opts;
43b7453713SJeremy L Thompson int runtime_version;
4430f4f45fSnbeams hiprtcProgram prog;
45b7453713SJeremy L Thompson struct hipDeviceProp_t prop;
46b7453713SJeremy L Thompson Ceed_Hip *ceed_data;
47b7453713SJeremy L Thompson
48b7453713SJeremy L Thompson hipFree(0); // Make sure a Context exists for hiprtc
4930f4f45fSnbeams
5030f4f45fSnbeams std::ostringstream code;
51c9c2c079SJeremy L Thompson
52ea61e9acSJeremy L Thompson // Add hip runtime include statement for generation if runtime < 40400000 (implies ROCm < 4.5)
532b730f8bSJeremy L Thompson CeedCallHip(ceed, hipRuntimeGetVersion(&runtime_version));
549faa5937SNatalie Beams if (runtime_version < 40400000) {
5530f4f45fSnbeams code << "\n#include <hip/hip_runtime.h>\n";
569faa5937SNatalie Beams }
57ea61e9acSJeremy L Thompson // With ROCm 4.5, need to include these definitions specifically for hiprtc (but cannot include the runtime header)
589faa5937SNatalie Beams else {
599faa5937SNatalie Beams code << "#include <stddef.h>\n";
609faa5937SNatalie Beams code << "#define __forceinline__ inline __attribute__((always_inline))\n";
619faa5937SNatalie Beams code << "#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[];\n";
629faa5937SNatalie Beams }
6330f4f45fSnbeams
64c9c2c079SJeremy L Thompson // Kernel specific options, such as kernel constants
65c9c2c079SJeremy L Thompson if (num_defines > 0) {
6630f4f45fSnbeams char *name;
6730f4f45fSnbeams int val;
68b7453713SJeremy L Thompson
69c9c2c079SJeremy L Thompson for (int i = 0; i < num_defines; i++) {
7030f4f45fSnbeams name = va_arg(args, char *);
7130f4f45fSnbeams val = va_arg(args, int);
7230f4f45fSnbeams code << "#define " << name << " " << val << "\n";
7330f4f45fSnbeams }
7430f4f45fSnbeams }
7530f4f45fSnbeams
76c9c2c079SJeremy L Thompson // Standard libCEED definitions for HIP backends
7791adc9c8SJeremy L Thompson code << "#include <ceed/jit-source/hip/hip-jit.h>\n\n";
7830f4f45fSnbeams
7930f4f45fSnbeams // Non-macro options
80b13efd58SJeremy L Thompson CeedCallBackend(CeedCalloc(num_opts, &opts));
8130f4f45fSnbeams opts[0] = "-default-device";
822b730f8bSJeremy L Thompson CeedCallBackend(CeedGetData(ceed, (void **)&ceed_data));
832b730f8bSJeremy L Thompson CeedCallHip(ceed, hipGetDeviceProperties(&prop, ceed_data->device_id));
840d0321e0SJeremy L Thompson std::string arch_arg = "--gpu-architecture=" + std::string(prop.gcnArchName);
850d0321e0SJeremy L Thompson opts[1] = arch_arg.c_str();
86b3c5430cSnbeams opts[2] = "-munsafe-fp-atomics";
87a491a57eSJeremy L Thompson opts[3] = "-DCEED_RUNNING_JIT_PASS=1";
884753b775SJeremy L Thompson // Additional include dirs
89b13efd58SJeremy L Thompson {
90b13efd58SJeremy L Thompson const char **jit_source_dirs;
91b13efd58SJeremy L Thompson
92b13efd58SJeremy L Thompson CeedCallBackend(CeedGetJitSourceRoots(ceed, &num_jit_source_dirs, &jit_source_dirs));
93b13efd58SJeremy L Thompson CeedCallBackend(CeedRealloc(num_opts + num_jit_source_dirs, &opts));
94b13efd58SJeremy L Thompson for (CeedInt i = 0; i < num_jit_source_dirs; i++) {
954753b775SJeremy L Thompson std::ostringstream include_dir_arg;
96b13efd58SJeremy L Thompson
974753b775SJeremy L Thompson include_dir_arg << "-I" << jit_source_dirs[i];
984753b775SJeremy L Thompson CeedCallBackend(CeedStringAllocCopy(include_dir_arg.str().c_str(), (char **)&opts[num_opts + i]));
99b13efd58SJeremy L Thompson }
100b13efd58SJeremy L Thompson CeedCallBackend(CeedRestoreJitSourceRoots(ceed, &jit_source_dirs));
101b13efd58SJeremy L Thompson }
1024753b775SJeremy L Thompson // User defines
1034753b775SJeremy L Thompson {
1044753b775SJeremy L Thompson const char **jit_defines;
1054753b775SJeremy L Thompson
1064753b775SJeremy L Thompson CeedCallBackend(CeedGetJitDefines(ceed, &num_jit_defines, &jit_defines));
1074753b775SJeremy L Thompson CeedCallBackend(CeedRealloc(num_opts + num_jit_source_dirs + num_jit_defines, &opts));
1084753b775SJeremy L Thompson for (CeedInt i = 0; i < num_jit_defines; i++) {
1094753b775SJeremy L Thompson std::ostringstream define_arg;
1104753b775SJeremy L Thompson
1114753b775SJeremy L Thompson define_arg << "-D" << jit_defines[i];
1124753b775SJeremy L Thompson CeedCallBackend(CeedStringAllocCopy(define_arg.str().c_str(), (char **)&opts[num_opts + num_jit_source_dirs + i]));
1134753b775SJeremy L Thompson }
1144753b775SJeremy L Thompson CeedCallBackend(CeedRestoreJitDefines(ceed, &jit_defines));
1154753b775SJeremy L Thompson }
11630f4f45fSnbeams
11730f4f45fSnbeams // Add string source argument provided in call
11830f4f45fSnbeams code << source;
11930f4f45fSnbeams
12030f4f45fSnbeams // Create Program
1212b730f8bSJeremy L Thompson CeedCallHiprtc(ceed, hiprtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL));
12230f4f45fSnbeams
12330f4f45fSnbeams // Compile kernel
124c21e34e2SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- ATTEMPTING TO COMPILE JIT SOURCE ----------\n");
12526ef7cdaSJeremy L Thompson CeedDebug(ceed, "Source:\n%s\n", code.str().c_str());
126c21e34e2SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JIT SOURCE ----------\n");
127bdcc2728SJeremy L Thompson if (CeedDebugFlag(ceed)) {
128bdcc2728SJeremy L Thompson // LCOV_EXCL_START
129c21e34e2SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- JiT COMPILER OPTIONS ----------\n");
130bdcc2728SJeremy L Thompson for (CeedInt i = 0; i < num_opts + num_jit_source_dirs + num_jit_defines; i++) {
131bdcc2728SJeremy L Thompson CeedDebug(ceed, "Option %d: %s", i, opts[i]);
132bdcc2728SJeremy L Thompson }
133bdcc2728SJeremy L Thompson CeedDebug(ceed, "");
134c21e34e2SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JiT COMPILER OPTIONS ----------\n");
135bdcc2728SJeremy L Thompson // LCOV_EXCL_STOP
136bdcc2728SJeremy L Thompson }
1374753b775SJeremy L Thompson hiprtcResult result = hiprtcCompileProgram(prog, num_opts + num_jit_source_dirs + num_jit_defines, opts);
138b7453713SJeremy L Thompson
139b13efd58SJeremy L Thompson for (CeedInt i = 0; i < num_jit_source_dirs; i++) {
140b13efd58SJeremy L Thompson CeedCallBackend(CeedFree(&opts[num_opts + i]));
141b13efd58SJeremy L Thompson }
1424753b775SJeremy L Thompson for (CeedInt i = 0; i < num_jit_defines; i++) {
1434753b775SJeremy L Thompson CeedCallBackend(CeedFree(&opts[num_opts + num_jit_source_dirs + i]));
1444753b775SJeremy L Thompson }
145b13efd58SJeremy L Thompson CeedCallBackend(CeedFree(&opts));
1468d12f40eSJeremy L Thompson *is_compile_good = result == HIPRTC_SUCCESS;
14728c1f747SJeremy L Thompson if (!*is_compile_good) {
1480d0321e0SJeremy L Thompson size_t log_size;
14930f4f45fSnbeams char *log;
150b7453713SJeremy L Thompson
151b7453713SJeremy L Thompson CeedChk_hiprtc(ceed, hiprtcGetProgramLogSize(prog, &log_size));
1522b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(log_size, &log));
1532b730f8bSJeremy L Thompson CeedCallHiprtc(ceed, hiprtcGetProgramLog(prog, log));
15428c1f747SJeremy L Thompson if (throw_error) {
1552b730f8bSJeremy L Thompson return CeedError(ceed, CEED_ERROR_BACKEND, "%s\n%s", hiprtcGetErrorString(result), log);
15628c1f747SJeremy L Thompson } else {
157c49dc7a7SJeremy L Thompson // LCOV_EXCL_START
15828c1f747SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- COMPILE ERROR DETECTED ----------\n");
15928c1f747SJeremy L Thompson CeedDebug(ceed, "Error: %s\nCompile log:\n%s\n", hiprtcGetErrorString(result), log);
160c21e34e2SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_WARNING, "---------- BACKEND MAY FALLBACK ----------\n");
16128c1f747SJeremy L Thompson CeedCallBackend(CeedFree(&log));
16228c1f747SJeremy L Thompson CeedCallHiprtc(ceed, hiprtcDestroyProgram(&prog));
16328c1f747SJeremy L Thompson return CEED_ERROR_SUCCESS;
164c49dc7a7SJeremy L Thompson // LCOV_EXCL_STOP
16528c1f747SJeremy L Thompson }
16630f4f45fSnbeams }
16730f4f45fSnbeams
1682b730f8bSJeremy L Thompson CeedCallHiprtc(ceed, hiprtcGetCodeSize(prog, &ptx_size));
1692b730f8bSJeremy L Thompson CeedCallBackend(CeedMalloc(ptx_size, &ptx));
1702b730f8bSJeremy L Thompson CeedCallHiprtc(ceed, hiprtcGetCode(prog, ptx));
1712b730f8bSJeremy L Thompson CeedCallHiprtc(ceed, hiprtcDestroyProgram(&prog));
17230f4f45fSnbeams
1732b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleLoadData(module, ptx));
1742b730f8bSJeremy L Thompson CeedCallBackend(CeedFree(&ptx));
175e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS;
17630f4f45fSnbeams }
17730f4f45fSnbeams
CeedCompile_Hip(Ceed ceed,const char * source,hipModule_t * module,const CeedInt num_defines,...)1788d12f40eSJeremy L Thompson int CeedCompile_Hip(Ceed ceed, const char *source, hipModule_t *module, const CeedInt num_defines, ...) {
1798d12f40eSJeremy L Thompson bool is_compile_good = true;
1808d12f40eSJeremy L Thompson va_list args;
1818d12f40eSJeremy L Thompson
1828d12f40eSJeremy L Thompson va_start(args, num_defines);
18318c38aeeSJeremy L Thompson const CeedInt ierr = CeedCompileCore_Hip(ceed, source, true, &is_compile_good, module, num_defines, args);
18418c38aeeSJeremy L Thompson
1858d12f40eSJeremy L Thompson va_end(args);
18618c38aeeSJeremy L Thompson CeedCallBackend(ierr);
1878d12f40eSJeremy L Thompson return CEED_ERROR_SUCCESS;
1888d12f40eSJeremy L Thompson }
1898d12f40eSJeremy L Thompson
CeedTryCompile_Hip(Ceed ceed,const char * source,bool * is_compile_good,hipModule_t * module,const CeedInt num_defines,...)1908d12f40eSJeremy L Thompson int CeedTryCompile_Hip(Ceed ceed, const char *source, bool *is_compile_good, hipModule_t *module, const CeedInt num_defines, ...) {
1918d12f40eSJeremy L Thompson va_list args;
1928d12f40eSJeremy L Thompson
1938d12f40eSJeremy L Thompson va_start(args, num_defines);
19418c38aeeSJeremy L Thompson const CeedInt ierr = CeedCompileCore_Hip(ceed, source, false, is_compile_good, module, num_defines, args);
19518c38aeeSJeremy L Thompson
1968d12f40eSJeremy L Thompson va_end(args);
19718c38aeeSJeremy L Thompson CeedCallBackend(ierr);
1988d12f40eSJeremy L Thompson return CEED_ERROR_SUCCESS;
1998d12f40eSJeremy L Thompson }
2008d12f40eSJeremy L Thompson
20130f4f45fSnbeams //------------------------------------------------------------------------------
20230f4f45fSnbeams // Get HIP kernel
20330f4f45fSnbeams //------------------------------------------------------------------------------
CeedGetKernel_Hip(Ceed ceed,hipModule_t module,const char * name,hipFunction_t * kernel)204eb7e6cafSJeremy L Thompson int CeedGetKernel_Hip(Ceed ceed, hipModule_t module, const char *name, hipFunction_t *kernel) {
2052b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleGetFunction(kernel, module, name));
206e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS;
20730f4f45fSnbeams }
20830f4f45fSnbeams
20930f4f45fSnbeams //------------------------------------------------------------------------------
21030f4f45fSnbeams // Run HIP kernel
21130f4f45fSnbeams //------------------------------------------------------------------------------
CeedRunKernel_Hip(Ceed ceed,hipFunction_t kernel,const int grid_size,const int block_size,void ** args)212eb7e6cafSJeremy L Thompson int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size, void **args) {
2132b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size, 1, 1, 0, NULL, args, NULL));
214e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS;
21530f4f45fSnbeams }
21630f4f45fSnbeams
21730f4f45fSnbeams //------------------------------------------------------------------------------
21830f4f45fSnbeams // Run HIP kernel for spatial dimension
21930f4f45fSnbeams //------------------------------------------------------------------------------
CeedRunKernelDim_Hip(Ceed ceed,hipFunction_t kernel,const int grid_size,const int block_size_x,const int block_size_y,const int block_size_z,void ** args)220eb7e6cafSJeremy L Thompson int CeedRunKernelDim_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size_x, const int block_size_y, const int block_size_z,
2212b730f8bSJeremy L Thompson void **args) {
2222b730f8bSJeremy L Thompson CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z, 0, NULL, args, NULL));
223e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS;
22430f4f45fSnbeams }
22530f4f45fSnbeams
22630f4f45fSnbeams //------------------------------------------------------------------------------
227e15f9bd0SJeremy L Thompson // Run HIP kernel for spatial dimension with shared memory
22830f4f45fSnbeams //------------------------------------------------------------------------------
CeedRunKernelDimSharedCore_Hip(Ceed ceed,hipFunction_t kernel,hipStream_t stream,const int grid_size,const int block_size_x,const int block_size_y,const int block_size_z,const int shared_mem_size,const bool throw_error,bool * is_good_run,void ** args)229e9c76bddSJeremy L Thompson static int CeedRunKernelDimSharedCore_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x,
230e9c76bddSJeremy L Thompson const int block_size_y, const int block_size_z, const int shared_mem_size, const bool throw_error,
231e9c76bddSJeremy L Thompson bool *is_good_run, void **args) {
232e9c76bddSJeremy L Thompson hipError_t result = hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z, shared_mem_size, stream, args, NULL);
2338d12f40eSJeremy L Thompson
234c49dc7a7SJeremy L Thompson if (result == hipSuccess) {
235c49dc7a7SJeremy L Thompson *is_good_run = true;
236c49dc7a7SJeremy L Thompson } else {
237c49dc7a7SJeremy L Thompson if (throw_error) {
238c49dc7a7SJeremy L Thompson CeedCallHip(ceed, result);
239c49dc7a7SJeremy L Thompson } else {
240c49dc7a7SJeremy L Thompson // LCOV_EXCL_START
241c49dc7a7SJeremy L Thompson const char *message = hipGetErrorName(result);
242c49dc7a7SJeremy L Thompson
243c49dc7a7SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- LAUNCH ERROR DETECTED ----------\n");
244c49dc7a7SJeremy L Thompson CeedDebug(ceed, "%s\n", message);
245c21e34e2SJeremy L Thompson CeedDebug256(ceed, CEED_DEBUG_COLOR_WARNING, "---------- BACKEND MAY FALLBACK ----------\n");
246c49dc7a7SJeremy L Thompson // LCOV_EXCL_STOP
247c49dc7a7SJeremy L Thompson }
248c49dc7a7SJeremy L Thompson *is_good_run = false;
249c49dc7a7SJeremy L Thompson }
2508d12f40eSJeremy L Thompson return CEED_ERROR_SUCCESS;
2518d12f40eSJeremy L Thompson }
2528d12f40eSJeremy L Thompson
CeedRunKernelDimShared_Hip(Ceed ceed,hipFunction_t kernel,hipStream_t stream,const int grid_size,const int block_size_x,const int block_size_y,const int block_size_z,const int shared_mem_size,void ** args)253e9c76bddSJeremy L Thompson int CeedRunKernelDimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x,
254e9c76bddSJeremy L Thompson const int block_size_y, const int block_size_z, const int shared_mem_size, void **args) {
2558d12f40eSJeremy L Thompson bool is_good_run = true;
2568d12f40eSJeremy L Thompson
257e9c76bddSJeremy L Thompson CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, block_size_y, block_size_z, shared_mem_size, true,
258e9c76bddSJeremy L Thompson &is_good_run, args));
2598d12f40eSJeremy L Thompson return CEED_ERROR_SUCCESS;
2608d12f40eSJeremy L Thompson }
2618d12f40eSJeremy L Thompson
CeedTryRunKernelDimShared_Hip(Ceed ceed,hipFunction_t kernel,hipStream_t stream,const int grid_size,const int block_size_x,const int block_size_y,const int block_size_z,const int shared_mem_size,bool * is_good_run,void ** args)262e9c76bddSJeremy L Thompson int CeedTryRunKernelDimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x,
263e9c76bddSJeremy L Thompson const int block_size_y, const int block_size_z, const int shared_mem_size, bool *is_good_run, void **args) {
264e9c76bddSJeremy L Thompson CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, block_size_y, block_size_z, shared_mem_size, false,
265e9c76bddSJeremy L Thompson is_good_run, args));
266e15f9bd0SJeremy L Thompson return CEED_ERROR_SUCCESS;
26730f4f45fSnbeams }
2682a86cc9dSSebastian Grimberg
2692a86cc9dSSebastian Grimberg //------------------------------------------------------------------------------
270