1 // Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED: http://github.com/ceed
7
8 #include "ceed-hip-compile.h"
9
10 #include <ceed.h>
11 #include <ceed/backend.h>
12 #include <ceed/jit-tools.h>
13 #include <stdarg.h>
14 #include <string.h>
15 #include <hip/hiprtc.h>
16
17 #include <sstream>
18
19 #include "ceed-hip-common.h"
20
21 #define CeedChk_hiprtc(ceed, x) \
22 do { \
23 hiprtcResult result = static_cast<hiprtcResult>(x); \
24 if (result != HIPRTC_SUCCESS) return CeedError((ceed), CEED_ERROR_BACKEND, hiprtcGetErrorString(result)); \
25 } while (0)
26
27 #define CeedCallHiprtc(ceed, ...) \
28 do { \
29 int ierr_q_ = __VA_ARGS__; \
30 CeedChk_hiprtc(ceed, ierr_q_); \
31 } while (0)
32
33 //------------------------------------------------------------------------------
34 // Compile HIP kernel
35 //------------------------------------------------------------------------------
CeedCompileCore_Hip(Ceed ceed,const char * source,const bool throw_error,bool * is_compile_good,hipModule_t * module,const CeedInt num_defines,va_list args)36 static int CeedCompileCore_Hip(Ceed ceed, const char *source, const bool throw_error, bool *is_compile_good, hipModule_t *module,
37 const CeedInt num_defines, va_list args) {
38 size_t ptx_size;
39 char *ptx;
40 const int num_opts = 4;
41 CeedInt num_jit_source_dirs = 0, num_jit_defines = 0;
42 const char **opts;
43 int runtime_version;
44 hiprtcProgram prog;
45 struct hipDeviceProp_t prop;
46 Ceed_Hip *ceed_data;
47
48 hipFree(0); // Make sure a Context exists for hiprtc
49
50 std::ostringstream code;
51
52 // Add hip runtime include statement for generation if runtime < 40400000 (implies ROCm < 4.5)
53 CeedCallHip(ceed, hipRuntimeGetVersion(&runtime_version));
54 if (runtime_version < 40400000) {
55 code << "\n#include <hip/hip_runtime.h>\n";
56 }
57 // With ROCm 4.5, need to include these definitions specifically for hiprtc (but cannot include the runtime header)
58 else {
59 code << "#include <stddef.h>\n";
60 code << "#define __forceinline__ inline __attribute__((always_inline))\n";
61 code << "#define HIP_DYNAMIC_SHARED(type, var) extern __shared__ type var[];\n";
62 }
63
64 // Kernel specific options, such as kernel constants
65 if (num_defines > 0) {
66 char *name;
67 int val;
68
69 for (int i = 0; i < num_defines; i++) {
70 name = va_arg(args, char *);
71 val = va_arg(args, int);
72 code << "#define " << name << " " << val << "\n";
73 }
74 }
75
76 // Standard libCEED definitions for HIP backends
77 code << "#include <ceed/jit-source/hip/hip-jit.h>\n\n";
78
79 // Non-macro options
80 CeedCallBackend(CeedCalloc(num_opts, &opts));
81 opts[0] = "-default-device";
82 CeedCallBackend(CeedGetData(ceed, (void **)&ceed_data));
83 CeedCallHip(ceed, hipGetDeviceProperties(&prop, ceed_data->device_id));
84 std::string arch_arg = "--gpu-architecture=" + std::string(prop.gcnArchName);
85 opts[1] = arch_arg.c_str();
86 opts[2] = "-munsafe-fp-atomics";
87 opts[3] = "-DCEED_RUNNING_JIT_PASS=1";
88 // Additional include dirs
89 {
90 const char **jit_source_dirs;
91
92 CeedCallBackend(CeedGetJitSourceRoots(ceed, &num_jit_source_dirs, &jit_source_dirs));
93 CeedCallBackend(CeedRealloc(num_opts + num_jit_source_dirs, &opts));
94 for (CeedInt i = 0; i < num_jit_source_dirs; i++) {
95 std::ostringstream include_dir_arg;
96
97 include_dir_arg << "-I" << jit_source_dirs[i];
98 CeedCallBackend(CeedStringAllocCopy(include_dir_arg.str().c_str(), (char **)&opts[num_opts + i]));
99 }
100 CeedCallBackend(CeedRestoreJitSourceRoots(ceed, &jit_source_dirs));
101 }
102 // User defines
103 {
104 const char **jit_defines;
105
106 CeedCallBackend(CeedGetJitDefines(ceed, &num_jit_defines, &jit_defines));
107 CeedCallBackend(CeedRealloc(num_opts + num_jit_source_dirs + num_jit_defines, &opts));
108 for (CeedInt i = 0; i < num_jit_defines; i++) {
109 std::ostringstream define_arg;
110
111 define_arg << "-D" << jit_defines[i];
112 CeedCallBackend(CeedStringAllocCopy(define_arg.str().c_str(), (char **)&opts[num_opts + num_jit_source_dirs + i]));
113 }
114 CeedCallBackend(CeedRestoreJitDefines(ceed, &jit_defines));
115 }
116
117 // Add string source argument provided in call
118 code << source;
119
120 // Create Program
121 CeedCallHiprtc(ceed, hiprtcCreateProgram(&prog, code.str().c_str(), NULL, 0, NULL, NULL));
122
123 // Compile kernel
124 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- ATTEMPTING TO COMPILE JIT SOURCE ----------\n");
125 CeedDebug(ceed, "Source:\n%s\n", code.str().c_str());
126 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JIT SOURCE ----------\n");
127 if (CeedDebugFlag(ceed)) {
128 // LCOV_EXCL_START
129 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- JiT COMPILER OPTIONS ----------\n");
130 for (CeedInt i = 0; i < num_opts + num_jit_source_dirs + num_jit_defines; i++) {
131 CeedDebug(ceed, "Option %d: %s", i, opts[i]);
132 }
133 CeedDebug(ceed, "");
134 CeedDebug256(ceed, CEED_DEBUG_COLOR_SUCCESS, "---------- END OF JiT COMPILER OPTIONS ----------\n");
135 // LCOV_EXCL_STOP
136 }
137 hiprtcResult result = hiprtcCompileProgram(prog, num_opts + num_jit_source_dirs + num_jit_defines, opts);
138
139 for (CeedInt i = 0; i < num_jit_source_dirs; i++) {
140 CeedCallBackend(CeedFree(&opts[num_opts + i]));
141 }
142 for (CeedInt i = 0; i < num_jit_defines; i++) {
143 CeedCallBackend(CeedFree(&opts[num_opts + num_jit_source_dirs + i]));
144 }
145 CeedCallBackend(CeedFree(&opts));
146 *is_compile_good = result == HIPRTC_SUCCESS;
147 if (!*is_compile_good) {
148 size_t log_size;
149 char *log;
150
151 CeedChk_hiprtc(ceed, hiprtcGetProgramLogSize(prog, &log_size));
152 CeedCallBackend(CeedMalloc(log_size, &log));
153 CeedCallHiprtc(ceed, hiprtcGetProgramLog(prog, log));
154 if (throw_error) {
155 return CeedError(ceed, CEED_ERROR_BACKEND, "%s\n%s", hiprtcGetErrorString(result), log);
156 } else {
157 // LCOV_EXCL_START
158 CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- COMPILE ERROR DETECTED ----------\n");
159 CeedDebug(ceed, "Error: %s\nCompile log:\n%s\n", hiprtcGetErrorString(result), log);
160 CeedDebug256(ceed, CEED_DEBUG_COLOR_WARNING, "---------- BACKEND MAY FALLBACK ----------\n");
161 CeedCallBackend(CeedFree(&log));
162 CeedCallHiprtc(ceed, hiprtcDestroyProgram(&prog));
163 return CEED_ERROR_SUCCESS;
164 // LCOV_EXCL_STOP
165 }
166 }
167
168 CeedCallHiprtc(ceed, hiprtcGetCodeSize(prog, &ptx_size));
169 CeedCallBackend(CeedMalloc(ptx_size, &ptx));
170 CeedCallHiprtc(ceed, hiprtcGetCode(prog, ptx));
171 CeedCallHiprtc(ceed, hiprtcDestroyProgram(&prog));
172
173 CeedCallHip(ceed, hipModuleLoadData(module, ptx));
174 CeedCallBackend(CeedFree(&ptx));
175 return CEED_ERROR_SUCCESS;
176 }
177
CeedCompile_Hip(Ceed ceed,const char * source,hipModule_t * module,const CeedInt num_defines,...)178 int CeedCompile_Hip(Ceed ceed, const char *source, hipModule_t *module, const CeedInt num_defines, ...) {
179 bool is_compile_good = true;
180 va_list args;
181
182 va_start(args, num_defines);
183 const CeedInt ierr = CeedCompileCore_Hip(ceed, source, true, &is_compile_good, module, num_defines, args);
184
185 va_end(args);
186 CeedCallBackend(ierr);
187 return CEED_ERROR_SUCCESS;
188 }
189
CeedTryCompile_Hip(Ceed ceed,const char * source,bool * is_compile_good,hipModule_t * module,const CeedInt num_defines,...)190 int CeedTryCompile_Hip(Ceed ceed, const char *source, bool *is_compile_good, hipModule_t *module, const CeedInt num_defines, ...) {
191 va_list args;
192
193 va_start(args, num_defines);
194 const CeedInt ierr = CeedCompileCore_Hip(ceed, source, false, is_compile_good, module, num_defines, args);
195
196 va_end(args);
197 CeedCallBackend(ierr);
198 return CEED_ERROR_SUCCESS;
199 }
200
201 //------------------------------------------------------------------------------
202 // Get HIP kernel
203 //------------------------------------------------------------------------------
CeedGetKernel_Hip(Ceed ceed,hipModule_t module,const char * name,hipFunction_t * kernel)204 int CeedGetKernel_Hip(Ceed ceed, hipModule_t module, const char *name, hipFunction_t *kernel) {
205 CeedCallHip(ceed, hipModuleGetFunction(kernel, module, name));
206 return CEED_ERROR_SUCCESS;
207 }
208
209 //------------------------------------------------------------------------------
210 // Run HIP kernel
211 //------------------------------------------------------------------------------
CeedRunKernel_Hip(Ceed ceed,hipFunction_t kernel,const int grid_size,const int block_size,void ** args)212 int CeedRunKernel_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size, void **args) {
213 CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size, 1, 1, 0, NULL, args, NULL));
214 return CEED_ERROR_SUCCESS;
215 }
216
217 //------------------------------------------------------------------------------
218 // Run HIP kernel for spatial dimension
219 //------------------------------------------------------------------------------
CeedRunKernelDim_Hip(Ceed ceed,hipFunction_t kernel,const int grid_size,const int block_size_x,const int block_size_y,const int block_size_z,void ** args)220 int CeedRunKernelDim_Hip(Ceed ceed, hipFunction_t kernel, const int grid_size, const int block_size_x, const int block_size_y, const int block_size_z,
221 void **args) {
222 CeedCallHip(ceed, hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z, 0, NULL, args, NULL));
223 return CEED_ERROR_SUCCESS;
224 }
225
226 //------------------------------------------------------------------------------
227 // Run HIP kernel for spatial dimension with shared memory
228 //------------------------------------------------------------------------------
CeedRunKernelDimSharedCore_Hip(Ceed ceed,hipFunction_t kernel,hipStream_t stream,const int grid_size,const int block_size_x,const int block_size_y,const int block_size_z,const int shared_mem_size,const bool throw_error,bool * is_good_run,void ** args)229 static int CeedRunKernelDimSharedCore_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x,
230 const int block_size_y, const int block_size_z, const int shared_mem_size, const bool throw_error,
231 bool *is_good_run, void **args) {
232 hipError_t result = hipModuleLaunchKernel(kernel, grid_size, 1, 1, block_size_x, block_size_y, block_size_z, shared_mem_size, stream, args, NULL);
233
234 if (result == hipSuccess) {
235 *is_good_run = true;
236 } else {
237 if (throw_error) {
238 CeedCallHip(ceed, result);
239 } else {
240 // LCOV_EXCL_START
241 const char *message = hipGetErrorName(result);
242
243 CeedDebug256(ceed, CEED_DEBUG_COLOR_ERROR, "---------- LAUNCH ERROR DETECTED ----------\n");
244 CeedDebug(ceed, "%s\n", message);
245 CeedDebug256(ceed, CEED_DEBUG_COLOR_WARNING, "---------- BACKEND MAY FALLBACK ----------\n");
246 // LCOV_EXCL_STOP
247 }
248 *is_good_run = false;
249 }
250 return CEED_ERROR_SUCCESS;
251 }
252
CeedRunKernelDimShared_Hip(Ceed ceed,hipFunction_t kernel,hipStream_t stream,const int grid_size,const int block_size_x,const int block_size_y,const int block_size_z,const int shared_mem_size,void ** args)253 int CeedRunKernelDimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x,
254 const int block_size_y, const int block_size_z, const int shared_mem_size, void **args) {
255 bool is_good_run = true;
256
257 CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, block_size_y, block_size_z, shared_mem_size, true,
258 &is_good_run, args));
259 return CEED_ERROR_SUCCESS;
260 }
261
CeedTryRunKernelDimShared_Hip(Ceed ceed,hipFunction_t kernel,hipStream_t stream,const int grid_size,const int block_size_x,const int block_size_y,const int block_size_z,const int shared_mem_size,bool * is_good_run,void ** args)262 int CeedTryRunKernelDimShared_Hip(Ceed ceed, hipFunction_t kernel, hipStream_t stream, const int grid_size, const int block_size_x,
263 const int block_size_y, const int block_size_z, const int shared_mem_size, bool *is_good_run, void **args) {
264 CeedCallBackend(CeedRunKernelDimSharedCore_Hip(ceed, kernel, stream, grid_size, block_size_x, block_size_y, block_size_z, shared_mem_size, false,
265 is_good_run, args));
266 return CEED_ERROR_SUCCESS;
267 }
268
269 //------------------------------------------------------------------------------
270