xref: /petsc/src/sys/objects/device/impls/sycl/syclcontext.sycl.cxx (revision a336c15037c72f93cd561f5a5e11e93175f2efd9)
1 #include "sycldevice.hpp"
2 #include <sycl/sycl.hpp>
3 #include <chrono>
4 
5 namespace Petsc
6 {
7 
8 namespace device
9 {
10 
11 namespace sycl
12 {
13 
14 namespace impl
15 {
16 
17 class DeviceContext {
18 public:
19   struct PetscDeviceContext_SYCL {
20     ::sycl::event event;
21     ::sycl::event begin; // timer-only
22     ::sycl::event end;   // timer-only
23 #if PetscDefined(USE_DEBUG)
24     PetscBool timerInUse{};
25 #endif
26     ::sycl::queue queue;
27 
28     std::chrono::time_point<std::chrono::steady_clock> timeBegin{};
29   };
30 
31 private:
32   static bool initialized_;
33 
34   static PetscErrorCode finalize_() noexcept
35   {
36     PetscFunctionBegin;
37     initialized_ = false;
38     PetscFunctionReturn(PETSC_SUCCESS);
39   }
40 
41   static PetscErrorCode initialize_(PetscInt id, PetscDeviceContext dctx) noexcept
42   {
43     PetscFunctionBegin;
44     PetscCall(PetscDeviceCheckDeviceCount_Internal(id));
45     if (!initialized_) {
46       initialized_ = true;
47       PetscCall(PetscRegisterFinalize(finalize_));
48     }
49     PetscFunctionReturn(PETSC_SUCCESS);
50   }
51 
52 public:
53   const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
54 
55   // default constructor
56   DeviceContext() noexcept = default;
57 
58   // All of these functions MUST be static in order to be callable from C, otherwise they
59   // get the implicit 'this' pointer tacked on
60   static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept
61   {
62     PetscFunctionBegin;
63     delete static_cast<PetscDeviceContext_SYCL *>(dctx->data);
64     dctx->data = nullptr;
65     PetscFunctionReturn(PETSC_SUCCESS);
66   }
67 
68   static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept
69   {
70     PetscDevice dev;
71     PetscInt    id;
72 
73     PetscFunctionBegin;
74 #if PetscDefined(USE_DEBUG)
75     static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE;
76 #endif
77     PetscCall(PetscDeviceContextGetDevice(dctx, &dev));
78     PetscCall(PetscDeviceGetDeviceId(dev, &id));
79     const ::sycl::device &syclDevice = (id == PETSC_SYCL_DEVICE_HOST) ? ::sycl::device(::sycl::cpu_selector_v) : ::sycl::device::get_devices(::sycl::info::device_type::gpu)[id];
80 
81     static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = ::sycl::queue(syclDevice, ::sycl::property::queue::in_order());
82     PetscFunctionReturn(PETSC_SUCCESS);
83   }
84 
85   static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept
86   {
87     PetscFunctionBegin;
88     // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc
89     // *idle = static_cast<PetscDeviceContext_SYCL*>(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE;
90     *idle = PETSC_FALSE;
91     PetscFunctionReturn(PETSC_SUCCESS);
92   }
93 
94   static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept
95   {
96     PetscBool  idle = PETSC_TRUE;
97     const auto dci  = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
98 
99     PetscFunctionBegin;
100     PetscCall(query(dctx, &idle));
101     if (!idle) PetscCallCXX(dci->queue.wait());
102     PetscFunctionReturn(PETSC_SUCCESS);
103   }
104 
105   static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void **handle) noexcept
106   {
107     PetscFunctionBegin;
108     *reinterpret_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue);
109     PetscFunctionReturn(PETSC_SUCCESS);
110   }
111 
112   static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept
113   {
114     const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
115 
116     PetscFunctionBegin;
117 #if PetscDefined(USE_DEBUG)
118     PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?");
119     dci->timerInUse = PETSC_TRUE;
120 #endif
121     // It is not a good approach to time SYCL kernels because the timer starts at the kernel launch time at host,
122     // not at the start of execution time on device. SYCL provides this style of kernel timing:
123     /*
124       sycl::queue q(sycl::default_selector_v, sycl::property::queue::enable_profiling{});
125       sycl::event e = q.submit([&](sycl::handler &h) {
126         ...
127       });
128       e.wait();
129       auto start_time = e.get_profiling_info<sycl::info::event_profiling::command_start>();
130       auto end_time = e.get_profiling_info<sycl::info::event_profiling::command_end>();
131       long long kernel_duration_ns = end_time - start_time;
132     */
133     // It requires 1) enable profiling at the queue's creation time, and 2) store the event returned by kernel launch.
134     // But neither we have control of the input queue, nor does PetscDeviceContext support 2), so we just use a
135     // host side timer.
136     PetscCallCXX(dci->timeBegin = std::chrono::steady_clock::now());
137     PetscFunctionReturn(PETSC_SUCCESS);
138   }
139 
140   static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept
141   {
142     const auto                    dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data);
143     std::chrono::duration<double> duration;
144 
145     PetscFunctionBegin;
146 #if PetscDefined(USE_DEBUG)
147     PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?");
148     dci->timerInUse = PETSC_FALSE;
149 #endif
150     PetscCallCXX(dci->queue.wait());
151     PetscCallCXX(duration = std::chrono::steady_clock::now() - dci->timeBegin);
152     PetscCallCXX(*elapsed = duration.count());
153     PetscFunctionReturn(PETSC_SUCCESS);
154   }
155 
156   static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
157   static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
158   static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
159   static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); }
160 };
161 
162 } // namespace impl
163 
164 } // namespace sycl
165 
166 } // namespace device
167 
168 } // namespace Petsc
169 
170 PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx)
171 {
172   using namespace Petsc::device::sycl::impl;
173 
174   static const DeviceContext syclctx;
175 
176   PetscFunctionBegin;
177   PetscCallCXX(dctx->data = new DeviceContext::PetscDeviceContext_SYCL());
178   dctx->ops[0] = syclctx.ops;
179   PetscFunctionReturn(PETSC_SUCCESS);
180 }
181