1 #include "sycldevice.hpp" 2 #include <sycl/sycl.hpp> 3 #include <chrono> 4 5 namespace Petsc 6 { 7 8 namespace device 9 { 10 11 namespace sycl 12 { 13 14 namespace impl 15 { 16 17 class DeviceContext { 18 public: 19 struct PetscDeviceContext_SYCL { 20 ::sycl::event event; 21 ::sycl::event begin; // timer-only 22 ::sycl::event end; // timer-only 23 #if PetscDefined(USE_DEBUG) 24 PetscBool timerInUse{}; 25 #endif 26 ::sycl::queue queue; 27 28 std::chrono::time_point<std::chrono::steady_clock> timeBegin{}; 29 }; 30 31 private: 32 static bool initialized_; 33 34 static PetscErrorCode finalize_() noexcept 35 { 36 PetscFunctionBegin; 37 initialized_ = false; 38 PetscFunctionReturn(PETSC_SUCCESS); 39 } 40 41 static PetscErrorCode initialize_(PetscInt id, PetscDeviceContext dctx) noexcept 42 { 43 PetscFunctionBegin; 44 PetscCall(PetscDeviceCheckDeviceCount_Internal(id)); 45 if (!initialized_) { 46 initialized_ = true; 47 PetscCall(PetscRegisterFinalize(finalize_)); 48 } 49 PetscFunctionReturn(PETSC_SUCCESS); 50 } 51 52 public: 53 const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}; 54 55 // default constructor 56 DeviceContext() noexcept = default; 57 58 // All of these functions MUST be static in order to be callable from C, otherwise they 59 // get the implicit 'this' pointer tacked on 60 static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept 61 { 62 PetscFunctionBegin; 63 delete static_cast<PetscDeviceContext_SYCL *>(dctx->data); 64 dctx->data = nullptr; 65 PetscFunctionReturn(PETSC_SUCCESS); 66 } 67 68 static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept 69 { 70 PetscDevice dev; 71 PetscInt id; 72 73 PetscFunctionBegin; 74 #if PetscDefined(USE_DEBUG) 75 static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE; 76 #endif 77 PetscCall(PetscDeviceContextGetDevice(dctx, &dev)); 78 PetscCall(PetscDeviceGetDeviceId(dev, &id)); 79 const ::sycl::device &syclDevice = (id == PETSC_SYCL_DEVICE_HOST) ? ::sycl::device(::sycl::cpu_selector_v) : ::sycl::device::get_devices(::sycl::info::device_type::gpu)[id]; 80 81 static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = ::sycl::queue(syclDevice, ::sycl::property::queue::in_order()); 82 PetscFunctionReturn(PETSC_SUCCESS); 83 } 84 85 static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept 86 { 87 PetscFunctionBegin; 88 // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc 89 // *idle = static_cast<PetscDeviceContext_SYCL*>(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE; 90 *idle = PETSC_FALSE; 91 PetscFunctionReturn(PETSC_SUCCESS); 92 } 93 94 static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept 95 { 96 PetscBool idle = PETSC_TRUE; 97 const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 98 99 PetscFunctionBegin; 100 PetscCall(query(dctx, &idle)); 101 if (!idle) PetscCallCXX(dci->queue.wait()); 102 PetscFunctionReturn(PETSC_SUCCESS); 103 } 104 105 static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void **handle) noexcept 106 { 107 PetscFunctionBegin; 108 *reinterpret_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept 113 { 114 const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 115 116 PetscFunctionBegin; 117 #if PetscDefined(USE_DEBUG) 118 PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?"); 119 dci->timerInUse = PETSC_TRUE; 120 #endif 121 // It is not a good approach to time SYCL kernels because the timer starts at the kernel launch time at host, 122 // not at the start of execution time on device. SYCL provides this style of kernel timing: 123 /* 124 sycl::queue q(sycl::default_selector_v, sycl::property::queue::enable_profiling{}); 125 sycl::event e = q.submit([&](sycl::handler &h) { 126 ... 127 }); 128 e.wait(); 129 auto start_time = e.get_profiling_info<sycl::info::event_profiling::command_start>(); 130 auto end_time = e.get_profiling_info<sycl::info::event_profiling::command_end>(); 131 long long kernel_duration_ns = end_time - start_time; 132 */ 133 // It requires 1) enable profiling at the queue's creation time, and 2) store the event returned by kernel launch. 134 // But neither we have control of the input queue, nor does PetscDeviceContext support 2), so we just use a 135 // host side timer. 136 PetscCallCXX(dci->timeBegin = std::chrono::steady_clock::now()); 137 PetscFunctionReturn(PETSC_SUCCESS); 138 } 139 140 static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept 141 { 142 const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 143 std::chrono::duration<double> duration; 144 145 PetscFunctionBegin; 146 #if PetscDefined(USE_DEBUG) 147 PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?"); 148 dci->timerInUse = PETSC_FALSE; 149 #endif 150 PetscCallCXX(dci->queue.wait()); 151 PetscCallCXX(duration = std::chrono::steady_clock::now() - dci->timeBegin); 152 PetscCallCXX(*elapsed = duration.count()); 153 PetscFunctionReturn(PETSC_SUCCESS); 154 } 155 156 static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 157 static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 158 static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 159 static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 160 }; 161 162 } // namespace impl 163 164 } // namespace sycl 165 166 } // namespace device 167 168 } // namespace Petsc 169 170 PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx) 171 { 172 using namespace Petsc::device::sycl::impl; 173 174 static const DeviceContext syclctx; 175 176 PetscFunctionBegin; 177 PetscCallCXX(dctx->data = new DeviceContext::PetscDeviceContext_SYCL()); 178 dctx->ops[0] = syclctx.ops; 179 PetscFunctionReturn(PETSC_SUCCESS); 180 } 181