1 #include "sycldevice.hpp" 2 #include <CL/sycl.hpp> 3 #include <Kokkos_Core.hpp> 4 5 namespace Petsc 6 { 7 8 namespace device 9 { 10 11 namespace sycl 12 { 13 14 namespace impl 15 { 16 17 class DeviceContext { 18 public: 19 struct PetscDeviceContext_SYCL { 20 ::sycl::event event; 21 ::sycl::event begin; // timer-only 22 ::sycl::event end; // timer-only 23 Kokkos::Timer timer{}; // use cpu time since sycl events are return value of queue submission and we have no infrastructure to store them 24 double timeBegin{}; 25 #if PetscDefined(USE_DEBUG) 26 PetscBool timerInUse{}; 27 #endif 28 ::sycl::queue queue; 29 }; 30 31 private: 32 static bool initialized_; 33 34 static PetscErrorCode finalize_() noexcept 35 { 36 PetscFunctionBegin; 37 initialized_ = false; 38 PetscFunctionReturn(PETSC_SUCCESS); 39 } 40 41 static PetscErrorCode initialize_(PetscInt id, PetscDeviceContext dctx) noexcept 42 { 43 PetscFunctionBegin; 44 PetscCall(PetscDeviceCheckDeviceCount_Internal(id)); 45 if (!initialized_) { 46 initialized_ = true; 47 PetscCall(PetscRegisterFinalize(finalize_)); 48 } 49 PetscFunctionReturn(PETSC_SUCCESS); 50 } 51 52 public: 53 const struct _DeviceContextOps ops = {destroy, changeStreamType, setUp, query, waitForContext, synchronize, getBlasHandle, getSolverHandle, getStreamHandle, beginTimer, endTimer, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr}; 54 55 // default constructor 56 DeviceContext() noexcept = default; 57 58 // All of these functions MUST be static in order to be callable from C, otherwise they 59 // get the implicit 'this' pointer tacked on 60 static PetscErrorCode destroy(PetscDeviceContext dctx) noexcept 61 { 62 PetscFunctionBegin; 63 delete static_cast<PetscDeviceContext_SYCL *>(dctx->data); 64 dctx->data = nullptr; 65 PetscFunctionReturn(PETSC_SUCCESS); 66 } 67 68 static PetscErrorCode setUp(PetscDeviceContext dctx) noexcept 69 { 70 PetscFunctionBegin; 71 #if PetscDefined(USE_DEBUG) 72 static_cast<PetscDeviceContext_SYCL *>(dctx->data)->timerInUse = PETSC_FALSE; 73 #endif 74 // petsc/sycl currently only uses Kokkos's default execution space (and its queue), 75 // so in some sense, we have only one petsc device context. 76 PetscCall(PetscKokkosInitializeCheck()); 77 static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue = Kokkos::DefaultExecutionSpace().sycl_queue(); 78 PetscFunctionReturn(PETSC_SUCCESS); 79 } 80 81 static PetscErrorCode query(PetscDeviceContext dctx, PetscBool *idle) noexcept 82 { 83 PetscFunctionBegin; 84 // available in future, https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_oneapi_queue_empty.asciidoc 85 // *idle = static_cast<PetscDeviceContext_SYCL*>(dctx->data)->queue.empty() ? PETSC_TRUE : PETSC_FALSE; 86 *idle = PETSC_FALSE; 87 PetscFunctionReturn(PETSC_SUCCESS); 88 } 89 90 static PetscErrorCode synchronize(PetscDeviceContext dctx) noexcept 91 { 92 PetscBool idle = PETSC_TRUE; 93 const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 94 95 PetscFunctionBegin; 96 PetscCall(query(dctx, &idle)); 97 if (!idle) PetscCallCXX(dci->queue.wait()); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode getStreamHandle(PetscDeviceContext dctx, void **handle) noexcept 102 { 103 PetscFunctionBegin; 104 *reinterpret_cast<::sycl::queue **>(handle) = &(static_cast<PetscDeviceContext_SYCL *>(dctx->data)->queue); 105 PetscFunctionReturn(PETSC_SUCCESS); 106 } 107 108 static PetscErrorCode beginTimer(PetscDeviceContext dctx) noexcept 109 { 110 const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 111 112 PetscFunctionBegin; 113 #if PetscDefined(USE_DEBUG) 114 PetscCheck(!dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeEnd()?"); 115 dci->timerInUse = PETSC_TRUE; 116 #endif 117 PetscCallCXX(dci->timeBegin = dci->timer.seconds()); 118 PetscFunctionReturn(PETSC_SUCCESS); 119 } 120 121 static PetscErrorCode endTimer(PetscDeviceContext dctx, PetscLogDouble *elapsed) noexcept 122 { 123 const auto dci = static_cast<PetscDeviceContext_SYCL *>(dctx->data); 124 125 PetscFunctionBegin; 126 #if PetscDefined(USE_DEBUG) 127 PetscCheck(dci->timerInUse, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Forgot to call PetscLogGpuTimeBegin()?"); 128 dci->timerInUse = PETSC_FALSE; 129 #endif 130 PetscCallCXX(dci->queue.wait()); 131 PetscCallCXX(*elapsed = dci->timer.seconds() - dci->timeBegin); 132 PetscFunctionReturn(PETSC_SUCCESS); 133 } 134 135 static PetscErrorCode changeStreamType(PetscDeviceContext, PetscStreamType) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 136 static PetscErrorCode waitForContext(PetscDeviceContext, PetscDeviceContext) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 137 static PetscErrorCode getBlasHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 138 static PetscErrorCode getSolverHandle(PetscDeviceContext, void *) noexcept { SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented"); } 139 }; 140 141 } // namespace impl 142 143 } // namespace sycl 144 145 } // namespace device 146 147 } // namespace Petsc 148 149 PetscErrorCode PetscDeviceContextCreate_SYCL(PetscDeviceContext dctx) 150 { 151 using namespace Petsc::device::sycl::impl; 152 153 static const DeviceContext syclctx; 154 155 PetscFunctionBegin; 156 PetscCallCXX(dctx->data = new DeviceContext::PetscDeviceContext_SYCL()); 157 dctx->ops[0] = syclctx.ops; 158 PetscFunctionReturn(PETSC_SUCCESS); 159 } 160