1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/ 2 3 #include <petsc/private/cpp/object_pool.hpp> 4 #include <petsc/private/cpp/utility.hpp> 5 #include <petsc/private/cpp/unordered_map.hpp> 6 7 #include <algorithm> // std::remove_if(), std::find_if() 8 #include <vector> 9 #include <string> 10 #include <sstream> // std::ostringstream 11 12 #if defined(__clang__) 13 #pragma clang diagnostic push 14 #pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" 15 #endif 16 17 // ========================================================================================== 18 // PetscEvent 19 // ========================================================================================== 20 21 class PetscEventConstructor : public Petsc::ConstructorInterface<_n_PetscEvent, PetscEventConstructor> { 22 public: 23 PetscErrorCode construct_(PetscEvent event) const noexcept 24 { 25 PetscFunctionBegin; 26 PetscCall(PetscArrayzero(event, 1)); 27 PetscCall(underlying().reset(event)); 28 PetscFunctionReturn(PETSC_SUCCESS); 29 } 30 31 PetscErrorCode destroy_(PetscEvent event) const noexcept 32 { 33 PetscFunctionBegin; 34 PetscCall(underlying().reset(event)); 35 PetscFunctionReturn(PETSC_SUCCESS); 36 } 37 38 static PetscErrorCode reset_(PetscEvent event) noexcept 39 { 40 PetscFunctionBegin; 41 if (auto &destroy = event->destroy) { 42 PetscCall((*destroy)(event)); 43 destroy = nullptr; 44 } 45 PetscAssert(!event->data, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Event failed to destroy its data member: %p", event->data); 46 event->dctx_id = 0; 47 event->dctx_state = 0; 48 event->dtype = PETSC_DEVICE_DEFAULT(); 49 PetscFunctionReturn(PETSC_SUCCESS); 50 } 51 52 static PetscErrorCode invalidate_(PetscEvent) noexcept { return PETSC_SUCCESS; } 53 }; 54 55 static Petsc::ObjectPool<_n_PetscEvent, PetscEventConstructor> event_pool; 56 57 static PetscErrorCode PetscDeviceContextCreateEvent_Private(PetscDeviceContext dctx, PetscEvent *event) 58 { 59 PetscFunctionBegin; 60 PetscValidDeviceContext(dctx, 1); 61 PetscValidPointer(event, 2); 62 PetscCall(event_pool.allocate(event)); 63 PetscCall(PetscDeviceContextGetDeviceType(dctx, &(*event)->dtype)); 64 PetscTryTypeMethod(dctx, createevent, *event); 65 PetscFunctionReturn(PETSC_SUCCESS); 66 } 67 68 static PetscErrorCode PetscEventDestroy_Private(PetscEvent *event) 69 { 70 PetscFunctionBegin; 71 PetscValidPointer(event, 1); 72 if (*event) PetscCall(event_pool.deallocate(event)); 73 PetscFunctionReturn(PETSC_SUCCESS); 74 } 75 76 static PetscErrorCode PetscDeviceContextRecordEvent_Private(PetscDeviceContext dctx, PetscEvent event) 77 { 78 PetscObjectId id; 79 PetscObjectState state; 80 81 PetscFunctionBegin; 82 PetscValidDeviceContext(dctx, 1); 83 PetscValidPointer(event, 2); 84 id = PetscObjectCast(dctx)->id; 85 state = PetscObjectCast(dctx)->state; 86 // technically state can never be less than event->dctx_state (only equal) but we include 87 // it in the check just in case 88 if ((id == event->dctx_id) && (state <= event->dctx_state)) PetscFunctionReturn(PETSC_SUCCESS); 89 if (dctx->ops->recordevent) { 90 // REVIEW ME: 91 // TODO maybe move this to impls, as they can determine whether they can interoperate with 92 // other device types more readily 93 if (PetscDefined(USE_DEBUG) && (event->dtype != PETSC_DEVICE_HOST)) { 94 PetscDeviceType dtype; 95 96 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 97 PetscCheck(event->dtype == dtype, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Event type %s does not match device context type %s", PetscDeviceTypes[event->dtype], PetscDeviceTypes[dtype]); 98 } 99 PetscUseTypeMethod(dctx, recordevent, event); 100 } 101 event->dctx_id = id; 102 event->dctx_state = state; 103 PetscFunctionReturn(PETSC_SUCCESS); 104 } 105 106 static PetscErrorCode PetscDeviceContextWaitForEvent_Private(PetscDeviceContext dctx, PetscEvent event) 107 { 108 PetscFunctionBegin; 109 PetscValidDeviceContext(dctx, 1); 110 PetscValidPointer(event, 2); 111 // empty data implies you cannot wait on this event 112 if (!event->data) PetscFunctionReturn(PETSC_SUCCESS); 113 if (PetscDefined(USE_DEBUG)) { 114 const auto etype = event->dtype; 115 PetscDeviceType dtype; 116 117 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 118 PetscCheck(etype == dtype, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Event type %s does not match device context type %s", PetscDeviceTypes[etype], PetscDeviceTypes[dtype]); 119 } 120 if (PetscObjectCast(dctx)->id == event->dctx_id) PetscFunctionReturn(PETSC_SUCCESS); 121 PetscTryTypeMethod(dctx, waitforevent, event); 122 PetscFunctionReturn(PETSC_SUCCESS); 123 } 124 125 // ========================================================================================== 126 // PetscStackFrame 127 // 128 // A helper class that (when debugging is enabled) contains the stack frame from which 129 // PetscDeviceContextMakrIntentFromID(). It is intended to be derived from, since this enables 130 // empty-base-class optimization to kick in when debugging is disabled. 131 // ========================================================================================== 132 133 template <bool use_debug> 134 struct PetscStackFrame; 135 136 template <> 137 struct PetscStackFrame</* use_debug = */ true> { 138 std::string file{}; 139 std::string function{}; 140 int line{}; 141 142 PetscStackFrame() = default; 143 144 PetscStackFrame(const char *file_, const char *func_, int line_) noexcept : file(split_on_petsc_path_(file_)), function(func_), line(line_) { } 145 146 bool operator==(const PetscStackFrame &other) const noexcept { return line == other.line && file == other.file && function == other.function; } 147 148 PETSC_NODISCARD std::string to_string() const noexcept 149 { 150 std::string ret; 151 152 ret = '(' + function + "() at " + file + ':' + std::to_string(line) + ')'; 153 return ret; 154 } 155 156 private: 157 static std::string split_on_petsc_path_(std::string &&in) noexcept 158 { 159 auto pos = in.find("petsc/src"); 160 161 if (pos == std::string::npos) pos = in.find("petsc/include"); 162 if (pos == std::string::npos) pos = 0; 163 return in.substr(pos); 164 } 165 166 friend std::ostream &operator<<(std::ostream &os, const PetscStackFrame &frame) 167 { 168 os << frame.to_string(); 169 return os; 170 } 171 172 friend void swap(PetscStackFrame &lhs, PetscStackFrame &rhs) noexcept 173 { 174 using std::swap; 175 176 swap(lhs.file, rhs.file); 177 swap(lhs.function, rhs.function); 178 swap(lhs.line, rhs.line); 179 } 180 }; 181 182 template <> 183 struct PetscStackFrame</* use_debug = */ false> { 184 template <typename... T> 185 constexpr PetscStackFrame(T &&...) noexcept 186 { 187 } 188 189 constexpr bool operator==(const PetscStackFrame &) const noexcept { return true; } 190 191 PETSC_NODISCARD static std::string to_string() noexcept { return "(unknown)"; } 192 193 friend std::ostream &operator<<(std::ostream &os, const PetscStackFrame &) noexcept 194 { 195 os << "(unknown)"; 196 return os; 197 } 198 }; 199 200 // ========================================================================================== 201 // MarkedObjectMap 202 // 203 // A mapping from a PetscObjectId to a PetscEvent and (if debugging is enabled) a 204 // PetscStackFrame containing the location where PetscDeviceContextMarkIntentFromID was called 205 // ========================================================================================== 206 207 class MarkedObjectMap : public Petsc::RegisterFinalizeable<MarkedObjectMap> { 208 public: 209 // Note we derive from PetscStackFrame so that the empty base class optimization can kick 210 // in. If it were just a member it would still take up storage in optimized builds 211 class snapshot_type : private PetscStackFrame<PetscDefined(USE_DEBUG) && !PetscDefined(HAVE_THREADSAFETY)> { 212 public: 213 using frame_type = PetscStackFrame<PetscDefined(USE_DEBUG) && !PetscDefined(HAVE_THREADSAFETY)>; 214 215 snapshot_type() = default; 216 snapshot_type(PetscDeviceContext, frame_type) noexcept; 217 218 ~snapshot_type() noexcept; 219 220 // movable 221 snapshot_type(snapshot_type &&) noexcept; 222 snapshot_type &operator=(snapshot_type &&) noexcept; 223 224 // not copyable 225 snapshot_type(const snapshot_type &) noexcept = delete; 226 snapshot_type &operator=(const snapshot_type &) noexcept = delete; 227 228 PETSC_NODISCARD PetscEvent event() const noexcept { return event_; } 229 PETSC_NODISCARD const frame_type &frame() const noexcept { return *this; } 230 PETSC_NODISCARD frame_type &frame() noexcept { return *this; } 231 232 PETSC_NODISCARD PetscObjectId dctx_id() const noexcept 233 { 234 PetscFunctionBegin; 235 PetscAssertAbort(event(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Snapshot %s does not contain an event!", frame().to_string().c_str()); 236 PetscFunctionReturn(event()->dctx_id); 237 } 238 239 PetscErrorCode ensure_event(PetscDeviceContext) noexcept; 240 241 friend void swap(snapshot_type &, snapshot_type &) noexcept; 242 243 private: 244 PetscEvent event_{}; // the state of device context when this snapshot was recorded 245 246 PETSC_NODISCARD static PetscEvent init_event_(PetscDeviceContext) noexcept; 247 }; 248 249 // the "value" each key maps to 250 struct mapped_type { 251 using dependency_type = std::vector<snapshot_type>; 252 253 PetscMemoryAccessMode mode = PETSC_MEMORY_ACCESS_READ; 254 snapshot_type last_write{}; 255 dependency_type dependencies{}; 256 }; 257 258 using map_type = Petsc::UnorderedMap<PetscObjectId, mapped_type>; 259 260 map_type map; 261 262 private: 263 friend RegisterFinalizeable; 264 265 PetscErrorCode finalize_() noexcept; 266 }; 267 268 // ========================================================================================== 269 // MarkedObjectMap Private API 270 // ========================================================================================== 271 272 inline PetscErrorCode MarkedObjectMap::finalize_() noexcept 273 { 274 PetscFunctionBegin; 275 PetscCall(PetscInfo(nullptr, "Finalizing marked object map\n")); 276 PetscCall(map.clear()); 277 PetscFunctionReturn(PETSC_SUCCESS); 278 } 279 280 // ========================================================================================== 281 // MarkedObjectMap::snapshot_type Private API 282 // ========================================================================================== 283 284 inline PetscEvent MarkedObjectMap::snapshot_type::init_event_(PetscDeviceContext dctx) noexcept 285 { 286 PetscEvent event = nullptr; 287 288 PetscFunctionBegin; 289 PetscCallAbort(PETSC_COMM_SELF, PetscDeviceContextCreateEvent_Private(dctx, &event)); 290 PetscCallAbort(PETSC_COMM_SELF, PetscDeviceContextRecordEvent_Private(dctx, event)); 291 PetscFunctionReturn(event); 292 } 293 294 // ========================================================================================== 295 // MarkedObjectMap::snapshot_type Public API 296 // ========================================================================================== 297 298 MarkedObjectMap::snapshot_type::snapshot_type(PetscDeviceContext dctx, frame_type frame) noexcept : frame_type(std::move(frame)), event_(init_event_(dctx)) { } 299 300 MarkedObjectMap::snapshot_type::~snapshot_type() noexcept 301 { 302 PetscFunctionBegin; 303 PetscCallAbort(PETSC_COMM_SELF, PetscEventDestroy_Private(&event_)); 304 PetscFunctionReturnVoid(); 305 } 306 307 // movable 308 MarkedObjectMap::snapshot_type::snapshot_type(snapshot_type &&other) noexcept : frame_type(std::move(other)), event_(Petsc::util::exchange(other.event_, nullptr)) { } 309 310 MarkedObjectMap::snapshot_type &MarkedObjectMap::snapshot_type::operator=(snapshot_type &&other) noexcept 311 { 312 PetscFunctionBegin; 313 if (this != &other) { 314 frame_type::operator=(std::move(other)); 315 PetscCallAbort(PETSC_COMM_SELF, PetscEventDestroy_Private(&event_)); 316 event_ = Petsc::util::exchange(other.event_, nullptr); 317 } 318 PetscFunctionReturn(*this); 319 } 320 321 PetscErrorCode MarkedObjectMap::snapshot_type::ensure_event(PetscDeviceContext dctx) noexcept 322 { 323 PetscFunctionBegin; 324 if (PetscUnlikely(!event_)) PetscCall(PetscDeviceContextCreateEvent_Private(dctx, &event_)); 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 void swap(MarkedObjectMap::snapshot_type &lhs, MarkedObjectMap::snapshot_type &rhs) noexcept 329 { 330 using std::swap; 331 332 swap(lhs.frame(), rhs.frame()); 333 swap(lhs.event_, rhs.event_); 334 } 335 336 // A mapping between PetscObjectId (i.e. some PetscObject) to the list of PetscEvent's encoding 337 // the last time the PetscObject was accessed 338 static MarkedObjectMap marked_object_map; 339 340 // ========================================================================================== 341 // Utility Functions 342 // ========================================================================================== 343 344 PetscErrorCode PetscGetMarkedObjectMap_Internal(std::size_t *nkeys, PetscObjectId **keys, PetscMemoryAccessMode **modes, std::size_t **ndeps, PetscEvent ***dependencies) 345 { 346 std::size_t i = 0; 347 const auto &map = marked_object_map.map; 348 const auto size = *nkeys = map.size(); 349 350 PetscFunctionBegin; 351 PetscCall(PetscMalloc4(size, keys, size, modes, size, ndeps, size, dependencies)); 352 for (auto it_ = map.begin(); it_ != map.end(); ++it_) { 353 auto &it = *it_; 354 std::size_t j = 0; 355 356 (*keys)[i] = it.first; 357 (*modes)[i] = it.second.mode; 358 (*ndeps)[i] = it.second.dependencies.size(); 359 (*dependencies)[i] = nullptr; 360 PetscCall(PetscMalloc1((*ndeps)[i], (*dependencies) + i)); 361 for (auto &&dep : it.second.dependencies) (*dependencies)[i][j++] = dep.event(); 362 ++i; 363 } 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366 367 PetscErrorCode PetscRestoreMarkedObjectMap_Internal(std::size_t nkeys, PetscObjectId **keys, PetscMemoryAccessMode **modes, std::size_t **ndeps, PetscEvent ***dependencies) 368 { 369 PetscFunctionBegin; 370 for (std::size_t i = 0; i < nkeys; ++i) PetscCall(PetscFree((*dependencies)[i])); 371 PetscCall(PetscFree4(*keys, *modes, *ndeps, *dependencies)); 372 PetscFunctionReturn(PETSC_SUCCESS); 373 } 374 375 template <typename T> 376 static PetscErrorCode PetscDeviceContextMapIterVisitor(PetscDeviceContext dctx, T &&callback) noexcept 377 { 378 const auto dctx_id = PetscObjectCast(dctx)->id; 379 auto &dctx_deps = CxxDataCast(dctx)->deps; 380 auto &object_map = marked_object_map.map; 381 382 PetscFunctionBegin; 383 for (auto &&dep : dctx_deps) { 384 const auto mapit = object_map.find(dep); 385 386 // Need this check since the final PetscDeviceContext may run through this *after* the map 387 // has been finalized (and cleared), and hence might fail to find its dependencies. This is 388 // perfectly valid since the user no longer cares about dangling dependencies after PETSc 389 // is finalized 390 if (PetscLikely(mapit != object_map.end())) { 391 auto &deps = mapit->second.dependencies; 392 const auto end = deps.end(); 393 const auto it = std::remove_if(deps.begin(), end, [&](const MarkedObjectMap::snapshot_type &obj) { return obj.dctx_id() == dctx_id; }); 394 395 PetscCall(callback(mapit, deps.cbegin(), static_cast<decltype(deps.cend())>(it))); 396 // remove ourselves 397 PetscCallCXX(deps.erase(it, end)); 398 // continue to next object, but erase this one if it has no more dependencies 399 if (deps.empty()) PetscCallCXX(object_map.erase(mapit)); 400 } 401 } 402 PetscCallCXX(dctx_deps.clear()); 403 PetscFunctionReturn(PETSC_SUCCESS); 404 } 405 406 PetscErrorCode PetscDeviceContextSyncClearMap_Internal(PetscDeviceContext dctx) 407 { 408 using map_iterator = MarkedObjectMap::map_type::const_iterator; 409 using dep_iterator = MarkedObjectMap::mapped_type::dependency_type::const_iterator; 410 411 PetscFunctionBegin; 412 PetscCall(PetscDeviceContextMapIterVisitor(dctx, [&](map_iterator mapit, dep_iterator it, dep_iterator end) { 413 PetscFunctionBegin; 414 if (PetscDefined(USE_DEBUG_AND_INFO)) { 415 std::ostringstream oss; 416 const auto mode = PetscMemoryAccessModeToString(mapit->second.mode); 417 418 oss << "synced dctx " << PetscObjectCast(dctx)->id << ", remaining leaves for obj " << mapit->first << ": {"; 419 while (it != end) { 420 oss << "[dctx " << it->dctx_id() << ", " << mode << ' ' << it->frame() << ']'; 421 if (++it != end) oss << ", "; 422 } 423 oss << '}'; 424 PetscCall(PetscInfo(nullptr, "%s\n", oss.str().c_str())); 425 } 426 PetscFunctionReturn(PETSC_SUCCESS); 427 })); 428 { 429 // the recursive sync clear map call is unbounded in case of a dependenct loop so we make a 430 // copy 431 // clang-format off 432 const std::vector<CxxData::upstream_type::value_type> upstream_copy( 433 std::make_move_iterator(CxxDataCast(dctx)->upstream.begin()), 434 std::make_move_iterator(CxxDataCast(dctx)->upstream.end()) 435 ); 436 // clang-format on 437 438 // aftermath, clear our set of parents (to avoid infinite recursion) and mark ourselves as no 439 // longer contained (while the empty graph technically *is* always contained, it is not what 440 // we mean by it) 441 PetscCall(CxxDataCast(dctx)->clear()); 442 //dctx->contained = PETSC_FALSE; 443 for (auto &&upstrm : upstream_copy) { 444 // check that this parent still points to what we originally thought it was 445 PetscCheck(upstrm.second.id == PetscObjectCast(upstrm.first)->id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Upstream dctx %" PetscInt64_FMT " no longer exists, now has id %" PetscInt64_FMT, upstrm.second.id, PetscObjectCast(upstrm.first)->id); 446 PetscCall(PetscDeviceContextSyncClearMap_Internal(upstrm.first)); 447 } 448 } 449 PetscFunctionReturn(PETSC_SUCCESS); 450 } 451 452 PetscErrorCode PetscDeviceContextCheckNotOrphaned_Internal(PetscDeviceContext dctx) 453 { 454 std::ostringstream oss; 455 //const auto allow = dctx->options.allow_orphans, contained = dctx->contained; 456 const auto allow = true, contained = true; 457 auto wrote_to_oss = false; 458 using map_iterator = MarkedObjectMap::map_type::const_iterator; 459 using dep_iterator = MarkedObjectMap::mapped_type::dependency_type::const_iterator; 460 461 PetscFunctionBegin; 462 PetscCall(PetscDeviceContextMapIterVisitor(dctx, [&](map_iterator mapit, dep_iterator it, dep_iterator end) { 463 PetscFunctionBegin; 464 if (allow || contained) PetscFunctionReturn(PETSC_SUCCESS); 465 wrote_to_oss = true; 466 oss << "- PetscObject (id " << mapit->first << "), intent " << PetscMemoryAccessModeToString(mapit->second.mode) << ' ' << it->frame(); 467 if (std::distance(it, end) == 0) oss << " (orphaned)"; // we were the only dependency 468 oss << '\n'; 469 PetscFunctionReturn(PETSC_SUCCESS); 470 })); 471 PetscCheck(!wrote_to_oss, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Destroying PetscDeviceContext ('%s', id %" PetscInt64_FMT ") would leave the following dangling (possibly orphaned) dependants:\n%s\nMust synchronize before destroying it, or allow it to be destroyed with orphans", 472 PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, oss.str().c_str()); 473 PetscCall(CxxDataCast(dctx)->clear()); 474 PetscFunctionReturn(PETSC_SUCCESS); 475 } 476 477 #define DEBUG_INFO(mess, ...) PetscDebugInfo(dctx, "dctx %" PetscInt64_FMT " (%s) - obj %" PetscInt64_FMT " (%s): " mess, PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", id, name, ##__VA_ARGS__) 478 479 // The current mode is compatible with the previous mode (i.e. read-read) so we need only 480 // update the existing version and possibly appeand ourselves to the dependency list 481 482 template <bool use_debug> 483 static PetscErrorCode MarkFromID_CompatibleModes(MarkedObjectMap::mapped_type &marked, PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, PetscStackFrame<use_debug> &frame, const char *PETSC_UNUSED name, bool *update_object_dependencies) 484 { 485 const auto dctx_id = PetscObjectCast(dctx)->id; 486 auto &object_dependencies = marked.dependencies; 487 const auto end = object_dependencies.end(); 488 const auto it = std::find_if(object_dependencies.begin(), end, [&](const MarkedObjectMap::snapshot_type &obj) { return obj.dctx_id() == dctx_id; }); 489 490 PetscFunctionBegin; 491 PetscCall(DEBUG_INFO("new mode (%s) COMPATIBLE with %s mode (%s), no need to serialize\n", PetscMemoryAccessModeToString(mode), object_dependencies.empty() ? "default" : "old", PetscMemoryAccessModeToString(marked.mode))); 492 if (it != end) { 493 using std::swap; 494 495 // we have been here before, all we must do is update our entry then we can bail 496 PetscCall(DEBUG_INFO("found old self as dependency, updating\n")); 497 PetscAssert(CxxDataCast(dctx)->deps.find(id) != CxxDataCast(dctx)->deps.end(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceContext %" PetscInt64_FMT " listed as dependency for object %" PetscInt64_FMT " (%s), but does not have the object in private dependency list!", dctx_id, id, name); 498 swap(it->frame(), frame); 499 PetscCall(PetscDeviceContextRecordEvent_Private(dctx, it->event())); 500 *update_object_dependencies = false; 501 PetscFunctionReturn(PETSC_SUCCESS); 502 } 503 504 // we have not been here before, need to serialize with the last write event (if it exists) 505 // and add ourselves to the dependency list 506 if (const auto event = marked.last_write.event()) PetscCall(PetscDeviceContextWaitForEvent_Private(dctx, event)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 template <bool use_debug> 511 static PetscErrorCode MarkFromID_IncompatibleModes_UpdateLastWrite(MarkedObjectMap::mapped_type &marked, PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, PetscStackFrame<use_debug> &frame, const char *PETSC_UNUSED name, bool *update_object_dependencies) 512 { 513 const auto dctx_id = PetscObjectCast(dctx)->id; 514 auto &last_write = marked.last_write; 515 auto &last_dep = marked.dependencies.back(); 516 PetscDeviceType dtype; 517 518 PetscFunctionBegin; 519 PetscAssert(marked.dependencies.size() == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Can only have a single writer as dependency, have %zu!", marked.dependencies.size()); 520 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 521 if (last_dep.event()->dtype != dtype) { 522 PetscCall(DEBUG_INFO("moving last write dependency (intent %s)\n", PetscMemoryAccessModeToString(marked.mode))); 523 last_write = std::move(last_dep); 524 PetscFunctionReturn(PETSC_SUCCESS); 525 } 526 527 // we match the device type of the dependency, we can reuse its event! 528 auto &dctx_upstream_deps = CxxDataCast(dctx)->deps; 529 const auto last_write_was_also_us = last_write.event() && (last_write.dctx_id() == dctx_id); 530 using std::swap; 531 532 PetscCall(DEBUG_INFO("we matched the previous write dependency's (intent %s) device type (%s), swapping last dependency with last write\n", PetscMemoryAccessModeToString(marked.mode), PetscDeviceTypes[dtype])); 533 if (last_dep.event()->dctx_id != dctx_id) dctx_upstream_deps.emplace(id); 534 PetscAssert(dctx_upstream_deps.find(id) != dctx_upstream_deps.end(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find id %" PetscInt64_FMT "in object dependencies, but we have apparently recorded the last dependency %s!", id, 535 last_write.frame().to_string().c_str()); 536 swap(last_write, last_dep); 537 if (last_write_was_also_us) { 538 PetscCall(DEBUG_INFO("we were also the last write event (intent %s), updating\n", PetscMemoryAccessModeToString(mode))); 539 // we are both the last to write *and* the last to leave a write event. This is the 540 // fast path, we only need to update the frame and update the recorded event 541 swap(last_dep.frame(), frame); 542 // last used to be last_write which is not guaranteed to have an event, so must 543 // create it now 544 PetscCall(last_dep.ensure_event(dctx)); 545 PetscCall(PetscDeviceContextRecordEvent_Private(dctx, last_dep.event())); 546 *update_object_dependencies = false; 547 } 548 PetscFunctionReturn(PETSC_SUCCESS); 549 } 550 551 // The current mode is NOT compatible with the previous mode. We must serialize with all events 552 // in the dependency list, possibly clear it, and update the previous write event 553 554 template <bool use_debug> 555 static PetscErrorCode MarkFromID_IncompatibleModes(MarkedObjectMap::mapped_type &marked, PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, PetscStackFrame<use_debug> &frame, const char *name, bool *update_object_dependencies) 556 { 557 auto &old_mode = marked.mode; 558 auto &object_dependencies = marked.dependencies; 559 560 PetscFunctionBegin; 561 // we are NOT compatible with the previous mode 562 PetscCall(DEBUG_INFO("new mode (%s) NOT COMPATIBLE with %s mode (%s), serializing then clearing (%zu) %s\n", PetscMemoryAccessModeToString(mode), object_dependencies.empty() ? "default" : "old", PetscMemoryAccessModeToString(old_mode), 563 object_dependencies.size(), object_dependencies.size() == 1 ? "dependency" : "dependencies")); 564 565 for (const auto &dep : object_dependencies) PetscCall(PetscDeviceContextWaitForEvent_Private(dctx, dep.event())); 566 // if the previous mode wrote, update the last write node with it 567 if (PetscMemoryAccessWrite(old_mode)) PetscCall(MarkFromID_IncompatibleModes_UpdateLastWrite(marked, dctx, id, mode, frame, name, update_object_dependencies)); 568 569 old_mode = mode; 570 // clear out the old dependencies if are about to append ourselves 571 if (*update_object_dependencies) object_dependencies.clear(); 572 PetscFunctionReturn(PETSC_SUCCESS); 573 } 574 575 template <bool use_debug> 576 static PetscErrorCode PetscDeviceContextMarkIntentFromID_Private(PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, PetscStackFrame<use_debug> frame, const char *name) 577 { 578 auto &marked = marked_object_map.map[id]; 579 auto &object_dependencies = marked.dependencies; 580 auto update_object_dependencies = true; 581 582 PetscFunctionBegin; 583 if ((marked.mode == PETSC_MEMORY_ACCESS_READ) && (mode == PETSC_MEMORY_ACCESS_READ)) { 584 PetscCall(MarkFromID_CompatibleModes(marked, dctx, id, mode, frame, name, &update_object_dependencies)); 585 } else { 586 PetscCall(MarkFromID_IncompatibleModes(marked, dctx, id, mode, frame, name, &update_object_dependencies)); 587 } 588 if (update_object_dependencies) { 589 // become the new leaf by appending ourselves 590 PetscCall(DEBUG_INFO("%s with intent %s\n", object_dependencies.empty() ? "dependency list is empty, creating new leaf" : "appending to existing leaves", PetscMemoryAccessModeToString(mode))); 591 PetscCallCXX(object_dependencies.emplace_back(dctx, std::move(frame))); 592 PetscCallCXX(CxxDataCast(dctx)->deps.emplace(id)); 593 } 594 PetscFunctionReturn(PETSC_SUCCESS); 595 } 596 597 #undef DEBUG_INFO 598 599 /*@C 600 PetscDeviceContextMarkIntentFromID - Indicate a `PetscDeviceContext`s access intent to the 601 auto-dependency system 602 603 Not Collective 604 605 Input Parameters: 606 + dctx - The `PetscDeviceContext` 607 . id - The `PetscObjectId` to mark 608 . mode - The desired access intent 609 - name - The object name (for debug purposes, ignored in optimized builds) 610 611 Notes: 612 This routine formally informs the dependency system that `dctx` will access the object 613 represented by `id` with `mode` and adds `dctx` to `id`'s list of dependencies (termed 614 "leaves"). 615 616 If the existing set of leaves have an incompatible `PetscMemoryAccessMode` to `mode`, `dctx` 617 will be serialized against them. 618 619 Level: intermediate 620 621 .seealso: `PetscDeviceContextWaitForContext()`, `PetscDeviceContextSynchronize()`, 622 `PetscObjectGetId()`, `PetscMemoryAccessMode` 623 @*/ 624 PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, const char name[]) 625 { 626 #if PetscDefined(USE_DEBUG) && !PetscDefined(HAVE_THREADSAFETY) 627 const auto index = petscstack.currentsize > 2 ? petscstack.currentsize - 2 : 0; 628 const auto file = petscstack.file[index]; 629 const auto function = petscstack.function[index]; 630 const auto line = petscstack.line[index]; 631 #else 632 constexpr const char *file = nullptr; 633 constexpr const char *function = nullptr; 634 constexpr auto line = 0; 635 #endif 636 637 PetscFunctionBegin; 638 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 639 if (name) PetscValidCharPointer(name, 4); 640 PetscCall(marked_object_map.register_finalize()); 641 PetscCall(PetscLogEventBegin(DCONTEXT_Mark, dctx, nullptr, nullptr, nullptr)); 642 PetscCall(PetscDeviceContextMarkIntentFromID_Private(dctx, id, mode, MarkedObjectMap::snapshot_type::frame_type{file, function, line}, name ? name : "unknown object")); 643 PetscCall(PetscLogEventEnd(DCONTEXT_Mark, dctx, nullptr, nullptr, nullptr)); 644 PetscFunctionReturn(PETSC_SUCCESS); 645 } 646 647 #if defined(__clang__) 648 #pragma clang diagnostic pop 649 #endif 650