1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/ 2 3 #include <petsc/private/cpp/object_pool.hpp> 4 #include <petsc/private/cpp/utility.hpp> 5 #include <petsc/private/cpp/unordered_map.hpp> 6 7 #include <algorithm> // std::remove_if(), std::find_if() 8 #include <vector> 9 #include <string> 10 #include <sstream> // std::ostringstream 11 12 #if defined(__clang__) 13 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wgnu-zero-variadic-macro-arguments") 14 #endif 15 16 // ========================================================================================== 17 // PetscEvent 18 // ========================================================================================== 19 20 class PetscEventConstructor : public Petsc::ConstructorInterface<_n_PetscEvent, PetscEventConstructor> { 21 public: 22 PetscErrorCode construct_(PetscEvent event) const noexcept 23 { 24 PetscFunctionBegin; 25 PetscCall(PetscMemzero(event, sizeof(*event))); 26 PetscCall(underlying().reset(event)); 27 PetscFunctionReturn(PETSC_SUCCESS); 28 } 29 30 PetscErrorCode destroy_(PetscEvent event) const noexcept 31 { 32 PetscFunctionBegin; 33 PetscCall(underlying().reset(event)); 34 PetscFunctionReturn(PETSC_SUCCESS); 35 } 36 37 static PetscErrorCode reset_(PetscEvent event) noexcept 38 { 39 PetscFunctionBegin; 40 if (auto &destroy = event->destroy) { 41 PetscCall((*destroy)(event)); 42 destroy = nullptr; 43 } 44 PetscAssert(!event->data, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Event failed to destroy its data member: %p", event->data); 45 event->dctx_id = 0; 46 event->dctx_state = 0; 47 event->dtype = PETSC_DEVICE_DEFAULT(); 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 51 static PetscErrorCode invalidate_(PetscEvent) noexcept { return PETSC_SUCCESS; } 52 }; 53 54 static Petsc::ObjectPool<_n_PetscEvent, PetscEventConstructor> event_pool; 55 56 static PetscErrorCode PetscDeviceContextCreateEvent_Private(PetscDeviceContext dctx, PetscEvent *event) 57 { 58 PetscFunctionBegin; 59 
PetscValidDeviceContext(dctx, 1); 60 PetscValidPointer(event, 2); 61 PetscCall(event_pool.allocate(event)); 62 PetscCall(PetscDeviceContextGetDeviceType(dctx, &(*event)->dtype)); 63 PetscTryTypeMethod(dctx, createevent, *event); 64 PetscFunctionReturn(PETSC_SUCCESS); 65 } 66 67 static PetscErrorCode PetscEventDestroy_Private(PetscEvent *event) 68 { 69 PetscFunctionBegin; 70 PetscValidPointer(event, 1); 71 if (*event) PetscCall(event_pool.deallocate(event)); 72 PetscFunctionReturn(PETSC_SUCCESS); 73 } 74 75 static PetscErrorCode PetscDeviceContextRecordEvent_Private(PetscDeviceContext dctx, PetscEvent event) 76 { 77 PetscObjectId id; 78 PetscObjectState state; 79 80 PetscFunctionBegin; 81 PetscValidDeviceContext(dctx, 1); 82 PetscValidPointer(event, 2); 83 id = PetscObjectCast(dctx)->id; 84 state = PetscObjectCast(dctx)->state; 85 // technically state can never be less than event->dctx_state (only equal) but we include 86 // it in the check just in case 87 if ((id == event->dctx_id) && (state <= event->dctx_state)) PetscFunctionReturn(PETSC_SUCCESS); 88 if (dctx->ops->recordevent) { 89 // REVIEW ME: 90 // TODO maybe move this to impls, as they can determine whether they can interoperate with 91 // other device types more readily 92 if (PetscDefined(USE_DEBUG) && (event->dtype != PETSC_DEVICE_HOST)) { 93 PetscDeviceType dtype; 94 95 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 96 PetscCheck(event->dtype == dtype, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Event type %s does not match device context type %s", PetscDeviceTypes[event->dtype], PetscDeviceTypes[dtype]); 97 } 98 PetscUseTypeMethod(dctx, recordevent, event); 99 } 100 event->dctx_id = id; 101 event->dctx_state = state; 102 PetscFunctionReturn(PETSC_SUCCESS); 103 } 104 105 static PetscErrorCode PetscDeviceContextWaitForEvent_Private(PetscDeviceContext dctx, PetscEvent event) 106 { 107 PetscFunctionBegin; 108 PetscValidDeviceContext(dctx, 1); 109 PetscValidPointer(event, 2); 110 // empty data implies 
you cannot wait on this event 111 if (!event->data) PetscFunctionReturn(PETSC_SUCCESS); 112 if (PetscDefined(USE_DEBUG)) { 113 const auto etype = event->dtype; 114 PetscDeviceType dtype; 115 116 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 117 PetscCheck(etype == dtype, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Event type %s does not match device context type %s", PetscDeviceTypes[etype], PetscDeviceTypes[dtype]); 118 } 119 if (PetscObjectCast(dctx)->id == event->dctx_id) PetscFunctionReturn(PETSC_SUCCESS); 120 PetscTryTypeMethod(dctx, waitforevent, event); 121 PetscFunctionReturn(PETSC_SUCCESS); 122 } 123 124 // ========================================================================================== 125 // PetscStackFrame 126 // 127 // A helper class that (when debugging is enabled) contains the stack frame from which 128 // PetscDeviceContextMakrIntentFromID(). It is intended to be derived from, since this enables 129 // empty-base-class optimization to kick in when debugging is disabled. 
130 // ========================================================================================== 131 132 template <bool use_debug> 133 struct PetscStackFrame; 134 135 template <> 136 struct PetscStackFrame</* use_debug = */ true> { 137 std::string file{}; 138 std::string function{}; 139 int line{}; 140 141 PetscStackFrame() = default; 142 143 PetscStackFrame(const char *file_, const char *func_, int line_) noexcept : file(split_on_petsc_path_(file_)), function(func_), line(line_) { } 144 145 bool operator==(const PetscStackFrame &other) const noexcept { return line == other.line && file == other.file && function == other.function; } 146 147 PETSC_NODISCARD std::string to_string() const noexcept 148 { 149 std::string ret; 150 151 ret = '(' + function + "() at " + file + ':' + std::to_string(line) + ')'; 152 return ret; 153 } 154 155 private: 156 static std::string split_on_petsc_path_(std::string &&in) noexcept 157 { 158 auto pos = in.find("petsc/src"); 159 160 if (pos == std::string::npos) pos = in.find("petsc/include"); 161 if (pos == std::string::npos) pos = 0; 162 return in.substr(pos); 163 } 164 165 friend std::ostream &operator<<(std::ostream &os, const PetscStackFrame &frame) 166 { 167 os << frame.to_string(); 168 return os; 169 } 170 171 friend void swap(PetscStackFrame &lhs, PetscStackFrame &rhs) noexcept 172 { 173 using std::swap; 174 175 swap(lhs.file, rhs.file); 176 swap(lhs.function, rhs.function); 177 swap(lhs.line, rhs.line); 178 } 179 }; 180 181 template <> 182 struct PetscStackFrame</* use_debug = */ false> { 183 template <typename... T> 184 constexpr PetscStackFrame(T &&...) 
noexcept 185 { 186 } 187 188 constexpr bool operator==(const PetscStackFrame &) const noexcept { return true; } 189 190 PETSC_NODISCARD static std::string to_string() noexcept { return "(unknown)"; } 191 192 friend std::ostream &operator<<(std::ostream &os, const PetscStackFrame &) noexcept 193 { 194 os << "(unknown)"; 195 return os; 196 } 197 }; 198 199 // ========================================================================================== 200 // MarkedObjectMap 201 // 202 // A mapping from a PetscObjectId to a PetscEvent and (if debugging is enabled) a 203 // PetscStackFrame containing the location where PetscDeviceContextMarkIntentFromID was called 204 // ========================================================================================== 205 206 class MarkedObjectMap : public Petsc::RegisterFinalizeable<MarkedObjectMap> { 207 public: 208 // Note we derive from PetscStackFrame so that the empty base class optimization can kick 209 // in. If it were just a member it would still take up storage in optimized builds 210 class snapshot_type : private PetscStackFrame<PetscDefined(USE_DEBUG) && !PetscDefined(HAVE_THREADSAFETY)> { 211 public: 212 using frame_type = PetscStackFrame<PetscDefined(USE_DEBUG) && !PetscDefined(HAVE_THREADSAFETY)>; 213 214 snapshot_type() = default; 215 snapshot_type(PetscDeviceContext, frame_type) noexcept; 216 217 ~snapshot_type() noexcept; 218 219 // movable 220 snapshot_type(snapshot_type &&) noexcept; 221 snapshot_type &operator=(snapshot_type &&) noexcept; 222 223 // not copyable 224 snapshot_type(const snapshot_type &) noexcept = delete; 225 snapshot_type &operator=(const snapshot_type &) noexcept = delete; 226 227 PETSC_NODISCARD PetscEvent event() const noexcept { return event_; } 228 PETSC_NODISCARD const frame_type &frame() const noexcept { return *this; } 229 PETSC_NODISCARD frame_type &frame() noexcept { return *this; } 230 231 PETSC_NODISCARD PetscObjectId dctx_id() const noexcept 232 { 233 PetscFunctionBegin; 234 
PetscAssertAbort(event(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Snapshot %s does not contain an event!", frame().to_string().c_str()); 235 PetscFunctionReturn(event()->dctx_id); 236 } 237 238 PetscErrorCode ensure_event(PetscDeviceContext) noexcept; 239 240 friend void swap(snapshot_type &, snapshot_type &) noexcept; 241 242 private: 243 PetscEvent event_{}; // the state of device context when this snapshot was recorded 244 245 PETSC_NODISCARD static PetscEvent init_event_(PetscDeviceContext) noexcept; 246 }; 247 248 // the "value" each key maps to 249 struct mapped_type { 250 using dependency_type = std::vector<snapshot_type>; 251 252 mapped_type() noexcept; 253 254 PetscMemoryAccessMode mode{PETSC_MEMORY_ACCESS_READ}; 255 snapshot_type last_write{}; 256 dependency_type dependencies{}; 257 }; 258 259 using map_type = Petsc::UnorderedMap<PetscObjectId, mapped_type>; 260 261 map_type map; 262 263 private: 264 friend RegisterFinalizeable; 265 266 PetscErrorCode finalize_() noexcept; 267 }; 268 269 // ========================================================================================== 270 // MarkedObjectMap::mapped_type -- Public API 271 // ========================================================================================== 272 273 // workaround for clang bug that produces the following warning 274 // 275 // src/sys/objects/device/interface/mark_dcontext.cxx:253:5: error: default member initializer 276 // for 'mode' needed within definition of enclosing class 'MarkedObjectMap' outside of member 277 // functions 278 // mapped_type() noexcept = default; 279 // ^ 280 // https://stackoverflow.com/questions/53408962/try-to-understand-compiler-error-message-default-member-initializer-required-be 281 MarkedObjectMap::mapped_type::mapped_type() noexcept = default; 282 283 // ========================================================================================== 284 // MarkedObjectMap Private API 285 // 
// ==========================================================================================

// Finalizer for the map: logs an info message and clears every entry.
inline PetscErrorCode MarkedObjectMap::finalize_() noexcept
{
  PetscFunctionBegin;
  PetscCall(PetscInfo(nullptr, "Finalizing marked object map\n"));
  PetscCall(map.clear());
  PetscFunctionReturn(PETSC_SUCCESS);
}

// ==========================================================================================
// MarkedObjectMap::snapshot_type Private API
// ==========================================================================================

// Create an event on dctx and record it right away. Used from a constructor initializer list,
// so it cannot return an error code: any failure aborts instead.
inline PetscEvent MarkedObjectMap::snapshot_type::init_event_(PetscDeviceContext dctx) noexcept
{
  PetscEvent event = nullptr;

  PetscFunctionBegin;
  PetscCallAbort(PETSC_COMM_SELF, PetscDeviceContextCreateEvent_Private(dctx, &event));
  PetscCallAbort(PETSC_COMM_SELF, PetscDeviceContextRecordEvent_Private(dctx, event));
  PetscFunctionReturn(event);
}

// ==========================================================================================
// MarkedObjectMap::snapshot_type Public API
// ==========================================================================================

// Take ownership of frame and create + record a fresh event on dctx
MarkedObjectMap::snapshot_type::snapshot_type(PetscDeviceContext dctx, frame_type frame) noexcept : frame_type(std::move(frame)), event_(init_event_(dctx)) { }

// Release the owned event (a null event is accepted by PetscEventDestroy_Private())
MarkedObjectMap::snapshot_type::~snapshot_type() noexcept
{
  PetscFunctionBegin;
  PetscCallAbort(PETSC_COMM_SELF, PetscEventDestroy_Private(&event_));
  PetscFunctionReturnVoid();
}

// movable
// Only the frame base sub-object is moved from `other` by the first initializer; its event_ is
// still intact when it is stolen (and nulled) by the second one.
MarkedObjectMap::snapshot_type::snapshot_type(snapshot_type &&other) noexcept : frame_type(std::move(other)), event_(Petsc::util::exchange(other.event_, nullptr)) { }

// Move-assign: take over the frame, release the event currently held, then steal other's
// event, leaving other without one.
MarkedObjectMap::snapshot_type &MarkedObjectMap::snapshot_type::operator=(snapshot_type &&other) noexcept
{
  PetscFunctionBegin;
  if (this != &other) {
    frame_type::operator=(std::move(other));
    PetscCallAbort(PETSC_COMM_SELF, PetscEventDestroy_Private(&event_));
    event_ = Petsc::util::exchange(other.event_, nullptr);
  }
  PetscFunctionReturn(*this);
}

// Create an event on dctx if this snapshot does not hold one yet. The new event is created
// only, it is not recorded here.
PetscErrorCode MarkedObjectMap::snapshot_type::ensure_event(PetscDeviceContext dctx) noexcept
{
  PetscFunctionBegin;
  if (PetscUnlikely(!event_)) PetscCall(PetscDeviceContextCreateEvent_Private(dctx, &event_));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Exchange both the frame and the owned event of two snapshots
void swap(MarkedObjectMap::snapshot_type &lhs, MarkedObjectMap::snapshot_type &rhs) noexcept
{
  using std::swap;

  swap(lhs.frame(), rhs.frame());
  swap(lhs.event_, rhs.event_);
}

// A mapping between PetscObjectId (i.e. some PetscObject) to the list of PetscEvent's encoding
// the last time the PetscObject was accessed
static MarkedObjectMap marked_object_map;

// ==========================================================================================
// Utility Functions
// ==========================================================================================

// Export the contents of the marked object map as plain arrays.
//
// Output:
//   nkeys        - number of entries in the map
//   keys         - the object ids, one per entry
//   modes        - the access mode stored for each entry
//   ndeps        - the number of dependencies of each entry
//   dependencies - for each entry, an array holding the events of its dependencies
//
// The arrays are allocated here; PetscRestoreMarkedObjectMap_Internal() frees them. The events
// themselves are the ones stored in the map, they are not copied.
PetscErrorCode PetscGetMarkedObjectMap_Internal(std::size_t *nkeys, PetscObjectId **keys, PetscMemoryAccessMode **modes, std::size_t **ndeps, PetscEvent ***dependencies)
{
  std::size_t i    = 0;
  const auto &map  = marked_object_map.map;
  const auto  size = *nkeys = map.size();

  PetscFunctionBegin;
  PetscCall(PetscMalloc4(size, keys, size, modes, size, ndeps, size, dependencies));
  for (auto it_ = map.begin(); it_ != map.end(); ++it_) {
    auto       &it = *it_;
    std::size_t j  = 0;

    (*keys)[i]         = it.first;
    (*modes)[i]        = it.second.mode;
    (*ndeps)[i]        = it.second.dependencies.size();
    (*dependencies)[i] = nullptr;
    // one event slot per dependency of this entry
    PetscCall(PetscMalloc1((*ndeps)[i], (*dependencies) + i));
    for (auto &&dep : it.second.dependencies) (*dependencies)[i][j++] = dep.event();
    ++i;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Free the arrays handed out by PetscGetMarkedObjectMap_Internal(): first each per-entry
// dependency array, then the four top-level arrays. nkeys must be the value returned by the
// matching Get call.
PetscErrorCode PetscRestoreMarkedObjectMap_Internal(std::size_t nkeys, PetscObjectId **keys, PetscMemoryAccessMode **modes, std::size_t **ndeps, PetscEvent ***dependencies)
{
  PetscFunctionBegin;
  for (std::size_t i = 0; i < nkeys; ++i) PetscCall(PetscFree((*dependencies)[i]));
  PetscCall(PetscFree4(*keys, *modes, *ndeps, *dependencies));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Walk every object id that dctx has marked and remove dctx from that object's dependency
// list. For each object still present in the map, callback is invoked as
//
//   callback(map_iterator, first_remaining, last_remaining)
//
// where [first_remaining, last_remaining) are the dependencies that do NOT belong to dctx. The
// callback runs before the entries belonging to dctx are erased. Objects left without any
// dependency are erased from the map, and finally the context's own list of ids is cleared.
template <typename T>
static PetscErrorCode PetscDeviceContextMapIterVisitor(PetscDeviceContext dctx, T &&callback) noexcept
{
  const auto dctx_id    = PetscObjectCast(dctx)->id;
  auto      &dctx_deps  = CxxDataCast(dctx)->deps;
  auto      &object_map = marked_object_map.map;

  PetscFunctionBegin;
  for (auto &&dep : dctx_deps) {
    const auto mapit = object_map.find(dep);

    // Need this check since the final PetscDeviceContext may run through this *after* the map
    // has been finalized (and cleared), and hence might fail to find its dependencies. This is
    // perfectly valid since the user no longer cares about dangling dependencies after PETSc
    // is finalized
    if (PetscLikely(mapit != object_map.end())) {
      auto      &deps = mapit->second.dependencies;
      const auto end  = deps.end();
      // shuffle every snapshot recorded by dctx to the back: [it, end) is what gets erased
      const auto it   = std::remove_if(deps.begin(), end, [&](const MarkedObjectMap::snapshot_type &obj) { return obj.dctx_id() == dctx_id; });

      PetscCall(callback(mapit, deps.cbegin(), static_cast<decltype(deps.cend())>(it)));
      // remove ourselves
      PetscCallCXX(deps.erase(it, end));
      // continue to next object, but erase this one if it has no more dependencies
      if (deps.empty()) PetscCallCXX(object_map.erase(mapit));
    }
  }
  PetscCallCXX(dctx_deps.clear());
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Called once dctx has been synchronized: removes dctx from the dependency list of every
// object it marked (logging the remaining leaves when debug info is enabled), then does the
// same, recursively, for every upstream context of dctx.
PetscErrorCode PetscDeviceContextSyncClearMap_Internal(PetscDeviceContext dctx)
{
  using map_iterator = MarkedObjectMap::map_type::const_iterator;
  using dep_iterator = MarkedObjectMap::mapped_type::dependency_type::const_iterator;

  PetscFunctionBegin;
  PetscCall(PetscDeviceContextMapIterVisitor(dctx, [&](map_iterator mapit, dep_iterator it, dep_iterator end) {
    PetscFunctionBegin;
    // purely informational: print the dependencies that remain for this object
    if (PetscDefined(USE_DEBUG_AND_INFO)) {
      std::ostringstream oss;
      const auto         mode = PetscMemoryAccessModeToString(mapit->second.mode);

      oss << "synced dctx " << PetscObjectCast(dctx)->id << ", remaining leaves for obj " << mapit->first << ": {";
      while (it != end) {
        oss << "[dctx " << it->dctx_id() << ", " << mode << ' ' << it->frame() << ']';
        if (++it != end) oss << ", ";
      }
      oss << '}';
      PetscCall(PetscInfo(nullptr, "%s\n", oss.str().c_str()));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }));
  {
    // the recursive sync clear map call is unbounded in case of a dependency loop so we make a
    // copy
    // clang-format off
    const std::vector<CxxData::upstream_type::value_type> upstream_copy(
      std::make_move_iterator(CxxDataCast(dctx)->upstream.begin()),
      std::make_move_iterator(CxxDataCast(dctx)->upstream.end())
    );
    // clang-format on

    // aftermath, clear our set of parents (to avoid infinite recursion) and mark ourselves as no
    // longer contained (while the empty graph technically *is* always contained, it is not what
    // we mean by it)
    PetscCall(CxxDataCast(dctx)->clear());
    //dctx->contained = PETSC_FALSE;
    for (auto &&upstrm : upstream_copy) {
      // check that this parent still points to what we originally thought it was
      PetscCheck(upstrm.second.id == PetscObjectCast(upstrm.first)->id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Upstream dctx %" PetscInt64_FMT " no longer exists, now has id %" PetscInt64_FMT, upstrm.second.id, PetscObjectCast(upstrm.first)->id);
      PetscCall(PetscDeviceContextSyncClearMap_Internal(upstrm.first));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Remove dctx from the dependency list of every object it marked and clear its private
// dependency data. The error about dangling dependents is only raised when the visitor
// callback writes to oss.
//
// NOTE(review): `allow` and `contained` are currently hard-coded to true (the option-driven
// version is commented out), so the callback always returns early and the PetscCheck() below
// can never fire as written.
PetscErrorCode PetscDeviceContextCheckNotOrphaned_Internal(PetscDeviceContext dctx)
{
  std::ostringstream oss;
  //const auto         allow = dctx->options.allow_orphans, contained = dctx->contained;
  const auto allow = true, contained = true;
  auto       wrote_to_oss = false;
  using map_iterator      = MarkedObjectMap::map_type::const_iterator;
  using dep_iterator      = MarkedObjectMap::mapped_type::dependency_type::const_iterator;

  PetscFunctionBegin;
  PetscCall(PetscDeviceContextMapIterVisitor(dctx, [&](map_iterator mapit, dep_iterator it, dep_iterator end) {
    PetscFunctionBegin;
    if (allow || contained) PetscFunctionReturn(PETSC_SUCCESS);
    wrote_to_oss = true;
    oss << "- PetscObject (id " << mapit->first << "), intent " << PetscMemoryAccessModeToString(mapit->second.mode) << ' ' << it->frame();
    if (std::distance(it, end) == 0) oss << " (orphaned)"; // we were the only dependency
    oss << '\n';
    PetscFunctionReturn(PETSC_SUCCESS);
  }));
  PetscCheck(!wrote_to_oss, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Destroying PetscDeviceContext ('%s', id %" PetscInt64_FMT ") would leave the following dangling (possibly orphaned) dependents:\n%s\nMust synchronize before destroying it, or allow it to be destroyed with orphans",
             PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", PetscObjectCast(dctx)->id, oss.str().c_str());
  PetscCall(CxxDataCast(dctx)->clear());
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Debug-only logging helper for the MarkFromID_*() routines below. It prefixes every message
// with the context id/name and the object id/name, and therefore requires variables called
// `dctx`, `id` and `name` to be in scope at the point of use.
#define DEBUG_INFO(mess, ...) PetscDebugInfo(dctx, "dctx %" PetscInt64_FMT " (%s) - obj %" PetscInt64_FMT " (%s): " mess, PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name ? PetscObjectCast(dctx)->name : "unnamed", id, name, ##__VA_ARGS__)

// The current mode is compatible with the previous mode (i.e.
read-read) so we need only 495 // update the existing version and possibly appeand ourselves to the dependency list 496 497 template <bool use_debug> 498 static PetscErrorCode MarkFromID_CompatibleModes(MarkedObjectMap::mapped_type &marked, PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, PetscStackFrame<use_debug> &frame, PETSC_UNUSED const char *name, bool *update_object_dependencies) 499 { 500 const auto dctx_id = PetscObjectCast(dctx)->id; 501 auto &object_dependencies = marked.dependencies; 502 const auto end = object_dependencies.end(); 503 const auto it = std::find_if(object_dependencies.begin(), end, [&](const MarkedObjectMap::snapshot_type &obj) { return obj.dctx_id() == dctx_id; }); 504 505 PetscFunctionBegin; 506 PetscCall(DEBUG_INFO("new mode (%s) COMPATIBLE with %s mode (%s), no need to serialize\n", PetscMemoryAccessModeToString(mode), object_dependencies.empty() ? "default" : "old", PetscMemoryAccessModeToString(marked.mode))); 507 (void)mode; 508 if (it != end) { 509 using std::swap; 510 511 // we have been here before, all we must do is update our entry then we can bail 512 PetscCall(DEBUG_INFO("found old self as dependency, updating\n")); 513 PetscAssert(CxxDataCast(dctx)->deps.find(id) != CxxDataCast(dctx)->deps.end(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceContext %" PetscInt64_FMT " listed as dependency for object %" PetscInt64_FMT " (%s), but does not have the object in private dependency list!", dctx_id, id, name); 514 swap(it->frame(), frame); 515 PetscCall(PetscDeviceContextRecordEvent_Private(dctx, it->event())); 516 *update_object_dependencies = false; 517 PetscFunctionReturn(PETSC_SUCCESS); 518 } 519 520 // we have not been here before, need to serialize with the last write event (if it exists) 521 // and add ourselves to the dependency list 522 if (const auto event = marked.last_write.event()) PetscCall(PetscDeviceContextWaitForEvent_Private(dctx, event)); 523 PetscFunctionReturn(PETSC_SUCCESS); 524 } 525 526 
template <bool use_debug> 527 static PetscErrorCode MarkFromID_IncompatibleModes_UpdateLastWrite(MarkedObjectMap::mapped_type &marked, PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, PetscStackFrame<use_debug> &frame, PETSC_UNUSED const char *name, bool *update_object_dependencies) 528 { 529 const auto dctx_id = PetscObjectCast(dctx)->id; 530 auto &last_write = marked.last_write; 531 auto &last_dep = marked.dependencies.back(); 532 PetscDeviceType dtype; 533 534 PetscFunctionBegin; 535 PetscAssert(marked.dependencies.size() == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Can only have a single writer as dependency, have %zu!", marked.dependencies.size()); 536 PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype)); 537 if (last_dep.event()->dtype != dtype) { 538 PetscCall(DEBUG_INFO("moving last write dependency (intent %s)\n", PetscMemoryAccessModeToString(marked.mode))); 539 last_write = std::move(last_dep); 540 PetscFunctionReturn(PETSC_SUCCESS); 541 } 542 543 // we match the device type of the dependency, we can reuse its event! 
544 auto &dctx_upstream_deps = CxxDataCast(dctx)->deps; 545 const auto last_write_was_also_us = last_write.event() && (last_write.dctx_id() == dctx_id); 546 using std::swap; 547 548 PetscCall(DEBUG_INFO("we matched the previous write dependency's (intent %s) device type (%s), swapping last dependency with last write\n", PetscMemoryAccessModeToString(marked.mode), PetscDeviceTypes[dtype])); 549 if (last_dep.event()->dctx_id != dctx_id) dctx_upstream_deps.emplace(id); 550 PetscAssert(dctx_upstream_deps.find(id) != dctx_upstream_deps.end(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find id %" PetscInt64_FMT "in object dependencies, but we have apparently recorded the last dependency %s!", id, 551 last_write.frame().to_string().c_str()); 552 swap(last_write, last_dep); 553 if (last_write_was_also_us) { 554 PetscCall(DEBUG_INFO("we were also the last write event (intent %s), updating\n", PetscMemoryAccessModeToString(mode))); 555 (void)mode; 556 // we are both the last to write *and* the last to leave a write event. This is the 557 // fast path, we only need to update the frame and update the recorded event 558 swap(last_dep.frame(), frame); 559 // last used to be last_write which is not guaranteed to have an event, so must 560 // create it now 561 PetscCall(last_dep.ensure_event(dctx)); 562 PetscCall(PetscDeviceContextRecordEvent_Private(dctx, last_dep.event())); 563 *update_object_dependencies = false; 564 } 565 PetscFunctionReturn(PETSC_SUCCESS); 566 } 567 568 // The current mode is NOT compatible with the previous mode. 
// We must serialize with all events
// in the dependency list, possibly clear it, and update the previous write event
//
// The order of operations below matters: wait on every current leaf, then (if the old mode
// wrote) promote the writer to last_write, then overwrite the stored mode, and only then
// clear the leaves -- and only if the caller is going to append a new one.

template <bool use_debug>
static PetscErrorCode MarkFromID_IncompatibleModes(MarkedObjectMap::mapped_type &marked, PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, PetscStackFrame<use_debug> &frame, const char *name, bool *update_object_dependencies)
{
  auto &old_mode            = marked.mode;
  auto &object_dependencies = marked.dependencies;

  PetscFunctionBegin;
  // we are NOT compatible with the previous mode
  PetscCall(DEBUG_INFO("new mode (%s) NOT COMPATIBLE with %s mode (%s), serializing then clearing (%zu) %s\n", PetscMemoryAccessModeToString(mode), object_dependencies.empty() ? "default" : "old", PetscMemoryAccessModeToString(old_mode),
                       object_dependencies.size(), object_dependencies.size() == 1 ? "dependency" : "dependencies"));

  // serialize dctx against every existing leaf
  for (const auto &dep : object_dependencies) PetscCall(PetscDeviceContextWaitForEvent_Private(dctx, dep.event()));
  // if the previous mode wrote, update the last write node with it
  if (PetscMemoryAccessWrite(old_mode)) PetscCall(MarkFromID_IncompatibleModes_UpdateLastWrite(marked, dctx, id, mode, frame, name, update_object_dependencies));

  // old_mode is a reference into the map entry, so this updates the stored mode
  old_mode = mode;
  // clear out the old dependencies if we are about to append ourselves
  if (*update_object_dependencies) object_dependencies.clear();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Core of PetscDeviceContextMarkIntentFromID(): look up (or default-create) the map entry for
// id, dispatch on whether the stored and requested modes are both reads, and finally append
// dctx as a new leaf unless one of the helpers already refreshed an existing entry in place.
template <bool use_debug>
static PetscErrorCode PetscDeviceContextMarkIntentFromID_Private(PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, PetscStackFrame<use_debug> frame, const char *name)
{
  auto &marked                     = marked_object_map.map[id];
  auto &object_dependencies        = marked.dependencies;
  // the helpers flip this to false when they reuse an existing snapshot of dctx
  auto  update_object_dependencies = true;

  PetscFunctionBegin;
  // read-after-read is the only combination that does not require serialization
  if ((marked.mode == PETSC_MEMORY_ACCESS_READ) && (mode == PETSC_MEMORY_ACCESS_READ)) {
    PetscCall(MarkFromID_CompatibleModes(marked, dctx, id, mode, frame, name, &update_object_dependencies));
  } else {
    PetscCall(MarkFromID_IncompatibleModes(marked, dctx, id, mode, frame, name, &update_object_dependencies));
  }
  if (update_object_dependencies) {
    // become the new leaf by appending ourselves
    PetscCall(DEBUG_INFO("%s with intent %s\n", object_dependencies.empty() ? "dependency list is empty, creating new leaf" : "appending to existing leaves", PetscMemoryAccessModeToString(mode)));
    // constructs a snapshot in place, which creates and records a new event on dctx
    PetscCallCXX(object_dependencies.emplace_back(dctx, std::move(frame)));
    // remember on the context side that it now has a stake in this object
    PetscCallCXX(CxxDataCast(dctx)->deps.emplace(id));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#undef DEBUG_INFO

/*@C
  PetscDeviceContextMarkIntentFromID - Indicate a `PetscDeviceContext`s access intent to the
  auto-dependency system

  Not Collective

  Input Parameters:
+ dctx - The `PetscDeviceContext`
. id   - The `PetscObjectId` to mark
. mode - The desired access intent
- name - The object name (for debug purposes, ignored in optimized builds)

  Notes:
  This routine formally informs the dependency system that `dctx` will access the object
  represented by `id` with `mode` and adds `dctx` to `id`'s list of dependencies (termed
  "leaves").

  If the existing set of leaves have an incompatible `PetscMemoryAccessMode` to `mode`, `dctx`
  will be serialized against them.

  Level: intermediate

.seealso: `PetscDeviceContextWaitForContext()`, `PetscDeviceContextSynchronize()`,
`PetscObjectGetId()`, `PetscMemoryAccessMode`
@*/
PetscErrorCode PetscDeviceContextMarkIntentFromID(PetscDeviceContext dctx, PetscObjectId id, PetscMemoryAccessMode mode, const char name[])
{
#if PetscDefined(USE_DEBUG) && !PetscDefined(HAVE_THREADSAFETY)
  // Read a frame from the PETSc debug stack *before* PetscFunctionBegin is reached: the slot
  // at currentsize - 2, falling back to slot 0 when the stack holds two entries or fewer.
  const auto index    = petscstack.currentsize > 2 ? petscstack.currentsize - 2 : 0;
  const auto file     = petscstack.file[index];
  const auto function = petscstack.function[index];
  const auto line     = petscstack.line[index];
#else
  // no stack information is collected in this configuration
  constexpr const char *file     = nullptr;
  constexpr const char *function = nullptr;
  constexpr auto        line     = 0;
#endif

  PetscFunctionBegin;
  PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
  // name is optional, only validate it when given
  if (name) PetscValidCharPointer(name, 4);
  PetscCall(marked_object_map.register_finalize());
  PetscCall(PetscLogEventBegin(DCONTEXT_Mark, dctx, nullptr, nullptr, nullptr));
  PetscCall(PetscDeviceContextMarkIntentFromID_Private(dctx, id, mode, MarkedObjectMap::snapshot_type::frame_type{file, function, line}, name ? name : "unknown object"));
  PetscCall(PetscLogEventEnd(DCONTEXT_Mark, dctx, nullptr, nullptr, nullptr));
  PetscFunctionReturn(PETSC_SUCCESS);
}