1 #include <petsc/private/deviceimpl.h> /*I <petscdevice.h> I*/ 2 3 #include <petsc/private/cpp/register_finalize.hpp> 4 #include <petsc/private/cpp/type_traits.hpp> // integral_value 5 #include <petsc/private/cpp/unordered_map.hpp> 6 7 #include <algorithm> // std::find_if 8 #include <cstring> // std::memset 9 10 #include <petsc/private/cpp/object_pool.hpp> 11 12 namespace Petsc 13 { 14 15 namespace memory 16 { 17 18 typename PoolAllocated::allocator_type PoolAllocated::pool_{}; 19 20 } // namespace memory 21 22 } // namespace Petsc 23 24 const char *const PetscDeviceCopyModes[] = {"host_to_host", "device_to_host", "host_to_device", "device_to_device", "auto", "PetscDeviceCopyMode", "PETSC_DEVICE_COPY_", nullptr}; 25 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOH) == 0, ""); 26 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOH) == 1, ""); 27 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOD) == 2, ""); 28 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOD) == 3, ""); 29 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_AUTO) == 4, ""); 30 31 // GCC implementation for std::hash<T*>. LLVM's libc++ is almost 2x slower because they do all 32 // kinds of complicated murmur hashing, so we make sure to enforce GCC's version. 33 struct PointerHash { 34 template <typename T> 35 PETSC_NODISCARD std::size_t operator()(const T *ptr) const noexcept 36 { 37 return reinterpret_cast<std::size_t>(ptr); 38 } 39 }; 40 41 // ========================================================================================== 42 // PointerAttributes 43 // 44 // A set of attributes for a pointer 45 // ========================================================================================== 46 47 struct PointerAttributes { 48 PetscMemType mtype = PETSC_MEMTYPE_HOST; // memtype of allocation 49 PetscObjectId id = 0; // id of allocation 50 std::size_t size = 0; // size of allocation (bytes) 51 52 // even though this is a POD and can be aggregate initialized, the STL uses () constructors 53 // in unordered_map and so we need to provide a trivial constructor... 54 constexpr PointerAttributes() = default; 55 constexpr PointerAttributes(PetscMemType, PetscObjectId, std::size_t) noexcept; 56 57 bool operator==(const PointerAttributes &) const noexcept; 58 59 PETSC_NODISCARD bool contains(const void *, const void *) const noexcept; 60 }; 61 62 // ========================================================================================== 63 // PointerAttributes - Public API 64 // ========================================================================================== 65 66 inline constexpr PointerAttributes::PointerAttributes(PetscMemType mtype_, PetscObjectId id_, std::size_t size_) noexcept : mtype(mtype_), id(id_), size(size_) { } 67 68 inline bool PointerAttributes::operator==(const PointerAttributes &other) const noexcept 69 { 70 return (mtype == other.mtype) && (id == other.id) && (size == other.size); 71 } 72 73 /* 74 PointerAttributes::contains - asks and answers the question, does ptr_begin contain ptr 75 76 Input Parameters: 77 + ptr_begin - pointer to the start of the range to check 78 - ptr - the pointer to query 79 80 Notes: 81 Returns true if ptr falls within ptr_begins range, false otherwise. 82 */ 83 inline bool PointerAttributes::contains(const void *ptr_begin, const void *ptr) const noexcept 84 { 85 return (ptr >= ptr_begin) && (ptr < (static_cast<const char *>(ptr_begin) + size)); 86 } 87 88 // ========================================================================================== 89 // MemoryMap 90 // 91 // Since the pointers allocated via PetscDeviceAllocate_Private() may be device pointers we 92 // cannot just store meta-data within the pointer itself (as we can't dereference them). So 93 // instead we need to keep an extra map to keep track of them 94 // 95 // Each entry maps pointer -> { 96 // PetscMemType - The memtype of the pointer 97 // PetscObjectId - A unique ID assigned at allocation or registration so auto-dep can 98 // identify the pointer 99 // size - The size (in bytes) of the allocation 100 // } 101 // ========================================================================================== 102 103 class MemoryMap : public Petsc::RegisterFinalizeable<MemoryMap> { 104 public: 105 using map_type = Petsc::UnorderedMap<void *, PointerAttributes, PointerHash>; 106 107 map_type map{}; 108 109 PETSC_NODISCARD map_type::const_iterator search_for(const void *, bool = false) const noexcept; 110 111 private: 112 friend class Petsc::RegisterFinalizeable<MemoryMap>; 113 PetscErrorCode register_finalize_() noexcept; 114 PetscErrorCode finalize_() noexcept; 115 }; 116 117 // ========================================================================================== 118 // MemoryMap - Private API 119 // ========================================================================================== 120 121 PetscErrorCode MemoryMap::register_finalize_() noexcept 122 { 123 PetscFunctionBegin; 124 // Preallocate, this does give a modest performance bump since unordered_map is so __dog__ 125 // slow if it needs to rehash. Experiments show that users tend not to have more than 5 or 126 // so concurrently live pointers lying around. 10 at most. 127 PetscCall(map.reserve(16)); 128 PetscFunctionReturn(PETSC_SUCCESS); 129 } 130 131 PetscErrorCode MemoryMap::finalize_() noexcept 132 { 133 PetscFunctionBegin; 134 PetscCall(PetscInfo(nullptr, "Finalizing memory map\n")); 135 PetscCallCXX(map = map_type{}); 136 PetscFunctionReturn(PETSC_SUCCESS); 137 } 138 139 // ========================================================================================== 140 // MemoryMap - Public API 141 // ========================================================================================== 142 143 /* 144 MemoryMap::search_for - retrieve an iterator to the key-value pair for a pointer in the map 145 146 Input Parameters: 147 + ptr - pointer to search for 148 - must_find - true if an error is raised if the pointer is not found (default: false) 149 150 Notes: 151 Accounts for sub-regions, i.e. if ptr is contained within another pointers region, it returns 152 the iterator to the super-pointers key-value pair. 153 154 If ptr is not found and must_find is false returns map.end(), otherwise raises an error 155 */ 156 MemoryMap::map_type::const_iterator MemoryMap::search_for(const void *ptr, bool must_find) const noexcept 157 { 158 const auto end_it = map.end(); 159 auto it = map.find(const_cast<map_type::key_type>(ptr)); 160 161 // ptr was found, and points to an entire block 162 PetscFunctionBegin; 163 if (it != end_it) PetscFunctionReturn(it); 164 // wasn't found, but maybe its part of a block. have to search every block for it 165 // clang-format off 166 it = std::find_if(map.begin(), end_it, [ptr](map_type::const_iterator::reference map_it) { 167 return map_it.second.contains(map_it.first, ptr); 168 }); 169 // clang-format on 170 PetscCheckAbort(!must_find || it != end_it, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Pointer %p was not registered with the memory tracker, call PetscDeviceRegisterMemory() on it", ptr); 171 PetscFunctionReturn(it); 172 } 173 174 static MemoryMap memory_map; 175 176 // ========================================================================================== 177 // Utility functions 178 // ========================================================================================== 179 180 static PetscErrorCode PetscDeviceCheckCapable_Private(PetscDeviceContext dctx, bool cond, const char descr[]) 181 { 182 PetscFunctionBegin; 183 PetscCheck(cond, PETSC_COMM_SELF, PETSC_ERR_SUP, "Device context (id: %" PetscInt64_FMT ", name: %s, type: %s) can only handle %s host memory", PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name, dctx->device ? PetscDeviceTypes[dctx->device->type] : "unknown", descr); 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 // A helper utility, since register is called from PetscDeviceRegisterMemory() and 188 // PetscDevicAllocate(). The latter also needs the generated id, so instead of making it search 189 // the map again we just return it here 190 static PetscErrorCode PetscDeviceRegisterMemory_Private(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size, PetscObjectId *PETSC_RESTRICT id = nullptr) 191 { 192 auto &map = memory_map.map; 193 const auto it = memory_map.search_for(ptr); 194 195 PetscFunctionBegin; 196 if (it == map.cend()) { 197 // pointer was never registered with the map, insert it and bail 198 const auto newid = PetscObjectNewId_Internal(); 199 200 if (PetscDefined(USE_DEBUG)) { 201 const auto tmp = PointerAttributes(mtype, newid, size); 202 203 for (const auto &entry : map) { 204 auto &&attr = entry.second; 205 206 // REVIEW ME: maybe this should just be handled... 207 PetscCheck(!tmp.contains(ptr, entry.first), PETSC_COMM_SELF, PETSC_ERR_ORDER, "Trying to register pointer %p (memtype %s, size %zu) but it appears you have already registered a sub-region of it (pointer %p, memtype %s, size %zu). Must register the larger region first", ptr, PetscMemTypeToString(mtype), size, 208 entry.first, PetscMemTypeToString(attr.mtype), attr.size); 209 } 210 } 211 // clang-format off 212 if (id) *id = newid; 213 PetscCallCXX(map.emplace( 214 std::piecewise_construct, 215 std::forward_as_tuple(const_cast<MemoryMap::map_type::key_type>(ptr)), 216 std::forward_as_tuple(mtype, newid, size) 217 )); 218 // clang-format on 219 PetscFunctionReturn(PETSC_SUCCESS); 220 } 221 if (PetscDefined(USE_DEBUG)) { 222 const auto &old = it->second; 223 224 PetscCheck(PointerAttributes(mtype, old.id, size) == old, PETSC_COMM_SELF, PETSC_ERR_LIB, "Pointer %p appears to have been previously allocated with memtype %s, size %zu and assigned id %" PetscInt64_FMT ", which does not match new values: (mtype %s, size %zu, id %" PetscInt64_FMT ")", it->first, 225 PetscMemTypeToString(old.mtype), old.size, old.id, PetscMemTypeToString(mtype), size, old.id); 226 } 227 if (id) *id = it->second.id; 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 /*@C 232 PetscDeviceRegisterMemory - Register a pointer for use with device-aware memory system 233 234 Not Collective 235 236 Input Parameters: 237 + ptr - The pointer to register 238 . mtype - The `PetscMemType` of the pointer 239 - size - The size (in bytes) of the memory region 240 241 Notes: 242 `ptr` need not point to the beginning of the memory range, however the user should register 243 the 244 245 It's OK to re-register the same `ptr` repeatedly (subsequent registrations do nothing) 246 however the given `mtype` and `size` must match the original registration. 247 248 `size` may be 0 (in which case this routine does nothing). 249 250 Level: intermediate 251 252 .seealso: `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, `PetscDeviceFree()`, 253 `PetscDeviceArrayZero()` 254 @*/ 255 PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size) 256 { 257 PetscFunctionBegin; 258 if (PetscMemTypeHost(mtype)) PetscAssertPointer(ptr, 1); 259 if (PetscUnlikely(!size)) PetscFunctionReturn(PETSC_SUCCESS); // there is no point registering empty range 260 PetscCall(PetscDeviceRegisterMemory_Private(ptr, mtype, size)); 261 PetscFunctionReturn(PETSC_SUCCESS); 262 } 263 264 /* 265 PetscDeviceAllocate_Private - Allocate device-aware memory 266 267 Not Collective, Asynchronous, Auto-dependency aware 268 269 Input Parameters: 270 + dctx - The `PetscDeviceContext` used to allocate the memory 271 . clear - Whether or not the memory should be zeroed 272 . mtype - The type of memory to allocate 273 . n - The amount (in bytes) to allocate 274 - alignment - The alignment requirement (in bytes) of the allocated pointer 275 276 Output Parameter: 277 . ptr - The pointer to store the result in 278 279 Notes: 280 The user should prefer `PetscDeviceMalloc()` over this routine as it automatically computes 281 the size of the allocation and alignment based on the size of the datatype. 282 283 If the user is unsure about `alignment` -- or unable to compute it -- passing 284 `PETSC_MEMALIGN` will always work, though the user should beware that this may be quite 285 wasteful for very small allocations. 286 287 Memory allocated with this function must be freed with `PetscDeviceFree()` (or 288 `PetscDeviceDeallocate_Private()`). 289 290 If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`. 291 292 This routine falls back to using `PetscMalloc1()` or `PetscCalloc1()` (depending on the value 293 of `clear`) if PETSc was not configured with device support. The user should note that 294 `mtype` and `alignment` are ignored in this case, as these routines allocate only host memory 295 aligned to `PETSC_MEMALIGN`. 296 297 Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate 298 its value on function return, i.e.\: 299 300 .vb 301 PetscInt *ptr; 302 303 PetscDeviceAllocate_Private(dctx, PETSC_FALSE, PETSC_MEMTYPE_DEVICE, 20, alignof(PetscInt), (void**)&ptr); 304 305 PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize 306 307 ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization 308 .ve 309 310 DAG representation: 311 .vb 312 time -> 313 314 -> dctx - |= CALL =| -\- dctx --> 315 \- ptr -> 316 .ve 317 318 Level: intermediate 319 320 .N ASYNC_API 321 322 .seealso: `PetscDeviceMalloc()`, `PetscDeviceFree()`, `PetscDeviceDeallocate_Private()`, 323 `PetscDeviceArrayCopy()`, `PetscDeviceArrayZero()`, `PetscMemType` 324 */ 325 PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext dctx, PetscBool clear, PetscMemType mtype, std::size_t n, std::size_t alignment, void **PETSC_RESTRICT ptr) 326 { 327 PetscObjectId id = 0; 328 329 PetscFunctionBegin; 330 if (PetscDefined(USE_DEBUG)) { 331 const auto is_power_of_2 = [](std::size_t num) { return (num & (num - 1)) == 0; }; 332 333 PetscCheck(alignment != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu cannot be 0", alignment); 334 PetscCheck(is_power_of_2(alignment), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu must be a power of 2", alignment); 335 } 336 PetscAssertPointer(ptr, 6); 337 *ptr = nullptr; 338 if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS); 339 PetscCall(memory_map.register_finalize()); 340 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 341 342 // get our pointer here 343 if (dctx->ops->memalloc) { 344 PetscUseTypeMethod(dctx, memalloc, clear, mtype, n, alignment, ptr); 345 } else { 346 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "allocating")); 347 PetscCall(PetscMallocA(1, clear, __LINE__, PETSC_FUNCTION_NAME, __FILE__, n, ptr)); 348 } 349 PetscCall(PetscDeviceRegisterMemory_Private(*ptr, mtype, n, &id)); 350 // Note this is a "write" so that the next dctx to try and read from the pointer has to wait 351 // for the allocation to be ready 352 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, id, PETSC_MEMORY_ACCESS_WRITE, "memory allocation")); 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 /* 357 PetscDeviceDeallocate_Private - Free device-aware memory 358 359 Not Collective, Asynchronous, Auto-dependency aware 360 361 Input Parameters: 362 + dctx - The `PetscDeviceContext` used to free the memory 363 - ptr - The pointer to free 364 365 Level: intermediate 366 367 Notes: 368 `ptr` must have been allocated using any of `PetscDeviceMalloc()`, `PetscDeviceCalloc()` or 369 `PetscDeviceAllocate_Private()`, or registered with the system via `PetscDeviceRegisterMemory()`. 370 371 The user should prefer `PetscDeviceFree()` over this routine as it automatically sets `ptr` 372 to `PETSC_NULLPTR` on successful deallocation. 373 374 `ptr` may be `NULL`. 375 376 This routine falls back to using `PetscFree()` if PETSc was not configured with device 377 support. The user should note that `PetscFree()` frees only host memory. 378 379 DAG representation: 380 .vb 381 time -> 382 383 -> dctx -/- |= CALL =| - dctx -> 384 -> ptr -/ 385 .ve 386 387 .N ASYNC_API 388 389 .seealso: `PetscDeviceFree()`, `PetscDeviceAllocate_Private()` 390 */ 391 PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext dctx, void *PETSC_RESTRICT ptr) 392 { 393 PetscFunctionBegin; 394 if (ptr) { 395 auto &map = memory_map.map; 396 const auto found_it = map.find(const_cast<MemoryMap::map_type::key_type>(ptr)); 397 398 if (PetscUnlikelyDebug(found_it == map.end())) { 399 // OK this is a bad pointer, now determine why 400 const auto it = memory_map.search_for(ptr); 401 402 // if it is map.cend() then no allocation owns it, meaning it was not allocated by us! 403 PetscCheck(it != map.cend(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Pointer %p was not allocated via PetscDeviceAllocate_Private()", ptr); 404 // if we are here then we did allocate it but the user has tried to do something along 405 // the lines of: 406 // 407 // allocate(&ptr, size); 408 // deallocate(ptr+5); 409 // 410 auto &&attr = it->second; 411 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempting to deallocate pointer %p which is a suballocation of %p (memtype %s, id %" PetscInt64_FMT ", size %zu bytes)", ptr, it->first, PetscMemTypeToString(attr.mtype), attr.id, attr.size); 412 } 413 auto &&attr = found_it->second; 414 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 415 // mark intent BEFORE we free, note we mark as write so that we are made to wait on any 416 // outstanding reads (don't want to kill the pointer before they are done) 417 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory deallocation")); 418 // do free 419 if (dctx->ops->memfree) { 420 PetscUseTypeMethod(dctx, memfree, attr.mtype, (void **)&ptr); 421 } else { 422 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "freeing")); 423 } 424 // if ptr still exists, then the device context could not handle it 425 if (ptr) PetscCall(PetscFree(ptr)); 426 PetscCallCXX(map.erase(found_it)); 427 } 428 PetscFunctionReturn(PETSC_SUCCESS); 429 } 430 431 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 432 /*@C 433 PetscDeviceMemcpy - Copy memory in a device-aware manner 434 435 Not Collective, Asynchronous, Auto-dependency aware 436 437 Input Parameters: 438 + dctx - The `PetscDeviceContext` used to copy the memory 439 . dest - The pointer to copy to 440 . src - The pointer to copy from 441 - n - The amount (in bytes) to copy 442 443 Level: intermediate 444 445 Notes: 446 Both `dest` and `src` must have been allocated by `PetscDeviceMalloc()` or 447 `PetscDeviceCalloc()`. 448 449 `src` and `dest` cannot overlap. 450 451 If both `src` and `dest` are on the host this routine is fully synchronous. 452 453 The user should prefer `PetscDeviceArrayCopy()` over this routine as it automatically 454 computes the number of bytes to copy from the size of the pointer types. 455 456 DAG representation: 457 .vb 458 time -> 459 460 -> dctx - |= CALL =| - dctx -> 461 -> dest ---------------------> 462 -> src ----------------------> 463 .ve 464 465 .N ASYNC_API 466 467 .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, 468 `PetscDeviceFree()` 469 @*/ 470 PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext dctx, void *PETSC_RESTRICT dest, const void *PETSC_RESTRICT src, std::size_t n) 471 { 472 PetscFunctionBegin; 473 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 474 PetscCheck(dest, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy to a NULL pointer"); 475 PetscCheck(src, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy from a NULL pointer"); 476 if (dest == src) PetscFunctionReturn(PETSC_SUCCESS); 477 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 478 { 479 const auto &dest_attr = memory_map.search_for(dest, true)->second; 480 const auto &src_attr = memory_map.search_for(src, true)->second; 481 const auto mode = PetscMemTypeToDeviceCopyMode(dest_attr.mtype, src_attr.mtype); 482 483 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, src_attr.id, PETSC_MEMORY_ACCESS_READ, "memory copy (src)")); 484 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, dest_attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory copy (dest)")); 485 // perform the copy 486 if (dctx->ops->memcopy) { 487 PetscUseTypeMethod(dctx, memcopy, dest, src, n, mode); 488 if (mode == PETSC_DEVICE_COPY_HTOD) { 489 PetscCall(PetscLogCpuToGpu(n)); 490 } else if (mode == PETSC_DEVICE_COPY_DTOH) { 491 PetscCall(PetscLogGpuToCpu(n)); 492 } 493 } else { 494 // REVIEW ME: we might potentially need to sync here if the memory is device-allocated 495 // (pinned) but being copied by a host dctx 496 PetscCall(PetscDeviceCheckCapable_Private(dctx, mode == PETSC_DEVICE_COPY_HTOH, "copying")); 497 PetscCall(PetscMemcpy(dest, src, n)); 498 } 499 } 500 PetscFunctionReturn(PETSC_SUCCESS); 501 } 502 503 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 504 /*@C 505 PetscDeviceMemset - Memset device-aware memory 506 507 Not Collective, Asynchronous, Auto-dependency aware 508 509 Input Parameters: 510 + dctx - The `PetscDeviceContext` used to memset the memory 511 . ptr - The pointer to the memory 512 . v - The value to set 513 - n - The amount (in bytes) to set 514 515 Level: intermediate 516 517 Notes: 518 `ptr` must have been allocated by `PetscDeviceMalloc()` or `PetscDeviceCalloc()`. 519 520 The user should prefer `PetscDeviceArrayZero()` over this routine as it automatically 521 computes the number of bytes to copy from the size of the pointer types, though they should 522 note that it only zeros memory. 523 524 This routine is analogous to `memset()`. That is, this routine copies the value 525 `static_cast<unsigned char>(v)` into each of the first count characters of the object pointed 526 to by `dest`. 527 528 If `dest` is on device, this routine is asynchronous. 529 530 DAG representation: 531 .vb 532 time -> 533 534 -> dctx - |= CALL =| - dctx -> 535 -> dest ---------------------> 536 .ve 537 538 .N ASYNC_API 539 540 .seealso: `PetscDeviceArrayZero()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, 541 `PetscDeviceFree()` 542 @*/ 543 PetscErrorCode PetscDeviceMemset(PetscDeviceContext dctx, void *ptr, PetscInt v, std::size_t n) 544 { 545 PetscFunctionBegin; 546 if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS); 547 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to memset a NULL pointer"); 548 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 549 { 550 const auto &attr = memory_map.search_for(ptr, true)->second; 551 552 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory set")); 553 if (dctx->ops->memset) { 554 PetscUseTypeMethod(dctx, memset, attr.mtype, ptr, v, n); 555 } else { 556 // REVIEW ME: we might potentially need to sync here if the memory is device-allocated 557 // (pinned) but being memset by a host dctx 558 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "memsetting")); 559 std::memset(ptr, static_cast<int>(v), n); 560 } 561 } 562 PetscFunctionReturn(PETSC_SUCCESS); 563 } 564