1 #include <petsc/private/deviceimpl.h> /*I <petscdevice.h> I*/ 2 3 #include <petsc/private/cpp/register_finalize.hpp> 4 #include <petsc/private/cpp/type_traits.hpp> // integral_value 5 #include <petsc/private/cpp/unordered_map.hpp> 6 7 #include <algorithm> // std::find_if 8 #include <cstring> // std::memset 9 10 const char *const PetscDeviceCopyModes[] = {"host_to_host", "device_to_host", "host_to_device", "device_to_device", "auto", "PetscDeviceCopyMode", "PETSC_DEVICE_COPY_", nullptr}; 11 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOH) == 0, ""); 12 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOH) == 1, ""); 13 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOD) == 2, ""); 14 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOD) == 3, ""); 15 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_AUTO) == 4, ""); 16 17 // GCC implementation for std::hash<T*>. LLVM's libc++ is almost 2x slower because they do all 18 // kinds of complicated murmur hashing, so we make sure to enforce GCC's version. 19 struct PointerHash { 20 template <typename T> 21 PETSC_NODISCARD std::size_t operator()(const T *ptr) const noexcept 22 { 23 return reinterpret_cast<std::size_t>(ptr); 24 } 25 }; 26 27 // ========================================================================================== 28 // PointerAttributes 29 // 30 // A set of attributes for a pointer 31 // ========================================================================================== 32 33 struct PointerAttributes { 34 PetscMemType mtype = PETSC_MEMTYPE_HOST; // memtype of allocation 35 PetscObjectId id = 0; // id of allocation 36 std::size_t size = 0; // size of allocation (bytes) 37 38 // even though this is a POD and can be aggregate initialized, the STL uses () constructors 39 // in unordered_map and so we need to provide a trivial constructor... 40 constexpr PointerAttributes() = default; 41 constexpr PointerAttributes(PetscMemType, PetscObjectId, std::size_t) noexcept; 42 43 bool operator==(const PointerAttributes &) const noexcept; 44 45 PETSC_NODISCARD bool contains(const void *, const void *) const noexcept; 46 }; 47 48 // ========================================================================================== 49 // PointerAttributes - Public API 50 // ========================================================================================== 51 52 inline constexpr PointerAttributes::PointerAttributes(PetscMemType mtype_, PetscObjectId id_, std::size_t size_) noexcept : mtype(mtype_), id(id_), size(size_) { } 53 54 inline bool PointerAttributes::operator==(const PointerAttributes &other) const noexcept 55 { 56 return (mtype == other.mtype) && (id == other.id) && (size == other.size); 57 } 58 59 /* 60 PointerAttributes::contains - asks and answers the question, does ptr_begin contain ptr 61 62 Input Parameters: 63 + ptr_begin - pointer to the start of the range to check 64 - ptr - the pointer to query 65 66 Notes: 67 Returns true if ptr falls within ptr_begins range, false otherwise. 68 */ 69 inline bool PointerAttributes::contains(const void *ptr_begin, const void *ptr) const noexcept 70 { 71 return (ptr >= ptr_begin) && (ptr < (static_cast<const char *>(ptr_begin) + size)); 72 } 73 74 // ========================================================================================== 75 // MemoryMap 76 // 77 // Since the pointers allocated via PetscDeviceAllocate_Private() may be device pointers we 78 // cannot just store meta-data within the pointer itself (as we can't dereference them). So 79 // instead we need to keep an extra map to keep track of them 80 // 81 // Each entry maps pointer -> { 82 // PetscMemType - The memtype of the pointer 83 // PetscObjectId - A unique ID assigned at allocation or registration so auto-dep can 84 // identify the pointer 85 // size - The size (in bytes) of the allocation 86 // } 87 // ========================================================================================== 88 89 class MemoryMap : public Petsc::RegisterFinalizeable<MemoryMap> { 90 public: 91 using map_type = Petsc::UnorderedMap<void *, PointerAttributes, PointerHash>; 92 93 map_type map{}; 94 95 PETSC_NODISCARD map_type::const_iterator search_for(const void *, bool = false) const noexcept; 96 97 private: 98 friend class Petsc::RegisterFinalizeable<MemoryMap>; 99 PetscErrorCode register_finalize_() noexcept; 100 PetscErrorCode finalize_() noexcept; 101 }; 102 103 // ========================================================================================== 104 // MemoryMap - Private API 105 // ========================================================================================== 106 107 PetscErrorCode MemoryMap::register_finalize_() noexcept 108 { 109 PetscFunctionBegin; 110 // Preallocate, this does give a modest performance bump since unordered_map is so __dog__ 111 // slow if it needs to rehash. Experiments show that users tend not to have more than 5 or 112 // so concurrently live pointers lying around. 10 at most. 113 PetscCall(map.reserve(16)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 PetscErrorCode MemoryMap::finalize_() noexcept 118 { 119 PetscFunctionBegin; 120 PetscCall(PetscInfo(nullptr, "Finalizing memory map\n")); 121 PetscCallCXX(map = map_type{}); 122 PetscFunctionReturn(PETSC_SUCCESS); 123 } 124 125 // ========================================================================================== 126 // MemoryMap - Public API 127 // ========================================================================================== 128 129 /* 130 MemoryMap::search_for - retrieve an iterator to the key-value pair for a pointer in the map 131 132 Input Parameters: 133 + ptr - pointer to search for 134 - must_find - true if an error is raised if the pointer is not found (default: false) 135 136 Notes: 137 Accounts for sub-regions, i.e. if ptr is contained within another pointers region, it returns 138 the iterator to the super-pointers key-value pair. 139 140 If ptr is not found and must_find is false returns map.end(), otherwise raises an error 141 */ 142 MemoryMap::map_type::const_iterator MemoryMap::search_for(const void *ptr, bool must_find) const noexcept 143 { 144 const auto end_it = map.end(); 145 auto it = map.find(const_cast<map_type::key_type>(ptr)); 146 147 // ptr was found, and points to an entire block 148 PetscFunctionBegin; 149 if (it != end_it) PetscFunctionReturn(it); 150 // wasn't found, but maybe its part of a block. have to search every block for it 151 // clang-format off 152 it = std::find_if(map.begin(), end_it, [ptr](map_type::const_iterator::reference map_it) { 153 return map_it.second.contains(map_it.first, ptr); 154 }); 155 // clang-format on 156 PetscCheckAbort(!must_find || it != end_it, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Pointer %p was not registered with the memory tracker, call PetscDeviceRegisterMemory() on it", ptr); 157 PetscFunctionReturn(it); 158 } 159 160 static MemoryMap memory_map; 161 162 // ========================================================================================== 163 // Utility functions 164 // ========================================================================================== 165 166 static PetscErrorCode PetscDeviceCheckCapable_Private(PetscDeviceContext dctx, bool cond, const char descr[]) 167 { 168 PetscFunctionBegin; 169 PetscCheck(cond, PETSC_COMM_SELF, PETSC_ERR_SUP, "Device context (id: %" PetscInt64_FMT ", name: %s, type: %s) can only handle %s host memory", PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name, dctx->device ? PetscDeviceTypes[dctx->device->type] : "unknown", descr); 170 PetscFunctionReturn(PETSC_SUCCESS); 171 } 172 173 // A helper utility, since register is called from PetscDeviceRegisterMemory() and 174 // PetscDevicAllocate(). The latter also needs the generated id, so instead of making it search 175 // the map again we just return it here 176 static PetscErrorCode PetscDeviceRegisterMemory_Private(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size, PetscObjectId *PETSC_RESTRICT id = nullptr) 177 { 178 auto &map = memory_map.map; 179 const auto it = memory_map.search_for(ptr); 180 181 PetscFunctionBegin; 182 if (it == map.cend()) { 183 // pointer was never registered with the map, insert it and bail 184 const auto newid = PetscObjectNewId_Internal(); 185 186 if (PetscDefined(USE_DEBUG)) { 187 const auto tmp = PointerAttributes(mtype, newid, size); 188 189 for (const auto &entry : map) { 190 auto &&attr = entry.second; 191 192 // REVIEW ME: maybe this should just be handled... 193 PetscCheck(!tmp.contains(ptr, entry.first), PETSC_COMM_SELF, PETSC_ERR_ORDER, "Trying to register pointer %p (memtype %s, size %zu) but it appears you have already registered a sub-region of it (pointer %p, memtype %s, size %zu). Must register the larger region first", ptr, PetscMemTypeToString(mtype), size, 194 entry.first, PetscMemTypeToString(attr.mtype), attr.size); 195 } 196 } 197 // clang-format off 198 if (id) *id = newid; 199 PetscCallCXX(map.emplace( 200 std::piecewise_construct, 201 std::forward_as_tuple(const_cast<MemoryMap::map_type::key_type>(ptr)), 202 std::forward_as_tuple(mtype, newid, size) 203 )); 204 // clang-format on 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 if (PetscDefined(USE_DEBUG)) { 208 const auto &old = it->second; 209 210 PetscCheck(PointerAttributes(mtype, old.id, size) == old, PETSC_COMM_SELF, PETSC_ERR_LIB, "Pointer %p appears to have been previously allocated with memtype %s, size %zu and assigned id %" PetscInt64_FMT ", which does not match new values: (mtype %s, size %zu, id %" PetscInt64_FMT ")", it->first, 211 PetscMemTypeToString(old.mtype), old.size, old.id, PetscMemTypeToString(mtype), size, old.id); 212 } 213 if (id) *id = it->second.id; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 /*@C 218 PetscDeviceRegisterMemory - Register a pointer for use with device-aware memory system 219 220 Not Collective 221 222 Input Parameters: 223 + ptr - The pointer to register 224 . mtype - The `PetscMemType` of the pointer 225 - size - The size (in bytes) of the memory region 226 227 Notes: 228 `ptr` need not point to the beginning of the memory range, however the user should register 229 the 230 231 It's OK to re-register the same `ptr` repeatedly (subsequent registrations do nothing) 232 however the given `mtype` and `size` must match the original registration. 233 234 `size` may be 0 (in which case this routine does nothing). 235 236 Level: intermediate 237 238 .seealso: `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, `PetscDeviceFree()`, 239 `PetscDeviceArrayZero()` 240 @*/ 241 PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size) 242 { 243 PetscFunctionBegin; 244 if (PetscMemTypeHost(mtype)) PetscValidPointer(ptr, 1); 245 if (PetscUnlikely(!size)) PetscFunctionReturn(PETSC_SUCCESS); // there is no point registering empty range 246 PetscCall(PetscDeviceRegisterMemory_Private(ptr, mtype, size)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 /* 251 PetscDeviceAllocate_Private - Allocate device-aware memory 252 253 Not Collective, Asynchronous, Auto-dependency aware 254 255 Input Parameters: 256 + dctx - The `PetscDeviceContext` used to allocate the memory 257 . clear - Whether or not the memory should be zeroed 258 . mtype - The type of memory to allocate 259 . n - The amount (in bytes) to allocate 260 - alignment - The alignment requirement (in bytes) of the allocated pointer 261 262 Output Parameter: 263 . ptr - The pointer to store the result in 264 265 Notes: 266 The user should prefer `PetscDeviceMalloc()` over this routine as it automatically computes 267 the size of the allocation and alignment based on the size of the datatype. 268 269 If the user is unsure about `alignment` -- or unable to compute it -- passing 270 `PETSC_MEMALIGN` will always work, though the user should beware that this may be quite 271 wasteful for very small allocations. 272 273 Memory allocated with this function must be freed with `PetscDeviceFree()` (or 274 `PetscDeviceDeallocate_Private()`). 275 276 If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`. 277 278 This routine falls back to using `PetscMalloc1()` or `PetscCalloc1()` (depending on the value 279 of `clear`) if PETSc was not configured with device support. The user should note that 280 `mtype` and `alignment` are ignored in this case, as these routines allocate only host memory 281 aligned to `PETSC_MEMALIGN`. 282 283 Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate 284 its value on function return, i.e.\: 285 286 .vb 287 PetscInt *ptr; 288 289 PetscDeviceAllocate_Private(dctx, PETSC_FALSE, PETSC_MEMTYPE_DEVICE, 20, alignof(PetscInt), (void**)&ptr); 290 291 PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize 292 293 ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization 294 .ve 295 296 DAG representation: 297 .vb 298 time -> 299 300 -> dctx - |= CALL =| -\- dctx --> 301 \- ptr -> 302 .ve 303 304 Level: intermediate 305 306 .N ASYNC_API 307 308 .seealso: `PetscDeviceMalloc()`, `PetscDeviceFree()`, `PetscDeviceDeallocate_Private()`, 309 `PetscDeviceArrayCopy()`, `PetscDeviceArrayZero()`, `PetscMemType` 310 */ 311 PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext dctx, PetscBool clear, PetscMemType mtype, std::size_t n, std::size_t alignment, void **PETSC_RESTRICT ptr) 312 { 313 PetscObjectId id = 0; 314 315 PetscFunctionBegin; 316 if (PetscDefined(USE_DEBUG)) { 317 const auto is_power_of_2 = [](std::size_t num) { return (num & (num - 1)) == 0; }; 318 319 PetscCheck(alignment != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu cannot be 0", alignment); 320 PetscCheck(is_power_of_2(alignment), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu must be a power of 2", alignment); 321 } 322 PetscValidPointer(ptr, 6); 323 *ptr = nullptr; 324 if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS); 325 PetscCall(memory_map.register_finalize()); 326 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 327 328 // get our pointer here 329 if (dctx->ops->memalloc) { 330 PetscUseTypeMethod(dctx, memalloc, clear, mtype, n, alignment, ptr); 331 } else { 332 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "allocating")); 333 PetscCall(PetscMallocA(1, clear, __LINE__, PETSC_FUNCTION_NAME, __FILE__, n, ptr)); 334 } 335 PetscCall(PetscDeviceRegisterMemory_Private(*ptr, mtype, n, &id)); 336 // Note this is a "write" so that the next dctx to try and read from the pointer has to wait 337 // for the allocation to be ready 338 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, id, PETSC_MEMORY_ACCESS_WRITE, "memory allocation")); 339 PetscFunctionReturn(PETSC_SUCCESS); 340 } 341 342 /* 343 PetscDeviceDeallocate_Private - Free device-aware memory 344 345 Not Collective, Asynchronous, Auto-dependency aware 346 347 Input Parameters: 348 + dctx - The `PetscDeviceContext` used to free the memory 349 - ptr - The pointer to free 350 351 Notes: 352 `ptr` must have been allocated using any of `PetscDeviceMalloc()`, `PetscDeviceCalloc()` or 353 `PetscDeviceAllocate_Private()`, or registered with the system via `PetscDeviceRegisterMemory()`. 354 355 The user should prefer `PetscDeviceFree()` over this routine as it automatically sets `ptr` 356 to `PETSC_NULLPTR` on successful deallocation. 357 358 `ptr` may be `NULL`. 359 360 This routine falls back to using `PetscFree()` if PETSc was not configured with device 361 support. The user should note that `PetscFree()` frees only host memory. 362 363 DAG representation: 364 .vb 365 time -> 366 367 -> dctx -/- |= CALL =| - dctx -> 368 -> ptr -/ 369 .ve 370 371 Level: intermediate 372 373 .N ASYNC_API 374 375 .seealso: `PetscDeviceFree()`, `PetscDeviceAllocate_Private()` 376 */ 377 PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext dctx, void *PETSC_RESTRICT ptr) 378 { 379 PetscFunctionBegin; 380 if (ptr) { 381 auto &map = memory_map.map; 382 const auto found_it = map.find(const_cast<MemoryMap::map_type::key_type>(ptr)); 383 384 if (PetscUnlikelyDebug(found_it == map.end())) { 385 // OK this is a bad pointer, now determine why 386 const auto it = memory_map.search_for(ptr); 387 388 // if it is map.cend() then no allocation owns it, meaning it was not allocated by us! 389 PetscCheck(it != map.cend(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Pointer %p was not allocated via PetscDeviceAllocate_Private()", ptr); 390 // if we are here then we did allocate it but the user has tried to do something along 391 // the lines of: 392 // 393 // allocate(&ptr, size); 394 // deallocate(ptr+5); 395 // 396 auto &&attr = it->second; 397 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempting to deallocate pointer %p which is a suballocation of %p (memtype %s, id %" PetscInt64_FMT ", size %zu bytes)", ptr, it->first, PetscMemTypeToString(attr.mtype), attr.id, attr.size); 398 } 399 auto &&attr = found_it->second; 400 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 401 // mark intent BEFORE we free, note we mark as write so that we are made to wait on any 402 // outstanding reads (don't want to kill the pointer before they are done) 403 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory deallocation")); 404 // do free 405 if (dctx->ops->memfree) { 406 PetscUseTypeMethod(dctx, memfree, attr.mtype, (void **)&ptr); 407 } else { 408 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "freeing")); 409 } 410 // if ptr still exists, then the device context could not handle it 411 if (ptr) PetscCall(PetscFree(ptr)); 412 PetscCallCXX(map.erase(found_it)); 413 } 414 PetscFunctionReturn(PETSC_SUCCESS); 415 } 416 417 /*@C 418 PetscDeviceMemcpy - Copy memory in a device-aware manner 419 420 Not Collective, Asynchronous, Auto-dependency aware 421 422 Input Parameters: 423 + dctx - The `PetscDeviceContext` used to copy the memory 424 . dest - The pointer to copy to 425 . src - The pointer to copy from 426 - n - The amount (in bytes) to copy 427 428 Notes: 429 Both `dest` and `src` must have been allocated by `PetscDeviceMalloc()` or 430 `PetscDeviceCalloc()`. 431 432 `src` and `dest` cannot overlap. 433 434 If both `src` and `dest` are on the host this routine is fully synchronous. 435 436 The user should prefer `PetscDeviceArrayCopy()` over this routine as it automatically 437 computes the number of bytes to copy from the size of the pointer types. 438 439 DAG representation: 440 .vb 441 time -> 442 443 -> dctx - |= CALL =| - dctx -> 444 -> dest ---------------------> 445 -> src ----------------------> 446 .ve 447 448 Level: intermediate 449 450 .N ASYNC_API 451 452 .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, 453 `PetscDeviceFree()` 454 @*/ 455 PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext dctx, void *PETSC_RESTRICT dest, const void *PETSC_RESTRICT src, std::size_t n) 456 { 457 PetscFunctionBegin; 458 if (!n) PetscFunctionReturn(PETSC_SUCCESS); 459 PetscCheck(dest, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy to a NULL pointer"); 460 PetscCheck(src, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy from a NULL pointer"); 461 if (dest == src) PetscFunctionReturn(PETSC_SUCCESS); 462 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 463 { 464 const auto &dest_attr = memory_map.search_for(dest, true)->second; 465 const auto &src_attr = memory_map.search_for(src, true)->second; 466 const auto mode = PetscMemTypeToDeviceCopyMode(dest_attr.mtype, src_attr.mtype); 467 468 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, src_attr.id, PETSC_MEMORY_ACCESS_READ, "memory copy (src)")); 469 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, dest_attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory copy (dest)")); 470 // perform the copy 471 if (dctx->ops->memcopy) { 472 PetscUseTypeMethod(dctx, memcopy, dest, src, n, mode); 473 if (mode == PETSC_DEVICE_COPY_HTOD) { 474 PetscCall(PetscLogCpuToGpu(n)); 475 } else if (mode == PETSC_DEVICE_COPY_DTOH) { 476 PetscCall(PetscLogGpuToCpu(n)); 477 } 478 } else { 479 // REVIEW ME: we might potentially need to sync here if the memory is device-allocated 480 // (pinned) but being copied by a host dctx 481 PetscCall(PetscDeviceCheckCapable_Private(dctx, mode == PETSC_DEVICE_COPY_HTOH, "copying")); 482 PetscCall(PetscMemcpy(dest, src, n)); 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 /*@C 489 PetscDeviceMemset - Memset device-aware memory 490 491 Not Collective, Asynchronous, Auto-dependency aware 492 493 Input Parameters: 494 + dctx - The `PetscDeviceContext` used to memset the memory 495 . ptr - The pointer to the memory 496 . v - The value to set 497 - n - The amount (in bytes) to set 498 499 Notes: 500 `ptr` must have been allocated by `PetscDeviceMalloc()` or `PetscDeviceCalloc()`. 501 502 The user should prefer `PetscDeviceArrayZero()` over this routine as it automatically 503 computes the number of bytes to copy from the size of the pointer types, though they should 504 note that it only zeros memory. 505 506 This routine is analogous to `memset()`. That is, this routine copies the value 507 `static_cast<unsigned char>(v)` into each of the first count characters of the object pointed 508 to by `dest`. 509 510 If `dest` is on device, this routine is asynchronous. 511 512 DAG representation: 513 .vb 514 time -> 515 516 -> dctx - |= CALL =| - dctx -> 517 -> dest ---------------------> 518 .ve 519 520 Level: intermediate 521 522 .N ASYNC_API 523 524 .seealso: `PetscDeviceArrayZero()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, 525 `PetscDeviceFree()` 526 @*/ 527 PetscErrorCode PetscDeviceMemset(PetscDeviceContext dctx, void *ptr, PetscInt v, std::size_t n) 528 { 529 PetscFunctionBegin; 530 if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS); 531 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to memset a NULL pointer"); 532 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 533 { 534 const auto &attr = memory_map.search_for(ptr, true)->second; 535 536 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory set")); 537 if (dctx->ops->memset) { 538 PetscUseTypeMethod(dctx, memset, attr.mtype, ptr, v, n); 539 } else { 540 // REVIEW ME: we might potentially need to sync here if the memory is device-allocated 541 // (pinned) but being memset by a host dctx 542 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "memsetting")); 543 std::memset(ptr, static_cast<int>(v), n); 544 } 545 } 546 PetscFunctionReturn(PETSC_SUCCESS); 547 } 548