1 #include <petsc/private/deviceimpl.h> /*I <petscdevice.h> I*/ 2 3 #include <petsc/private/cpp/register_finalize.hpp> 4 #include <petsc/private/cpp/type_traits.hpp> // integral_value 5 6 #include <unordered_map> 7 #include <algorithm> // std::find_if 8 #include <cstring> // std::memset 9 10 const char *const PetscDeviceCopyModes[] = {"host_to_host", "device_to_host", "host_to_device", "device_to_device", "auto", "PetscDeviceCopyMode", "PETSC_DEVICE_COPY_", nullptr}; 11 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_HTOH) == 0, ""); 12 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_DTOH) == 1, ""); 13 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_HTOD) == 2, ""); 14 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_DTOD) == 3, ""); 15 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_AUTO) == 4, ""); 16 17 // ========================================================================================== 18 // MemoryMap 19 // 20 // Since the pointers allocated via PetscDeviceAllocate_Private() may be device pointers we 21 // cannot just store meta-data within the pointer itself (as we can't dereference them). So 22 // instead we need to keep an extra map to keep track of them 23 // 24 // Each entry maps pointer -> { 25 // PetscMemType - The memtype of the pointer 26 // PetscObjectId - A unique ID assigned at allocation or registratrion so auto-dep can 27 // identify the pointer 28 // size - The size (in bytes) of the allocation 29 // } 30 // ========================================================================================== 31 32 // GCC implementation for std::hash<T*>. LLVM's libc++ is almost 2x slower because they do all 33 // kinds of complicated murmur hashing, so we make sure to enforce GCC's version. 34 struct PointerHash { 35 template <typename T> 36 PETSC_NODISCARD std::size_t operator()(const T *ptr) const noexcept 37 { 38 return reinterpret_cast<std::size_t>(ptr); 39 } 40 }; 41 42 class MemoryMap : public Petsc::RegisterFinalizeable<MemoryMap> { 43 public: 44 struct PointerAttributes { 45 PetscMemType mtype{}; // memtype of allocation 46 PetscObjectId id{}; // id of allocation 47 std::size_t size{}; // size of allocation (bytes) 48 49 // even though this is a POD and can be aggregate initialized, the STL uses () constructors 50 // in unordered_map and so we need to provide a trivial contructor... 51 constexpr PointerAttributes(PetscMemType, PetscObjectId, std::size_t) noexcept; 52 constexpr PointerAttributes() noexcept = default; 53 constexpr PointerAttributes(const PointerAttributes &) noexcept = default; 54 PETSC_CONSTEXPR_14 PointerAttributes &operator=(const PointerAttributes &) noexcept = default; 55 constexpr PointerAttributes(PointerAttributes &&) noexcept = default; 56 PETSC_CONSTEXPR_14 PointerAttributes &operator=(PointerAttributes &&) noexcept = default; 57 58 bool operator==(const PointerAttributes &) const noexcept; 59 60 PETSC_NODISCARD bool contains(const void *, const void *) const noexcept; 61 }; 62 63 using map_type = std::unordered_map<void *, PointerAttributes, PointerHash>; 64 65 map_type map; 66 67 // return the iterator of the allocation containing ptr, or map.cend() if not found 68 PETSC_NODISCARD map_type::const_iterator search_for(const void *, bool = false) const noexcept; 69 70 private: 71 friend class Petsc::RegisterFinalizeable<MemoryMap>; 72 PETSC_NODISCARD PetscErrorCode register_finalize_() noexcept; 73 PETSC_NODISCARD PetscErrorCode finalize_() noexcept; 74 }; 75 76 // ========================================================================================== 77 // PointerAttributes 78 // ========================================================================================== 79 80 constexpr MemoryMap::PointerAttributes::PointerAttributes(PetscMemType mtype_, PetscObjectId id_, std::size_t size_) noexcept : mtype(mtype_), id(id_), size(size_) { } 81 82 bool MemoryMap::PointerAttributes::operator==(const PointerAttributes &other) const noexcept 83 { 84 return mtype == other.mtype && id == other.id && size == other.size; 85 } 86 87 bool MemoryMap::PointerAttributes::contains(const void *ptr_begin, const void *ptr) const noexcept 88 { 89 return (ptr >= ptr_begin) && (ptr < (static_cast<const char *>(ptr_begin) + size)); 90 } 91 92 // ========================================================================================== 93 // Memory map - Private API 94 // ========================================================================================== 95 96 PetscErrorCode MemoryMap::register_finalize_() noexcept 97 { 98 PetscFunctionBegin; 99 // Preallocate, this does give a modest performance bump since unordered_map is so __dog__ 100 // slow if it needs to rehash. Experiments show that users tend not to have more than 5 or 101 // so concurrently live pointers lying around. 10 at most. 102 PetscCallCXX(map.reserve(16)); 103 PetscFunctionReturn(0); 104 } 105 106 PetscErrorCode MemoryMap::finalize_() noexcept 107 { 108 PetscFunctionBegin; 109 PetscCall(PetscInfo(nullptr, "Finalizing memory map\n")); 110 PetscCallCXX(map = map_type{}); 111 PetscFunctionReturn(0); 112 } 113 114 // ========================================================================================== 115 // Memory map - Public API 116 // ========================================================================================== 117 118 /* 119 MemoryMap::search_for - retrieve an iterator to the key-value pair for a pointer in the map 120 121 Input Parameters: 122 + ptr - pointer to search for 123 - must_find - true if an error is raised if the pointer is not found (default: false) 124 125 Notes: 126 Accounts for sub-regions, i.e. if ptr is contained within another pointers region, it returns 127 the iterator to the super-pointers key-value pair. 128 129 If ptr is not found and must_find is false returns map.end(), otherwise raises an error 130 */ 131 MemoryMap::map_type::const_iterator MemoryMap::search_for(const void *ptr, bool must_find) const noexcept 132 { 133 const auto end = map.end(); 134 auto it = map.find(const_cast<map_type::key_type>(ptr)); 135 136 // ptr was found, and points to an entire block 137 PetscFunctionBegin; 138 if (it != end) PetscFunctionReturn(it); 139 // wasn't found, but maybe its part of a block. have to search every block for it 140 // clang-format off 141 it = std::find_if(map.begin(), end, [ptr](const map_type::const_iterator::value_type &map_it) { 142 return map_it.second.contains(map_it.first, ptr); 143 }); 144 PetscCheckAbort(!must_find || it != end, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Pointer %p was not registered with the memory tracker, call PetscDeviceRegisterMemory() on it", ptr); 145 PetscFunctionReturn(it); 146 // clang-format on 147 } 148 149 static MemoryMap memory_map; 150 151 // ========================================================================================== 152 // Utility functions 153 // ========================================================================================== 154 155 static PetscErrorCode PetscDeviceCheckCapable_Private(PetscDeviceContext dctx, bool cond, const char descr[]) 156 { 157 PetscFunctionBegin; 158 PetscCheck(cond, PETSC_COMM_SELF, PETSC_ERR_SUP, "Device context (id: %" PetscInt64_FMT ", name: %s, type: %s) can only handle %s host memory", PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name, dctx->device ? PetscDeviceTypes[dctx->device->type] : "unknown", descr); 159 PetscFunctionReturn(0); 160 } 161 162 // A helper utility, since register is called from PetscDeviceRegisterMemory() and 163 // PetscDevicAllocate(). The latter also needs the generated id, so instead of making it search 164 // the map again we just return it here 165 static PetscErrorCode PetscDeviceRegisterMemory_Private(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size, PetscObjectId *PETSC_RESTRICT id = nullptr) 166 { 167 auto &map = memory_map.map; 168 const auto it = memory_map.search_for(ptr); 169 170 PetscFunctionBegin; 171 if (it == map.cend()) { 172 // pointer was never registered with the map, insert it and bail 173 const auto newid = PetscObjectNewId_Internal(); 174 175 if (PetscDefined(USE_DEBUG)) { 176 const auto tmp = MemoryMap::PointerAttributes(mtype, newid, size); 177 178 for (const auto &entry : map) { 179 // REVIEW ME: maybe this should just be handled... 180 PetscCheck(!tmp.contains(ptr, entry.first), PETSC_COMM_SELF, PETSC_ERR_ORDER, "Trying to register pointer %p (memtype %s, size %zu) but it appears you have already registered a sub-region of it (pointer %p, memtype %s, size %zu). Must register the larger region first", ptr, PetscMemTypeToString(mtype), size, 181 entry.first, PetscMemTypeToString(entry.second.mtype), entry.second.size); 182 } 183 } 184 // clang-format off 185 if (id) *id = newid; 186 PetscCallCXX(map.emplace( 187 std::piecewise_construct, 188 std::forward_as_tuple(const_cast<MemoryMap::map_type::key_type>(ptr)), 189 std::forward_as_tuple(mtype, newid, size) 190 )); 191 // clang-format on 192 PetscFunctionReturn(0); 193 } 194 if (PetscDefined(USE_DEBUG)) { 195 const auto &old = it->second; 196 197 PetscCheck(MemoryMap::PointerAttributes(mtype, old.id, size) == old, PETSC_COMM_SELF, PETSC_ERR_LIB, "Pointer %p appears to have been previously allocated with memtype %s, size %zu and assigned id %" PetscInt64_FMT ", which does not match new values: (mtype %s, size %zu, id %" PetscInt64_FMT ")", it->first, 198 PetscMemTypeToString(old.mtype), old.size, old.id, PetscMemTypeToString(mtype), size, old.id); 199 } 200 if (id) *id = it->second.id; 201 PetscFunctionReturn(0); 202 } 203 204 /*@C 205 PetscDeviceRegisterMemory - Register a pointer for use with device-aware memory system 206 207 Not Collective 208 209 Input Parameters: 210 + ptr - The pointer to register 211 . mtype - The `PetscMemType` of the pointer 212 - size - The size (in bytes) of the memory region 213 214 Notes: 215 `ptr` need not point to the beginning of the memory range, however the user should register 216 the 217 218 It's OK to re-register the same `ptr` repeatedly (subsequent registrations do nothing) 219 however the given `mtype` and `size` must match the original registration. 220 221 `size` may be 0 (in which case this routine does nothing). 222 223 Level: intermediate 224 225 .seealso: `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, `PetscDeviceFree()`, 226 `PetscDeviceArrayZero()` 227 @*/ 228 PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size) 229 { 230 PetscFunctionBegin; 231 if (PetscMemTypeHost(mtype)) PetscValidPointer(ptr, 1); 232 if (PetscUnlikely(!size)) PetscFunctionReturn(0); // there is no point registering empty range 233 PetscCall(PetscDeviceRegisterMemory_Private(ptr, mtype, size)); 234 PetscFunctionReturn(0); 235 } 236 237 /* 238 PetscDeviceAllocate_Private - Allocate device-aware memory 239 240 Not Collective, Asynchronous, Auto-dependency aware 241 242 Input Parameters: 243 + dctx - The `PetscDeviceContext` used to allocate the memory 244 . clear - Whether or not the memory should be zeroed 245 . mtype - The type of memory to allocate 246 . n - The amount (in bytes) to allocate 247 - alignment - The alignment requirement (in bytes) of the allocated pointer 248 249 Output Parameter: 250 . ptr - The pointer to store the result in 251 252 Notes: 253 The user should prefer `PetscDeviceMalloc()` over this routine as it automatically computes 254 the size of the allocation and alignment based on the size of the datatype. 255 256 If the user is unsure about `alignment` -- or unable to compute it -- passing 257 `PETSC_MEMALIGN` will always work, though the user should beware that this may be quite 258 wasteful for very small allocations. 259 260 Memory allocated with this function must be freed with `PetscDeviceFree()` (or 261 `PetscDeviceDeallocate_Private()`). 262 263 If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`. 264 265 This routine falls back to using `PetscMalloc1()` or `PetscCalloc1()` (depending on the value 266 of `clear`) if PETSc was not configured with device support. The user should note that 267 `mtype` and `alignment` are ignored in this case, as these routines allocate only host memory 268 aligned to `PETSC_MEMALIGN`. 269 270 Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate 271 its value on function return, i.e.\: 272 273 .vb 274 PetscInt *ptr; 275 276 PetscDeviceAllocate_Private(dctx, PETSC_FALSE, PETSC_MEMTYPE_DEVICE, 20, alignof(PetscInt), (void**)&ptr); 277 278 PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize 279 280 ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization 281 .ve 282 283 DAG representation: 284 .vb 285 time -> 286 287 -> dctx - |= CALL =| -\- dctx --> 288 \- ptr -> 289 .ve 290 291 Level: intermediate 292 293 .N ASYNC_API 294 295 .seealso: `PetscDeviceMalloc()`, `PetscDeviceFree()`, `PetscDeviceDeallocate_Private()`, 296 `PetscDeviceArrayCopy()`, `PetscDeviceArrayZero()`, `PetscMemType` 297 */ 298 PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext dctx, PetscBool clear, PetscMemType mtype, std::size_t n, std::size_t alignment, void **PETSC_RESTRICT ptr) 299 { 300 PetscObjectId id = 0; 301 302 PetscFunctionBegin; 303 if (PetscDefined(USE_DEBUG)) { 304 const auto is_power_of_2 = [](std::size_t num) { return (num & (num - 1)) == 0; }; 305 306 PetscCheck(alignment != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu cannot be 0", alignment); 307 PetscCheck(is_power_of_2(alignment), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu must be a power of 2", alignment); 308 } 309 PetscValidPointer(ptr, 6); 310 *ptr = nullptr; 311 if (PetscUnlikely(!n)) PetscFunctionReturn(0); 312 PetscCall(memory_map.register_finalize()); 313 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 314 315 // get our pointer here 316 if (dctx->ops->memalloc) { 317 PetscUseTypeMethod(dctx, memalloc, clear, mtype, n, alignment, ptr); 318 } else { 319 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "allocating")); 320 PetscCall(PetscMallocA(1, clear, __LINE__, PETSC_FUNCTION_NAME, __FILE__, n, ptr)); 321 } 322 PetscCall(PetscDeviceRegisterMemory_Private(*ptr, mtype, n, &id)); 323 // Note this is a "write" so that the next dctx to try and read from the pointer has to wait 324 // for the allocation to be ready 325 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, id, PETSC_MEMORY_ACCESS_WRITE, "memory allocation")); 326 PetscFunctionReturn(0); 327 } 328 329 /* 330 PetscDeviceDeallocate_Private - Free device-aware memory 331 332 Not Collective, Asynchronous, Auto-dependency aware 333 334 Input Parameters: 335 + dctx - The `PetscDeviceContext` used to free the memory 336 - ptr - The pointer to free 337 338 Notes: 339 `ptr` must have been allocated using any of `PetscDeviceMalloc()`, `PetscDeviceCalloc()` or 340 `PetscDeviceAllocate_Private()`, or registered with the system via `PetscDeviceRegisterMemory()`. 341 342 The user should prefer `PetscDeviceFree()` over this routine as it automatically sets `ptr` 343 to `PETSC_NULLPTR` on successful deallocation. 344 345 `ptr` may be `NULL`. 346 347 This routine falls back to using `PetscFree()` if PETSc was not configured with device 348 support. The user should note that `PetscFree()` frees only host memory. 349 350 DAG representation: 351 .vb 352 time -> 353 354 -> dctx -/- |= CALL =| - dctx -> 355 -> ptr -/ 356 .ve 357 358 Level: intermediate 359 360 .N ASYNC_API 361 362 .seealso: `PetscDeviceFree()`, `PetscDeviceAllocate_Private()` 363 */ 364 PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext dctx, void *PETSC_RESTRICT ptr) 365 { 366 PetscFunctionBegin; 367 if (ptr) { 368 auto &map = memory_map.map; 369 const auto found_it = map.find(const_cast<MemoryMap::map_type::key_type>(ptr)); 370 371 if (PetscUnlikelyDebug(found_it == map.end())) { 372 // OK this is a bad pointer, now determine why 373 const auto it = memory_map.search_for(ptr); 374 375 // if it is map.cend() then no allocation owns it, meaning it was not allocated by us! 376 PetscCheck(it != map.cend(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Pointer %p was not allocated via PetscDeviceAllocate_Private()", ptr); 377 // if we are here then we did allocate it but the user has tried to do something along 378 // the lines of: 379 // 380 // allocate(&ptr, size); 381 // deallocate(ptr+5); 382 // 383 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempting to deallocate pointer %p which is a suballocation of %p (memtype %s, id %" PetscInt64_FMT ", size %zu bytes)", ptr, it->first, PetscMemTypeToString(it->second.mtype), it->second.id, 384 it->second.size); 385 } 386 387 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 388 // mark intent BEFORE we free, note we mark as write so that we are made to wait on any 389 // outstanding reads (don't want to kill the pointer before they are done) 390 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, found_it->second.id, PETSC_MEMORY_ACCESS_WRITE, "memory deallocation")); 391 // do free 392 if (dctx->ops->memfree) { 393 PetscUseTypeMethod(dctx, memfree, found_it->second.mtype, (void **)&ptr); 394 } else { 395 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(found_it->second.mtype), "freeing")); 396 } 397 // if ptr still exists, then the device context could not handle it 398 if (ptr) PetscCall(PetscFree(ptr)); 399 PetscCallCXX(map.erase(found_it)); 400 } 401 PetscFunctionReturn(0); 402 } 403 404 /*@C 405 PetscDeviceMemcpy - Copy memory in a device-aware manner 406 407 Not Collective, Asynchronous, Auto-dependency aware 408 409 Input Parameters: 410 + dctx - The `PetscDeviceContext` used to copy the memory 411 . dest - The pointer to copy to 412 . src - The pointer to copy from 413 - n - The amount (in bytes) to copy 414 415 Notes: 416 Both `dest` and `src` must have been allocated by `PetscDeviceMalloc()` or 417 `PetscDeviceCalloc()`. 418 419 `src` and `dest` cannot overlap. 420 421 If both `src` and `dest` are on the host this routine is fully synchronous. 422 423 The user should prefer `PetscDeviceArrayCopy()` over this routine as it automatically 424 computes the number of bytes to copy from the size of the pointer types. 425 426 DAG representation: 427 .vb 428 time -> 429 430 -> dctx - |= CALL =| - dctx -> 431 -> dest ---------------------> 432 -> src ----------------------> 433 .ve 434 435 Level: intermediate 436 437 .N ASYNC_API 438 439 .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, 440 `PetscDeviceFree()` 441 @*/ 442 PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext dctx, void *PETSC_RESTRICT dest, const void *PETSC_RESTRICT src, std::size_t n) 443 { 444 PetscFunctionBegin; 445 if (!n) PetscFunctionReturn(0); 446 PetscCheck(dest, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy to a NULL pointer"); 447 PetscCheck(src, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy from a NULL pointer"); 448 if (dest == src) PetscFunctionReturn(0); 449 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 450 { 451 const auto dest_it = memory_map.search_for(dest, true); 452 const auto src_it = memory_map.search_for(src, true); 453 const auto mode = PetscMemTypeToDeviceCopyMode(dest_it->second.mtype, src_it->second.mtype); 454 455 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, src_it->second.id, PETSC_MEMORY_ACCESS_READ, "memory copy (src)")); 456 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, dest_it->second.id, PETSC_MEMORY_ACCESS_WRITE, "memory copy (dest)")); 457 // perform the copy 458 if (dctx->ops->memcopy) { 459 PetscUseTypeMethod(dctx, memcopy, dest, src, n, mode); 460 if (mode == PETSC_DEVICE_COPY_HTOD) { 461 PetscCall(PetscLogCpuToGpu(n)); 462 } else if (mode == PETSC_DEVICE_COPY_DTOH) { 463 PetscCall(PetscLogGpuToCpu(n)); 464 } 465 } else { 466 // REVIEW ME: we might potentially need to sync here if the memory is device-allocated 467 // (pinned) but being copied by a host dctx 468 PetscCall(PetscDeviceCheckCapable_Private(dctx, mode == PETSC_DEVICE_COPY_HTOH, "copying")); 469 PetscCall(PetscMemcpy(dest, src, n)); 470 } 471 } 472 PetscFunctionReturn(0); 473 } 474 475 /*@C 476 PetscDeviceMemset - Memset device-aware memory 477 478 Not Collective, Asynchronous, Auto-dependency aware 479 480 Input Parameters: 481 + dctx - The `PetscDeviceContext` used to memset the memory 482 . ptr - The pointer to the memory 483 . v - The value to set 484 - n - The amount (in bytes) to set 485 486 Notes: 487 `ptr` must have been allocated by `PetscDeviceMalloc()` or `PetscDeviceCalloc()`. 488 489 The user should prefer `PetscDeviceArrayZero()` over this routine as it automatically 490 computes the number of bytes to copy from the size of the pointer types, though they should 491 note that it only zeros memory. 492 493 This routine is analogous to `memset()`. That is, this routine copies the value 494 `static_cast<unsigned char>(v)` into each of the first count characters of the object pointed 495 to by `dest`. 496 497 If `dest` is on device, this routine is asynchronous. 498 499 DAG representation: 500 .vb 501 time -> 502 503 -> dctx - |= CALL =| - dctx -> 504 -> dest ---------------------> 505 .ve 506 507 Level: intermediate 508 509 .N ASYNC_API 510 511 .seealso: `PetscDeviceArrayZero()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`, 512 `PetscDeviceFree()` 513 @*/ 514 PetscErrorCode PetscDeviceMemset(PetscDeviceContext dctx, void *ptr, PetscInt v, std::size_t n) 515 { 516 PetscFunctionBegin; 517 if (PetscUnlikely(!n)) PetscFunctionReturn(0); 518 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to memset a NULL pointer"); 519 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx)); 520 { 521 const auto ptr_it = memory_map.search_for(ptr, true); 522 const auto mtype = ptr_it->second.mtype; 523 524 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, ptr_it->second.id, PETSC_MEMORY_ACCESS_WRITE, "memory set")); 525 if (dctx->ops->memset) { 526 PetscUseTypeMethod(dctx, memset, mtype, ptr, v, n); 527 } else { 528 // REVIEW ME: we might potentially need to sync here if the memory is device-allocated 529 // (pinned) but being memset by a host dctx 530 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "memsetting")); 531 std::memset(ptr, static_cast<int>(v), n); 532 } 533 } 534 PetscFunctionReturn(0); 535 } 536