xref: /petsc/src/sys/objects/device/interface/memory.cxx (revision 350f13859a6dc43e5dcf4542b37cd48c6b1803a2)
1 #include <petsc/private/deviceimpl.h> /*I <petscdevice.h> I*/
2 
3 #include <petsc/private/cpp/register_finalize.hpp>
4 #include <petsc/private/cpp/type_traits.hpp> // integral_value
5 #include <petsc/private/cpp/unordered_map.hpp>
6 
7 #include <algorithm> // std::find_if
8 #include <cstring>   // std::memset
9 
10 const char *const PetscDeviceCopyModes[] = {"host_to_host", "device_to_host", "host_to_device", "device_to_device", "auto", "PetscDeviceCopyMode", "PETSC_DEVICE_COPY_", nullptr};
11 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOH) == 0, "");
12 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOH) == 1, "");
13 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOD) == 2, "");
14 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOD) == 3, "");
15 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_AUTO) == 4, "");
16 
17 // GCC implementation for std::hash<T*>. LLVM's libc++ is almost 2x slower because they do all
18 // kinds of complicated murmur hashing, so we make sure to enforce GCC's version.
19 struct PointerHash {
20   template <typename T>
21   PETSC_NODISCARD std::size_t operator()(const T *ptr) const noexcept
22   {
23     return reinterpret_cast<std::size_t>(ptr);
24   }
25 };
26 
27 // ==========================================================================================
28 // PointerAttributes
29 //
30 // A set of attributes for a pointer
31 // ==========================================================================================
32 
33 struct PointerAttributes {
34   PetscMemType  mtype = PETSC_MEMTYPE_HOST; // memtype of allocation
35   PetscObjectId id    = 0;                  // id of allocation
36   std::size_t   size  = 0;                  // size of allocation (bytes)
37 
38   // even though this is a POD and can be aggregate initialized, the STL uses () constructors
39   // in unordered_map and so we need to provide a trivial constructor...
40   constexpr PointerAttributes() = default;
41   constexpr PointerAttributes(PetscMemType, PetscObjectId, std::size_t) noexcept;
42 
43   bool operator==(const PointerAttributes &) const noexcept;
44 
45   PETSC_NODISCARD bool contains(const void *, const void *) const noexcept;
46 };
47 
48 // ==========================================================================================
49 // PointerAttributes - Public API
50 // ==========================================================================================
51 
52 inline constexpr PointerAttributes::PointerAttributes(PetscMemType mtype_, PetscObjectId id_, std::size_t size_) noexcept : mtype(mtype_), id(id_), size(size_) { }
53 
54 inline bool PointerAttributes::operator==(const PointerAttributes &other) const noexcept
55 {
56   return (mtype == other.mtype) && (id == other.id) && (size == other.size);
57 }
58 
59 /*
60   PointerAttributes::contains - asks and answers the question, does ptr_begin contain ptr
61 
62   Input Parameters:
63 + ptr_begin - pointer to the start of the range to check
64 - ptr       - the pointer to query
65 
66   Notes:
67   Returns true if ptr falls within ptr_begins range, false otherwise.
68 */
69 inline bool PointerAttributes::contains(const void *ptr_begin, const void *ptr) const noexcept
70 {
71   return (ptr >= ptr_begin) && (ptr < (static_cast<const char *>(ptr_begin) + size));
72 }
73 
74 // ==========================================================================================
75 // MemoryMap
76 //
77 // Since the pointers allocated via PetscDeviceAllocate_Private() may be device pointers we
78 // cannot just store meta-data within the pointer itself (as we can't dereference them). So
79 // instead we need to keep an extra map to keep track of them
80 //
81 // Each entry maps pointer -> {
82 //   PetscMemType  - The memtype of the pointer
83 //   PetscObjectId - A unique ID assigned at allocation or registration so auto-dep can
84 //                   identify the pointer
85 //   size          - The size (in bytes) of the allocation
86 // }
87 // ==========================================================================================
88 
89 class MemoryMap : public Petsc::RegisterFinalizeable<MemoryMap> {
90 public:
91   using map_type = Petsc::UnorderedMap<void *, PointerAttributes, PointerHash>;
92 
93   map_type map{};
94 
95   PETSC_NODISCARD map_type::const_iterator search_for(const void *, bool = false) const noexcept;
96 
97 private:
98   friend class Petsc::RegisterFinalizeable<MemoryMap>;
99   PetscErrorCode register_finalize_() noexcept;
100   PetscErrorCode finalize_() noexcept;
101 };
102 
103 // ==========================================================================================
104 // MemoryMap - Private API
105 // ==========================================================================================
106 
107 PetscErrorCode MemoryMap::register_finalize_() noexcept
108 {
109   PetscFunctionBegin;
110   // Preallocate, this does give a modest performance bump since unordered_map is so __dog__
111   // slow if it needs to rehash. Experiments show that users tend not to have more than 5 or
112   // so concurrently live pointers lying around. 10 at most.
113   PetscCall(map.reserve(16));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 PetscErrorCode MemoryMap::finalize_() noexcept
118 {
119   PetscFunctionBegin;
120   PetscCall(PetscInfo(nullptr, "Finalizing memory map\n"));
121   PetscCallCXX(map = map_type{});
122   PetscFunctionReturn(PETSC_SUCCESS);
123 }
124 
125 // ==========================================================================================
126 // MemoryMap - Public API
127 // ==========================================================================================
128 
129 /*
130   MemoryMap::search_for - retrieve an iterator to the key-value pair for a pointer in the map
131 
132   Input Parameters:
133 + ptr       - pointer to search for
134 - must_find - true if an error is raised if the pointer is not found (default: false)
135 
136   Notes:
137   Accounts for sub-regions, i.e. if ptr is contained within another pointers region, it returns
138   the iterator to the super-pointers key-value pair.
139 
140   If ptr is not found and must_find is false returns map.end(), otherwise raises an error
141 */
142 MemoryMap::map_type::const_iterator MemoryMap::search_for(const void *ptr, bool must_find) const noexcept
143 {
144   const auto end_it = map.end();
145   auto       it     = map.find(const_cast<map_type::key_type>(ptr));
146 
147   // ptr was found, and points to an entire block
148   PetscFunctionBegin;
149   if (it != end_it) PetscFunctionReturn(it);
150   // wasn't found, but maybe its part of a block. have to search every block for it
151   // clang-format off
152   it = std::find_if(map.begin(), end_it, [ptr](map_type::const_iterator::reference map_it) {
153     return map_it.second.contains(map_it.first, ptr);
154   });
155   // clang-format on
156   PetscCheckAbort(!must_find || it != end_it, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Pointer %p was not registered with the memory tracker, call PetscDeviceRegisterMemory() on it", ptr);
157   PetscFunctionReturn(it);
158 }
159 
// The file-local MemoryMap instance used by all of the routines in this file to record and
// look up pointers
static MemoryMap memory_map;
161 
162 // ==========================================================================================
163 // Utility functions
164 // ==========================================================================================
165 
166 static PetscErrorCode PetscDeviceCheckCapable_Private(PetscDeviceContext dctx, bool cond, const char descr[])
167 {
168   PetscFunctionBegin;
169   PetscCheck(cond, PETSC_COMM_SELF, PETSC_ERR_SUP, "Device context (id: %" PetscInt64_FMT ", name: %s, type: %s) can only handle %s host memory", PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name, dctx->device ? PetscDeviceTypes[dctx->device->type] : "unknown", descr);
170   PetscFunctionReturn(PETSC_SUCCESS);
171 }
172 
173 // A helper utility, since register is called from PetscDeviceRegisterMemory() and
174 // PetscDevicAllocate(). The latter also needs the generated id, so instead of making it search
175 // the map again we just return it here
176 static PetscErrorCode PetscDeviceRegisterMemory_Private(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size, PetscObjectId *PETSC_RESTRICT id = nullptr)
177 {
178   auto      &map = memory_map.map;
179   const auto it  = memory_map.search_for(ptr);
180 
181   PetscFunctionBegin;
182   if (it == map.cend()) {
183     // pointer was never registered with the map, insert it and bail
184     const auto newid = PetscObjectNewId_Internal();
185 
186     if (PetscDefined(USE_DEBUG)) {
187       const auto tmp = PointerAttributes(mtype, newid, size);
188 
189       for (const auto &entry : map) {
190         auto &&attr = entry.second;
191 
192         // REVIEW ME: maybe this should just be handled...
193         PetscCheck(!tmp.contains(ptr, entry.first), PETSC_COMM_SELF, PETSC_ERR_ORDER, "Trying to register pointer %p (memtype %s, size %zu) but it appears you have already registered a sub-region of it (pointer %p, memtype %s, size %zu). Must register the larger region first", ptr, PetscMemTypeToString(mtype), size,
194                    entry.first, PetscMemTypeToString(attr.mtype), attr.size);
195       }
196     }
197     // clang-format off
198     if (id) *id = newid;
199     PetscCallCXX(map.emplace(
200       std::piecewise_construct,
201       std::forward_as_tuple(const_cast<MemoryMap::map_type::key_type>(ptr)),
202       std::forward_as_tuple(mtype, newid, size)
203     ));
204     // clang-format on
205     PetscFunctionReturn(PETSC_SUCCESS);
206   }
207   if (PetscDefined(USE_DEBUG)) {
208     const auto &old = it->second;
209 
210     PetscCheck(PointerAttributes(mtype, old.id, size) == old, PETSC_COMM_SELF, PETSC_ERR_LIB, "Pointer %p appears to have been previously allocated with memtype %s, size %zu and assigned id %" PetscInt64_FMT ", which does not match new values: (mtype %s, size %zu, id %" PetscInt64_FMT ")", it->first,
211                PetscMemTypeToString(old.mtype), old.size, old.id, PetscMemTypeToString(mtype), size, old.id);
212   }
213   if (id) *id = it->second.id;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 /*@C
218   PetscDeviceRegisterMemory - Register a pointer for use with device-aware memory system
219 
220   Not Collective
221 
222   Input Parameters:
223 + ptr   - The pointer to register
224 . mtype - The `PetscMemType` of the pointer
225 - size  - The size (in bytes) of the memory region
226 
227   Notes:
228   `ptr` need not point to the beginning of the memory range, however the user should register
229   the
230 
231   It's OK to re-register the same `ptr` repeatedly (subsequent registrations do nothing)
232   however the given `mtype` and `size` must match the original registration.
233 
234   `size` may be 0 (in which case this routine does nothing).
235 
236   Level: intermediate
237 
238 .seealso: `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, `PetscDeviceFree()`,
239 `PetscDeviceArrayZero()`
240 @*/
241 PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size)
242 {
243   PetscFunctionBegin;
244   if (PetscMemTypeHost(mtype)) PetscValidPointer(ptr, 1);
245   if (PetscUnlikely(!size)) PetscFunctionReturn(PETSC_SUCCESS); // there is no point registering empty range
246   PetscCall(PetscDeviceRegisterMemory_Private(ptr, mtype, size));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 /*
251   PetscDeviceAllocate_Private - Allocate device-aware memory
252 
253   Not Collective, Asynchronous, Auto-dependency aware
254 
255   Input Parameters:
256 + dctx      - The `PetscDeviceContext` used to allocate the memory
257 . clear     - Whether or not the memory should be zeroed
258 . mtype     - The type of memory to allocate
259 . n         - The amount (in bytes) to allocate
260 - alignment - The alignment requirement (in bytes) of the allocated pointer
261 
262   Output Parameter:
263 . ptr - The pointer to store the result in
264 
265   Notes:
266   The user should prefer `PetscDeviceMalloc()` over this routine as it automatically computes
267   the size of the allocation and alignment based on the size of the datatype.
268 
269   If the user is unsure about `alignment` -- or unable to compute it -- passing
270   `PETSC_MEMALIGN` will always work, though the user should beware that this may be quite
271   wasteful for very small allocations.
272 
273   Memory allocated with this function must be freed with `PetscDeviceFree()` (or
274   `PetscDeviceDeallocate_Private()`).
275 
276   If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`.
277 
278   This routine falls back to using `PetscMalloc1()` or `PetscCalloc1()` (depending on the value
279   of `clear`) if PETSc was not configured with device support. The user should note that
280   `mtype` and `alignment` are ignored in this case, as these routines allocate only host memory
281   aligned to `PETSC_MEMALIGN`.
282 
283   Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate
284   its value on function return, i.e.\:
285 
286 .vb
287   PetscInt *ptr;
288 
289   PetscDeviceAllocate_Private(dctx, PETSC_FALSE, PETSC_MEMTYPE_DEVICE, 20, alignof(PetscInt), (void**)&ptr);
290 
291   PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize
292 
293   ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization
294 .ve
295 
296   DAG representation:
297 .vb
298   time ->
299 
300   -> dctx - |= CALL =| -\- dctx -->
301                          \- ptr ->
302 .ve
303 
304   Level: intermediate
305 
306 .N ASYNC_API
307 
308 .seealso: `PetscDeviceMalloc()`, `PetscDeviceFree()`, `PetscDeviceDeallocate_Private()`,
309 `PetscDeviceArrayCopy()`, `PetscDeviceArrayZero()`, `PetscMemType`
310 */
311 PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext dctx, PetscBool clear, PetscMemType mtype, std::size_t n, std::size_t alignment, void **PETSC_RESTRICT ptr)
312 {
313   PetscObjectId id = 0;
314 
315   PetscFunctionBegin;
316   if (PetscDefined(USE_DEBUG)) {
317     const auto is_power_of_2 = [](std::size_t num) { return (num & (num - 1)) == 0; };
318 
319     PetscCheck(alignment != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu cannot be 0", alignment);
320     PetscCheck(is_power_of_2(alignment), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu must be a power of 2", alignment);
321   }
322   PetscValidPointer(ptr, 6);
323   *ptr = nullptr;
324   if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS);
325   PetscCall(memory_map.register_finalize());
326   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
327 
328   // get our pointer here
329   if (dctx->ops->memalloc) {
330     PetscUseTypeMethod(dctx, memalloc, clear, mtype, n, alignment, ptr);
331   } else {
332     PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "allocating"));
333     PetscCall(PetscMallocA(1, clear, __LINE__, PETSC_FUNCTION_NAME, __FILE__, n, ptr));
334   }
335   PetscCall(PetscDeviceRegisterMemory_Private(*ptr, mtype, n, &id));
336   // Note this is a "write" so that the next dctx to try and read from the pointer has to wait
337   // for the allocation to be ready
338   PetscCall(PetscDeviceContextMarkIntentFromID(dctx, id, PETSC_MEMORY_ACCESS_WRITE, "memory allocation"));
339   PetscFunctionReturn(PETSC_SUCCESS);
340 }
341 
342 /*
343   PetscDeviceDeallocate_Private - Free device-aware memory
344 
345   Not Collective, Asynchronous, Auto-dependency aware
346 
347   Input Parameters:
348 + dctx  - The `PetscDeviceContext` used to free the memory
349 - ptr   - The pointer to free
350 
351   Level: intermediate
352 
353   Notes:
354   `ptr` must have been allocated using any of `PetscDeviceMalloc()`, `PetscDeviceCalloc()` or
355   `PetscDeviceAllocate_Private()`, or registered with the system via `PetscDeviceRegisterMemory()`.
356 
357   The user should prefer `PetscDeviceFree()` over this routine as it automatically sets `ptr`
358   to `PETSC_NULLPTR` on successful deallocation.
359 
360   `ptr` may be `NULL`.
361 
362   This routine falls back to using `PetscFree()` if PETSc was not configured with device
363   support. The user should note that `PetscFree()` frees only host memory.
364 
365   DAG representation:
366 .vb
367   time ->
368 
369   -> dctx -/- |= CALL =| - dctx ->
370   -> ptr -/
371 .ve
372 
373 .N ASYNC_API
374 
375 .seealso: `PetscDeviceFree()`, `PetscDeviceAllocate_Private()`
376 */
377 PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext dctx, void *PETSC_RESTRICT ptr)
378 {
379   PetscFunctionBegin;
380   if (ptr) {
381     auto      &map      = memory_map.map;
382     const auto found_it = map.find(const_cast<MemoryMap::map_type::key_type>(ptr));
383 
384     if (PetscUnlikelyDebug(found_it == map.end())) {
385       // OK this is a bad pointer, now determine why
386       const auto it = memory_map.search_for(ptr);
387 
388       // if it is map.cend() then no allocation owns it, meaning it was not allocated by us!
389       PetscCheck(it != map.cend(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Pointer %p was not allocated via PetscDeviceAllocate_Private()", ptr);
390       // if we are here then we did allocate it but the user has tried to do something along
391       // the lines of:
392       //
393       // allocate(&ptr, size);
394       // deallocate(ptr+5);
395       //
396       auto &&attr = it->second;
397       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempting to deallocate pointer %p which is a suballocation of %p (memtype %s, id %" PetscInt64_FMT ", size %zu bytes)", ptr, it->first, PetscMemTypeToString(attr.mtype), attr.id, attr.size);
398     }
399     auto &&attr = found_it->second;
400     PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
401     // mark intent BEFORE we free, note we mark as write so that we are made to wait on any
402     // outstanding reads (don't want to kill the pointer before they are done)
403     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory deallocation"));
404     // do free
405     if (dctx->ops->memfree) {
406       PetscUseTypeMethod(dctx, memfree, attr.mtype, (void **)&ptr);
407     } else {
408       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "freeing"));
409     }
410     // if ptr still exists, then the device context could not handle it
411     if (ptr) PetscCall(PetscFree(ptr));
412     PetscCallCXX(map.erase(found_it));
413   }
414   PetscFunctionReturn(PETSC_SUCCESS);
415 }
416 
417 /*@C
418   PetscDeviceMemcpy - Copy memory in a device-aware manner
419 
420   Not Collective, Asynchronous, Auto-dependency aware
421 
422   Input Parameters:
423 + dctx - The `PetscDeviceContext` used to copy the memory
424 . dest - The pointer to copy to
425 . src  - The pointer to copy from
426 - n    - The amount (in bytes) to copy
427 
428   Level: intermediate
429 
430   Notes:
431   Both `dest` and `src` must have been allocated by `PetscDeviceMalloc()` or
432   `PetscDeviceCalloc()`.
433 
434   `src` and `dest` cannot overlap.
435 
436   If both `src` and `dest` are on the host this routine is fully synchronous.
437 
438   The user should prefer `PetscDeviceArrayCopy()` over this routine as it automatically
439   computes the number of bytes to copy from the size of the pointer types.
440 
441   DAG representation:
442 .vb
443   time ->
444 
445   -> dctx - |= CALL =| - dctx ->
446   -> dest --------------------->
447   -> src ---------------------->
448 .ve
449 
450 .N ASYNC_API
451 
452 .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
453 `PetscDeviceFree()`
454 @*/
455 PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext dctx, void *PETSC_RESTRICT dest, const void *PETSC_RESTRICT src, std::size_t n)
456 {
457   PetscFunctionBegin;
458   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
459   PetscCheck(dest, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy to a NULL pointer");
460   PetscCheck(src, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy from a NULL pointer");
461   if (dest == src) PetscFunctionReturn(PETSC_SUCCESS);
462   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
463   {
464     const auto &dest_attr = memory_map.search_for(dest, true)->second;
465     const auto &src_attr  = memory_map.search_for(src, true)->second;
466     const auto  mode      = PetscMemTypeToDeviceCopyMode(dest_attr.mtype, src_attr.mtype);
467 
468     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, src_attr.id, PETSC_MEMORY_ACCESS_READ, "memory copy (src)"));
469     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, dest_attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory copy (dest)"));
470     // perform the copy
471     if (dctx->ops->memcopy) {
472       PetscUseTypeMethod(dctx, memcopy, dest, src, n, mode);
473       if (mode == PETSC_DEVICE_COPY_HTOD) {
474         PetscCall(PetscLogCpuToGpu(n));
475       } else if (mode == PETSC_DEVICE_COPY_DTOH) {
476         PetscCall(PetscLogGpuToCpu(n));
477       }
478     } else {
479       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
480       // (pinned) but being copied by a host dctx
481       PetscCall(PetscDeviceCheckCapable_Private(dctx, mode == PETSC_DEVICE_COPY_HTOH, "copying"));
482       PetscCall(PetscMemcpy(dest, src, n));
483     }
484   }
485   PetscFunctionReturn(PETSC_SUCCESS);
486 }
487 
488 /*@C
489   PetscDeviceMemset - Memset device-aware memory
490 
491   Not Collective, Asynchronous, Auto-dependency aware
492 
493   Input Parameters:
494 + dctx  - The `PetscDeviceContext` used to memset the memory
495 . ptr   - The pointer to the memory
496 . v     - The value to set
497 - n     - The amount (in bytes) to set
498 
499   Level: intermediate
500 
501   Notes:
502   `ptr` must have been allocated by `PetscDeviceMalloc()` or `PetscDeviceCalloc()`.
503 
504   The user should prefer `PetscDeviceArrayZero()` over this routine as it automatically
505   computes the number of bytes to copy from the size of the pointer types, though they should
506   note that it only zeros memory.
507 
508   This routine is analogous to `memset()`. That is, this routine copies the value
509   `static_cast<unsigned char>(v)` into each of the first count characters of the object pointed
510   to by `dest`.
511 
512   If `dest` is on device, this routine is asynchronous.
513 
514   DAG representation:
515 .vb
516   time ->
517 
518   -> dctx - |= CALL =| - dctx ->
519   -> dest --------------------->
520 .ve
521 
522 .N ASYNC_API
523 
524 .seealso: `PetscDeviceArrayZero()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
525 `PetscDeviceFree()`
526 @*/
527 PetscErrorCode PetscDeviceMemset(PetscDeviceContext dctx, void *ptr, PetscInt v, std::size_t n)
528 {
529   PetscFunctionBegin;
530   if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS);
531   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to memset a NULL pointer");
532   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
533   {
534     const auto &attr = memory_map.search_for(ptr, true)->second;
535 
536     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory set"));
537     if (dctx->ops->memset) {
538       PetscUseTypeMethod(dctx, memset, attr.mtype, ptr, v, n);
539     } else {
540       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
541       // (pinned) but being memset by a host dctx
542       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "memsetting"));
543       std::memset(ptr, static_cast<int>(v), n);
544     }
545   }
546   PetscFunctionReturn(PETSC_SUCCESS);
547 }
548