xref: /petsc/src/sys/objects/device/interface/memory.cxx (revision dce8aeba1c9b69b19f651c53d8a6b674bd7e9cbd)
1 #include <petsc/private/deviceimpl.h> /*I <petscdevice.h> I*/
2 
3 #include <petsc/private/cpp/register_finalize.hpp>
4 #include <petsc/private/cpp/type_traits.hpp> // integral_value
5 
6 #include <unordered_map>
7 #include <algorithm> // std::find_if
8 #include <cstring>   // std::memset
9 
// String table for PetscDeviceCopyMode. The first five entries name the copy modes, and are
// followed by the enum type name, the common enumerator prefix and a terminating nullptr.
const char *const PetscDeviceCopyModes[] = {"host_to_host", "device_to_host", "host_to_device", "device_to_device", "auto", "PetscDeviceCopyMode", "PETSC_DEVICE_COPY_", nullptr};
// The names above are listed in enumerator order, so pin every enumerator to the position of
// its name; renumbering the enum without updating the table then fails to compile.
static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_HTOH) == 0, "");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_DTOH) == 1, "");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_HTOD) == 2, "");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_DTOD) == 3, "");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_AUTO) == 4, "");
16 
17 // GCC implementation for std::hash<T*>. LLVM's libc++ is almost 2x slower because they do all
18 // kinds of complicated murmur hashing, so we make sure to enforce GCC's version.
19 struct PointerHash {
20   template <typename T>
21   PETSC_NODISCARD std::size_t operator()(const T *ptr) const noexcept
22   {
23     return reinterpret_cast<std::size_t>(ptr);
24   }
25 };
26 
27 // ==========================================================================================
28 // PointerAttributes
29 // ==========================================================================================
30 
// Meta-data recorded for every pointer tracked by the memory map: what kind of memory it is,
// the id it was assigned, and how many bytes the region spans.
struct PointerAttributes {
  PetscMemType  mtype = PETSC_MEMTYPE_HOST; // memtype of allocation
  PetscObjectId id    = 0;                  // id of allocation
  std::size_t   size  = 0;                  // size of allocation (bytes)

  // even though this is a POD and can be aggregate initialized, the STL uses () constructors
  // in unordered_map and so we need to provide a trivial constructor...
  constexpr PointerAttributes(PetscMemType, PetscObjectId, std::size_t) noexcept;

  // member-wise equality over mtype, id and size
  bool operator==(const PointerAttributes &) const noexcept;

  // true if the second pointer lies in the range of this->size bytes starting at the first
  PETSC_NODISCARD bool contains(const void *, const void *) const noexcept;
};
44 
45 // ==========================================================================================
46 // PointerAttributes - Public API
47 // ==========================================================================================
48 
49 inline constexpr PointerAttributes::PointerAttributes(PetscMemType mtype_, PetscObjectId id_, std::size_t size_) noexcept : mtype(mtype_), id(id_), size(size_) { }
50 
51 inline bool PointerAttributes::operator==(const PointerAttributes &other) const noexcept
52 {
53   return (mtype == other.mtype) && (id == other.id) && (size == other.size);
54 }
55 
56 /*
57   PointerAttributes::contains - asks and answers the question, does ptr_begin contain ptr
58 
59   Input Parameters:
60 + ptr_begin - pointer to the start of the range to check
61 - ptr       - the pointer to query
62 
63   Notes:
64   Returns true if ptr falls within ptr_begins range, false otherwise.
65 */
66 inline bool PointerAttributes::contains(const void *ptr_begin, const void *ptr) const noexcept
67 {
68   return (ptr >= ptr_begin) && (ptr < (static_cast<const char *>(ptr_begin) + size));
69 }
70 
// ==========================================================================================
// MemoryMap
//
// Since the pointers allocated via PetscDeviceAllocate_Private() may be device pointers we
// cannot just store meta-data within the pointer itself (as we can't dereference them). So
// instead we need to keep an extra map to keep track of them
//
// Each entry maps pointer -> {
//   PetscMemType  - The memtype of the pointer
//   PetscObjectId - A unique ID assigned at allocation or registration so auto-dep can
//                   identify the pointer
//   size          - The size (in bytes) of the allocation
// }
// ==========================================================================================

class MemoryMap : public Petsc::RegisterFinalizeable<MemoryMap> {
public:
  // keyed on the start address of each tracked region, hashed with PointerHash
  using map_type = std::unordered_map<void *, PointerAttributes, PointerHash>;

  // the pointer -> attributes table itself; public so the routines in this file can insert
  // into and erase from it directly
  map_type map{};

  // find the entry owning a pointer (exact key match or enclosing region); the bool selects
  // whether a miss is an error
  PETSC_NODISCARD map_type::const_iterator search_for(const void *, bool = false) const noexcept;

private:
  // the base class is a friend so that it can reach the two private hooks below
  friend class Petsc::RegisterFinalizeable<MemoryMap>;
  PETSC_NODISCARD PetscErrorCode register_finalize_() noexcept;
  PETSC_NODISCARD PetscErrorCode finalize_() noexcept;
};
99 
100 // ==========================================================================================
101 // MemoryMap - Private API
102 // ==========================================================================================
103 
// Registration hook (presumably invoked by the RegisterFinalizeable base when
// register_finalize() is called -- confirm against register_finalize.hpp). Its only job is to
// pre-size the hash table.
PetscErrorCode MemoryMap::register_finalize_() noexcept
{
  PetscFunctionBegin;
  // Preallocate, this does give a modest performance bump since unordered_map is very slow
  // if it needs to rehash. Experiments show that users tend not to have more than 5 or
  // so concurrently live pointers lying around. 10 at most.
  PetscCallCXX(map.reserve(16));
  PetscFunctionReturn(0);
}
113 
// Finalization hook (presumably invoked by the RegisterFinalizeable base -- confirm against
// register_finalize.hpp). Logs the event and drops every tracked entry.
PetscErrorCode MemoryMap::finalize_() noexcept
{
  PetscFunctionBegin;
  PetscCall(PetscInfo(nullptr, "Finalizing memory map\n"));
  // assign a freshly constructed map rather than calling clear(), so the old table (entries
  // and whatever storage it held) is discarded as a whole
  PetscCallCXX(map = map_type{});
  PetscFunctionReturn(0);
}
121 
122 // ==========================================================================================
123 // MemoryMap - Public API
124 // ==========================================================================================
125 
126 /*
127   MemoryMap::search_for - retrieve an iterator to the key-value pair for a pointer in the map
128 
129   Input Parameters:
130 + ptr       - pointer to search for
131 - must_find - true if an error is raised if the pointer is not found (default: false)
132 
133   Notes:
134   Accounts for sub-regions, i.e. if ptr is contained within another pointers region, it returns
135   the iterator to the super-pointers key-value pair.
136 
137   If ptr is not found and must_find is false returns map.end(), otherwise raises an error
138 */
139 MemoryMap::map_type::const_iterator MemoryMap::search_for(const void *ptr, bool must_find) const noexcept
140 {
141   const auto end = map.end();
142   auto       it  = map.find(const_cast<map_type::key_type>(ptr));
143 
144   // ptr was found, and points to an entire block
145   PetscFunctionBegin;
146   if (it != end) PetscFunctionReturn(it);
147   // wasn't found, but maybe its part of a block. have to search every block for it
148   // clang-format off
149   it = std::find_if(map.begin(), end, [ptr](const map_type::const_iterator::value_type &map_it) {
150     return map_it.second.contains(map_it.first, ptr);
151   });
152   PetscCheckAbort(!must_find || it != end, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Pointer %p was not registered with the memory tracker, call PetscDeviceRegisterMemory() on it", ptr);
153   PetscFunctionReturn(it);
154   // clang-format on
155 }
156 
// The single, file-local pointer registry shared by every routine below.
static MemoryMap memory_map;
158 
159 // ==========================================================================================
160 // Utility functions
161 // ==========================================================================================
162 
163 static PetscErrorCode PetscDeviceCheckCapable_Private(PetscDeviceContext dctx, bool cond, const char descr[])
164 {
165   PetscFunctionBegin;
166   PetscCheck(cond, PETSC_COMM_SELF, PETSC_ERR_SUP, "Device context (id: %" PetscInt64_FMT ", name: %s, type: %s) can only handle %s host memory", PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name, dctx->device ? PetscDeviceTypes[dctx->device->type] : "unknown", descr);
167   PetscFunctionReturn(0);
168 }
169 
170 // A helper utility, since register is called from PetscDeviceRegisterMemory() and
171 // PetscDevicAllocate(). The latter also needs the generated id, so instead of making it search
172 // the map again we just return it here
173 static PetscErrorCode PetscDeviceRegisterMemory_Private(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size, PetscObjectId *PETSC_RESTRICT id = nullptr)
174 {
175   auto      &map = memory_map.map;
176   const auto it  = memory_map.search_for(ptr);
177 
178   PetscFunctionBegin;
179   if (it == map.cend()) {
180     // pointer was never registered with the map, insert it and bail
181     const auto newid = PetscObjectNewId_Internal();
182 
183     if (PetscDefined(USE_DEBUG)) {
184       const auto tmp = PointerAttributes(mtype, newid, size);
185 
186       for (const auto &entry : map) {
187         // REVIEW ME: maybe this should just be handled...
188         PetscCheck(!tmp.contains(ptr, entry.first), PETSC_COMM_SELF, PETSC_ERR_ORDER, "Trying to register pointer %p (memtype %s, size %zu) but it appears you have already registered a sub-region of it (pointer %p, memtype %s, size %zu). Must register the larger region first", ptr, PetscMemTypeToString(mtype), size,
189                    entry.first, PetscMemTypeToString(entry.second.mtype), entry.second.size);
190       }
191     }
192     // clang-format off
193     if (id) *id = newid;
194     PetscCallCXX(map.emplace(
195       std::piecewise_construct,
196       std::forward_as_tuple(const_cast<MemoryMap::map_type::key_type>(ptr)),
197       std::forward_as_tuple(mtype, newid, size)
198     ));
199     // clang-format on
200     PetscFunctionReturn(0);
201   }
202   if (PetscDefined(USE_DEBUG)) {
203     const auto &old = it->second;
204 
205     PetscCheck(PointerAttributes(mtype, old.id, size) == old, PETSC_COMM_SELF, PETSC_ERR_LIB, "Pointer %p appears to have been previously allocated with memtype %s, size %zu and assigned id %" PetscInt64_FMT ", which does not match new values: (mtype %s, size %zu, id %" PetscInt64_FMT ")", it->first,
206                PetscMemTypeToString(old.mtype), old.size, old.id, PetscMemTypeToString(mtype), size, old.id);
207   }
208   if (id) *id = it->second.id;
209   PetscFunctionReturn(0);
210 }
211 
/*@C
  PetscDeviceRegisterMemory - Register a pointer for use with device-aware memory system

  Not Collective

  Input Parameters:
+ ptr   - The pointer to register
. mtype - The `PetscMemType` of the pointer
- size  - The size (in bytes) of the memory region

  Notes:
  `ptr` need not point to the beginning of the memory range, however the user should register
  the larger (enclosing) region before any of its sub-regions. In debug builds it is an error
  to register a region which contains an already registered pointer.

  It's OK to re-register the same `ptr` repeatedly (subsequent registrations do nothing)
  however the given `mtype` and `size` must match the original registration.

  `size` may be 0 (in which case this routine does nothing).

  Level: intermediate

.seealso: `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, `PetscDeviceFree()`,
`PetscDeviceArrayZero()`
@*/
PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size)
{
  PetscFunctionBegin;
  // only host pointers are validated here, a device pointer is passed through unchecked
  if (PetscMemTypeHost(mtype)) PetscValidPointer(ptr, 1);
  if (PetscUnlikely(!size)) PetscFunctionReturn(0); // there is no point registering empty range
  // the id assigned to the region is of no interest to the caller, so it is not requested
  PetscCall(PetscDeviceRegisterMemory_Private(ptr, mtype, size));
  PetscFunctionReturn(0);
}
244 
245 /*
246   PetscDeviceAllocate_Private - Allocate device-aware memory
247 
248   Not Collective, Asynchronous, Auto-dependency aware
249 
250   Input Parameters:
251 + dctx      - The `PetscDeviceContext` used to allocate the memory
252 . clear     - Whether or not the memory should be zeroed
253 . mtype     - The type of memory to allocate
254 . n         - The amount (in bytes) to allocate
255 - alignment - The alignment requirement (in bytes) of the allocated pointer
256 
257   Output Parameter:
258 . ptr - The pointer to store the result in
259 
260   Notes:
261   The user should prefer `PetscDeviceMalloc()` over this routine as it automatically computes
262   the size of the allocation and alignment based on the size of the datatype.
263 
264   If the user is unsure about `alignment` -- or unable to compute it -- passing
265   `PETSC_MEMALIGN` will always work, though the user should beware that this may be quite
266   wasteful for very small allocations.
267 
268   Memory allocated with this function must be freed with `PetscDeviceFree()` (or
269   `PetscDeviceDeallocate_Private()`).
270 
271   If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`.
272 
273   This routine falls back to using `PetscMalloc1()` or `PetscCalloc1()` (depending on the value
274   of `clear`) if PETSc was not configured with device support. The user should note that
275   `mtype` and `alignment` are ignored in this case, as these routines allocate only host memory
276   aligned to `PETSC_MEMALIGN`.
277 
278   Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate
279   its value on function return, i.e.\:
280 
281 .vb
282   PetscInt *ptr;
283 
284   PetscDeviceAllocate_Private(dctx, PETSC_FALSE, PETSC_MEMTYPE_DEVICE, 20, alignof(PetscInt), (void**)&ptr);
285 
286   PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize
287 
288   ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization
289 .ve
290 
291   DAG representation:
292 .vb
293   time ->
294 
295   -> dctx - |= CALL =| -\- dctx -->
296                          \- ptr ->
297 .ve
298 
299   Level: intermediate
300 
301 .N ASYNC_API
302 
303 .seealso: `PetscDeviceMalloc()`, `PetscDeviceFree()`, `PetscDeviceDeallocate_Private()`,
304 `PetscDeviceArrayCopy()`, `PetscDeviceArrayZero()`, `PetscMemType`
305 */
306 PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext dctx, PetscBool clear, PetscMemType mtype, std::size_t n, std::size_t alignment, void **PETSC_RESTRICT ptr)
307 {
308   PetscObjectId id = 0;
309 
310   PetscFunctionBegin;
311   if (PetscDefined(USE_DEBUG)) {
312     const auto is_power_of_2 = [](std::size_t num) { return (num & (num - 1)) == 0; };
313 
314     PetscCheck(alignment != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu cannot be 0", alignment);
315     PetscCheck(is_power_of_2(alignment), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu must be a power of 2", alignment);
316   }
317   PetscValidPointer(ptr, 6);
318   *ptr = nullptr;
319   if (PetscUnlikely(!n)) PetscFunctionReturn(0);
320   PetscCall(memory_map.register_finalize());
321   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
322 
323   // get our pointer here
324   if (dctx->ops->memalloc) {
325     PetscUseTypeMethod(dctx, memalloc, clear, mtype, n, alignment, ptr);
326   } else {
327     PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "allocating"));
328     PetscCall(PetscMallocA(1, clear, __LINE__, PETSC_FUNCTION_NAME, __FILE__, n, ptr));
329   }
330   PetscCall(PetscDeviceRegisterMemory_Private(*ptr, mtype, n, &id));
331   // Note this is a "write" so that the next dctx to try and read from the pointer has to wait
332   // for the allocation to be ready
333   PetscCall(PetscDeviceContextMarkIntentFromID(dctx, id, PETSC_MEMORY_ACCESS_WRITE, "memory allocation"));
334   PetscFunctionReturn(0);
335 }
336 
337 /*
338   PetscDeviceDeallocate_Private - Free device-aware memory
339 
340   Not Collective, Asynchronous, Auto-dependency aware
341 
342   Input Parameters:
343 + dctx  - The `PetscDeviceContext` used to free the memory
344 - ptr   - The pointer to free
345 
346   Notes:
347   `ptr` must have been allocated using any of `PetscDeviceMalloc()`, `PetscDeviceCalloc()` or
348   `PetscDeviceAllocate_Private()`, or registered with the system via `PetscDeviceRegisterMemory()`.
349 
350   The user should prefer `PetscDeviceFree()` over this routine as it automatically sets `ptr`
351   to `PETSC_NULLPTR` on successful deallocation.
352 
353   `ptr` may be `NULL`.
354 
355   This routine falls back to using `PetscFree()` if PETSc was not configured with device
356   support. The user should note that `PetscFree()` frees only host memory.
357 
358   DAG representation:
359 .vb
360   time ->
361 
362   -> dctx -/- |= CALL =| - dctx ->
363   -> ptr -/
364 .ve
365 
366   Level: intermediate
367 
368 .N ASYNC_API
369 
370 .seealso: `PetscDeviceFree()`, `PetscDeviceAllocate_Private()`
371 */
372 PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext dctx, void *PETSC_RESTRICT ptr)
373 {
374   PetscFunctionBegin;
375   if (ptr) {
376     auto      &map      = memory_map.map;
377     const auto found_it = map.find(const_cast<MemoryMap::map_type::key_type>(ptr));
378 
379     if (PetscUnlikelyDebug(found_it == map.end())) {
380       // OK this is a bad pointer, now determine why
381       const auto it = memory_map.search_for(ptr);
382 
383       // if it is map.cend() then no allocation owns it, meaning it was not allocated by us!
384       PetscCheck(it != map.cend(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Pointer %p was not allocated via PetscDeviceAllocate_Private()", ptr);
385       // if we are here then we did allocate it but the user has tried to do something along
386       // the lines of:
387       //
388       // allocate(&ptr, size);
389       // deallocate(ptr+5);
390       //
391       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempting to deallocate pointer %p which is a suballocation of %p (memtype %s, id %" PetscInt64_FMT ", size %zu bytes)", ptr, it->first, PetscMemTypeToString(it->second.mtype), it->second.id,
392               it->second.size);
393     }
394 
395     PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
396     // mark intent BEFORE we free, note we mark as write so that we are made to wait on any
397     // outstanding reads (don't want to kill the pointer before they are done)
398     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, found_it->second.id, PETSC_MEMORY_ACCESS_WRITE, "memory deallocation"));
399     // do free
400     if (dctx->ops->memfree) {
401       PetscUseTypeMethod(dctx, memfree, found_it->second.mtype, (void **)&ptr);
402     } else {
403       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(found_it->second.mtype), "freeing"));
404     }
405     // if ptr still exists, then the device context could not handle it
406     if (ptr) PetscCall(PetscFree(ptr));
407     PetscCallCXX(map.erase(found_it));
408   }
409   PetscFunctionReturn(0);
410 }
411 
412 /*@C
413   PetscDeviceMemcpy - Copy memory in a device-aware manner
414 
415   Not Collective, Asynchronous, Auto-dependency aware
416 
417   Input Parameters:
418 + dctx - The `PetscDeviceContext` used to copy the memory
419 . dest - The pointer to copy to
420 . src  - The pointer to copy from
421 - n    - The amount (in bytes) to copy
422 
423   Notes:
424   Both `dest` and `src` must have been allocated by `PetscDeviceMalloc()` or
425   `PetscDeviceCalloc()`.
426 
427   `src` and `dest` cannot overlap.
428 
429   If both `src` and `dest` are on the host this routine is fully synchronous.
430 
431   The user should prefer `PetscDeviceArrayCopy()` over this routine as it automatically
432   computes the number of bytes to copy from the size of the pointer types.
433 
434   DAG representation:
435 .vb
436   time ->
437 
438   -> dctx - |= CALL =| - dctx ->
439   -> dest --------------------->
440   -> src ---------------------->
441 .ve
442 
443   Level: intermediate
444 
445 .N ASYNC_API
446 
447 .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
448 `PetscDeviceFree()`
449 @*/
450 PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext dctx, void *PETSC_RESTRICT dest, const void *PETSC_RESTRICT src, std::size_t n)
451 {
452   PetscFunctionBegin;
453   if (!n) PetscFunctionReturn(0);
454   PetscCheck(dest, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy to a NULL pointer");
455   PetscCheck(src, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy from a NULL pointer");
456   if (dest == src) PetscFunctionReturn(0);
457   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
458   {
459     const auto dest_it = memory_map.search_for(dest, true);
460     const auto src_it  = memory_map.search_for(src, true);
461     const auto mode    = PetscMemTypeToDeviceCopyMode(dest_it->second.mtype, src_it->second.mtype);
462 
463     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, src_it->second.id, PETSC_MEMORY_ACCESS_READ, "memory copy (src)"));
464     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, dest_it->second.id, PETSC_MEMORY_ACCESS_WRITE, "memory copy (dest)"));
465     // perform the copy
466     if (dctx->ops->memcopy) {
467       PetscUseTypeMethod(dctx, memcopy, dest, src, n, mode);
468       if (mode == PETSC_DEVICE_COPY_HTOD) {
469         PetscCall(PetscLogCpuToGpu(n));
470       } else if (mode == PETSC_DEVICE_COPY_DTOH) {
471         PetscCall(PetscLogGpuToCpu(n));
472       }
473     } else {
474       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
475       // (pinned) but being copied by a host dctx
476       PetscCall(PetscDeviceCheckCapable_Private(dctx, mode == PETSC_DEVICE_COPY_HTOH, "copying"));
477       PetscCall(PetscMemcpy(dest, src, n));
478     }
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 /*@C
484   PetscDeviceMemset - Memset device-aware memory
485 
486   Not Collective, Asynchronous, Auto-dependency aware
487 
488   Input Parameters:
489 + dctx  - The `PetscDeviceContext` used to memset the memory
490 . ptr   - The pointer to the memory
491 . v     - The value to set
492 - n     - The amount (in bytes) to set
493 
494   Notes:
495   `ptr` must have been allocated by `PetscDeviceMalloc()` or `PetscDeviceCalloc()`.
496 
497   The user should prefer `PetscDeviceArrayZero()` over this routine as it automatically
498   computes the number of bytes to copy from the size of the pointer types, though they should
499   note that it only zeros memory.
500 
501   This routine is analogous to `memset()`. That is, this routine copies the value
502   `static_cast<unsigned char>(v)` into each of the first count characters of the object pointed
503   to by `dest`.
504 
505   If `dest` is on device, this routine is asynchronous.
506 
507   DAG representation:
508 .vb
509   time ->
510 
511   -> dctx - |= CALL =| - dctx ->
512   -> dest --------------------->
513 .ve
514 
515   Level: intermediate
516 
517 .N ASYNC_API
518 
519 .seealso: `PetscDeviceArrayZero()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
520 `PetscDeviceFree()`
521 @*/
522 PetscErrorCode PetscDeviceMemset(PetscDeviceContext dctx, void *ptr, PetscInt v, std::size_t n)
523 {
524   PetscFunctionBegin;
525   if (PetscUnlikely(!n)) PetscFunctionReturn(0);
526   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to memset a NULL pointer");
527   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
528   {
529     const auto ptr_it = memory_map.search_for(ptr, true);
530     const auto mtype  = ptr_it->second.mtype;
531 
532     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, ptr_it->second.id, PETSC_MEMORY_ACCESS_WRITE, "memory set"));
533     if (dctx->ops->memset) {
534       PetscUseTypeMethod(dctx, memset, mtype, ptr, v, n);
535     } else {
536       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
537       // (pinned) but being memset by a host dctx
538       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "memsetting"));
539       std::memset(ptr, static_cast<int>(v), n);
540     }
541   }
542   PetscFunctionReturn(0);
543 }
544