xref: /petsc/src/sys/objects/device/interface/memory.cxx (revision 862e4a309d45a165aaa4da0d704ba733429d833a)
1 #include <petsc/private/deviceimpl.h> /*I <petscdevice.h> I*/
2 
3 #include <petsc/private/cpp/register_finalize.hpp>
4 #include <petsc/private/cpp/type_traits.hpp> // integral_value
5 
6 #include <unordered_map>
7 #include <algorithm> // std::find_if
8 #include <cstring>   // std::memset
9 
10 const char *const PetscDeviceCopyModes[] = {"host_to_host", "device_to_host", "host_to_device", "device_to_device", "auto", "PetscDeviceCopyMode", "PETSC_DEVICE_COPY_", nullptr};
11 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_HTOH) == 0, "");
12 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_DTOH) == 1, "");
13 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_HTOD) == 2, "");
14 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_DTOD) == 3, "");
15 static_assert(Petsc::util::integral_value(PETSC_DEVICE_COPY_AUTO) == 4, "");
16 
17 // ==========================================================================================
18 // MemoryMap
19 //
20 // Since the pointers allocated via PetscDeviceAllocate_Private() may be device pointers we
21 // cannot just store meta-data within the pointer itself (as we can't dereference them). So
22 // instead we need to keep an extra map to keep track of them
23 //
24 // Each entry maps pointer -> {
25 //   PetscMemType  - The memtype of the pointer
//   PetscObjectId - A unique ID assigned at allocation or registration so auto-dep can
27 //                   identify the pointer
28 //   size          - The size (in bytes) of the allocation
29 // }
30 // ==========================================================================================
31 
32 // GCC implementation for std::hash<T*>. LLVM's libc++ is almost 2x slower because they do all
33 // kinds of complicated murmur hashing, so we make sure to enforce GCC's version.
34 struct PointerHash {
35   template <typename T>
36   PETSC_NODISCARD std::size_t operator()(const T *ptr) const noexcept
37   {
38     return reinterpret_cast<std::size_t>(ptr);
39   }
40 };
41 
42 class MemoryMap : public Petsc::RegisterFinalizeable<MemoryMap> {
43 public:
44   struct PointerAttributes {
45     PetscMemType  mtype{}; // memtype of allocation
46     PetscObjectId id{};    // id of allocation
47     std::size_t   size{};  // size of allocation (bytes)
48 
49     // even though this is a POD and can be aggregate initialized, the STL uses () constructors
50     // in unordered_map and so we need to provide a trivial contructor...
51     constexpr PointerAttributes(PetscMemType, PetscObjectId, std::size_t) noexcept;
52     constexpr PointerAttributes() noexcept                                              = default;
53     constexpr PointerAttributes(const PointerAttributes &) noexcept                     = default;
54     PETSC_CONSTEXPR_14 PointerAttributes &operator=(const PointerAttributes &) noexcept = default;
55     constexpr PointerAttributes(PointerAttributes &&) noexcept                          = default;
56     PETSC_CONSTEXPR_14 PointerAttributes &operator=(PointerAttributes &&) noexcept      = default;
57 
58     bool operator==(const PointerAttributes &) const noexcept;
59 
60     PETSC_NODISCARD bool contains(const void *, const void *) const noexcept;
61   };
62 
63   using map_type = std::unordered_map<void *, PointerAttributes, PointerHash>;
64 
65   map_type map;
66 
67   // return the iterator of the allocation containing ptr, or map.cend() if not found
68   PETSC_NODISCARD map_type::const_iterator search_for(const void *, bool = false) const noexcept;
69 
70 private:
71   friend class Petsc::RegisterFinalizeable<MemoryMap>;
72   PETSC_NODISCARD PetscErrorCode register_finalize_() noexcept;
73   PETSC_NODISCARD PetscErrorCode finalize_() noexcept;
74 };
75 
76 // ==========================================================================================
77 // PointerAttributes
78 // ==========================================================================================
79 
80 constexpr MemoryMap::PointerAttributes::PointerAttributes(PetscMemType mtype_, PetscObjectId id_, std::size_t size_) noexcept : mtype(mtype_), id(id_), size(size_) { }
81 
82 bool MemoryMap::PointerAttributes::operator==(const PointerAttributes &other) const noexcept
83 {
84   return mtype == other.mtype && id == other.id && size == other.size;
85 }
86 
87 bool MemoryMap::PointerAttributes::contains(const void *ptr_begin, const void *ptr) const noexcept
88 {
89   return (ptr >= ptr_begin) && (ptr < (static_cast<const char *>(ptr_begin) + size));
90 }
91 
92 // ==========================================================================================
93 // Memory map - Private API
94 // ==========================================================================================
95 
96 PetscErrorCode MemoryMap::register_finalize_() noexcept
97 {
98   PetscFunctionBegin;
99   // Preallocate, this does give a modest performance bump since unordered_map is so __dog__
100   // slow if it needs to rehash. Experiments show that users tend not to have more than 5 or
101   // so concurrently live pointers lying around. 10 at most.
102   PetscCallCXX(map.reserve(16));
103   PetscFunctionReturn(0);
104 }
105 
106 PetscErrorCode MemoryMap::finalize_() noexcept
107 {
108   PetscFunctionBegin;
109   PetscCall(PetscInfo(nullptr, "Finalizing memory map\n"));
110   PetscCallCXX(map = map_type{});
111   PetscFunctionReturn(0);
112 }
113 
114 // ==========================================================================================
115 // Memory map - Public API
116 // ==========================================================================================
117 
118 /*
119   MemoryMap::search_for - retrieve an iterator to the key-value pair for a pointer in the map
120 
121   Input Parameters:
122 + ptr       - pointer to search for
123 - must_find - true if an error is raised if the pointer is not found (default: false)
124 
125   Notes:
126   Accounts for sub-regions, i.e. if ptr is contained within another pointers region, it returns
127   the iterator to the super-pointers key-value pair.
128 
129   If ptr is not found and must_find is false returns map.end(), otherwise raises an error
130 */
131 MemoryMap::map_type::const_iterator MemoryMap::search_for(const void *ptr, bool must_find) const noexcept
132 {
133   const auto end = map.end();
134   auto       it  = map.find(const_cast<map_type::key_type>(ptr));
135 
136   // ptr was found, and points to an entire block
137   PetscFunctionBegin;
138   if (it != end) PetscFunctionReturn(it);
139   // wasn't found, but maybe its part of a block. have to search every block for it
140   // clang-format off
141   it = std::find_if(map.begin(), end, [ptr](const map_type::const_iterator::value_type &map_it) {
142     return map_it.second.contains(map_it.first, ptr);
143   });
144   PetscCheckAbort(!must_find || it != end, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Pointer %p was not registered with the memory tracker, call PetscDeviceRegisterMemory() on it", ptr);
145   PetscFunctionReturn(it);
146   // clang-format on
147 }
148 
149 static MemoryMap memory_map;
150 
151 // ==========================================================================================
152 // Utility functions
153 // ==========================================================================================
154 
155 static PetscErrorCode PetscDeviceCheckCapable_Private(PetscDeviceContext dctx, bool cond, const char descr[])
156 {
157   PetscFunctionBegin;
158   PetscCheck(cond, PETSC_COMM_SELF, PETSC_ERR_SUP, "Device context (id: %" PetscInt64_FMT ", name: %s, type: %s) can only handle %s host memory", PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name, dctx->device ? PetscDeviceTypes[dctx->device->type] : "unknown", descr);
159   PetscFunctionReturn(0);
160 }
161 
162 // A helper utility, since register is called from PetscDeviceRegisterMemory() and
163 // PetscDevicAllocate(). The latter also needs the generated id, so instead of making it search
164 // the map again we just return it here
165 static PetscErrorCode PetscDeviceRegisterMemory_Private(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size, PetscObjectId *PETSC_RESTRICT id = nullptr)
166 {
167   auto      &map = memory_map.map;
168   const auto it  = memory_map.search_for(ptr);
169 
170   PetscFunctionBegin;
171   if (it == map.cend()) {
172     // pointer was never registered with the map, insert it and bail
173     const auto newid = PetscObjectNewId_Internal();
174 
175     if (PetscDefined(USE_DEBUG)) {
176       const auto tmp = MemoryMap::PointerAttributes(mtype, newid, size);
177 
178       for (const auto &entry : map) {
179         // REVIEW ME: maybe this should just be handled...
180         PetscCheck(!tmp.contains(ptr, entry.first), PETSC_COMM_SELF, PETSC_ERR_ORDER, "Trying to register pointer %p (memtype %s, size %zu) but it appears you have already registered a sub-region of it (pointer %p, memtype %s, size %zu). Must register the larger region first", ptr, PetscMemTypeToString(mtype), size,
181                    entry.first, PetscMemTypeToString(entry.second.mtype), entry.second.size);
182       }
183     }
184     // clang-format off
185     if (id) *id = newid;
186     PetscCallCXX(map.emplace(
187       std::piecewise_construct,
188       std::forward_as_tuple(const_cast<MemoryMap::map_type::key_type>(ptr)),
189       std::forward_as_tuple(mtype, newid, size)
190     ));
191     // clang-format on
192     PetscFunctionReturn(0);
193   }
194   if (PetscDefined(USE_DEBUG)) {
195     const auto &old = it->second;
196 
197     PetscCheck(MemoryMap::PointerAttributes(mtype, old.id, size) == old, PETSC_COMM_SELF, PETSC_ERR_LIB, "Pointer %p appears to have been previously allocated with memtype %s, size %zu and assigned id %" PetscInt64_FMT ", which does not match new values: (mtype %s, size %zu, id %" PetscInt64_FMT ")", it->first,
198                PetscMemTypeToString(old.mtype), old.size, old.id, PetscMemTypeToString(mtype), size, old.id);
199   }
200   if (id) *id = it->second.id;
201   PetscFunctionReturn(0);
202 }
203 
204 /*@C
205   PetscDeviceRegisterMemory - Register a pointer for use with device-aware memory system
206 
207   Not Collective
208 
209   Input Parameters:
210 + ptr   - The pointer to register
211 . mtype - The `PetscMemType` of the pointer
212 - size  - The size (in bytes) of the memory region
213 
214   Notes:
215   `ptr` need not point to the beginning of the memory range, however the user should register
216   the
217 
218   It's OK to re-register the same `ptr` repeatedly (subsequent registrations do nothing)
219   however the given `mtype` and `size` must match the original registration.
220 
221   `size` may be 0 (in which case this routine does nothing).
222 
223   Level: intermediate
224 
225 .seealso: `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, `PetscDeviceFree()`,
226 `PetscDeviceArrayZero()`
227 @*/
228 PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size)
229 {
230   PetscFunctionBegin;
231   if (PetscMemTypeHost(mtype)) PetscValidPointer(ptr, 1);
232   if (PetscUnlikely(!size)) PetscFunctionReturn(0); // there is no point registering empty range
233   PetscCall(PetscDeviceRegisterMemory_Private(ptr, mtype, size));
234   PetscFunctionReturn(0);
235 }
236 
237 /*
238   PetscDeviceAllocate_Private - Allocate device-aware memory
239 
240   Not Collective, Asynchronous, Auto-dependency aware
241 
242   Input Parameters:
243 + dctx      - The `PetscDeviceContext` used to allocate the memory
244 . clear     - Whether or not the memory should be zeroed
245 . mtype     - The type of memory to allocate
246 . n         - The amount (in bytes) to allocate
247 - alignment - The alignment requirement (in bytes) of the allocated pointer
248 
249   Output Parameter:
250 . ptr - The pointer to store the result in
251 
252   Notes:
253   The user should prefer `PetscDeviceMalloc()` over this routine as it automatically computes
254   the size of the allocation and alignment based on the size of the datatype.
255 
256   If the user is unsure about `alignment` -- or unable to compute it -- passing
257   `PETSC_MEMALIGN` will always work, though the user should beware that this may be quite
258   wasteful for very small allocations.
259 
260   Memory allocated with this function must be freed with `PetscDeviceFree()` (or
261   `PetscDeviceDeallocate_Private()`).
262 
263   If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`.
264 
265   This routine falls back to using `PetscMalloc1()` or `PetscCalloc1()` (depending on the value
266   of `clear`) if PETSc was not configured with device support. The user should note that
267   `mtype` and `alignment` are ignored in this case, as these routines allocate only host memory
268   aligned to `PETSC_MEMALIGN`.
269 
  Note that the result stored in `ptr` is immediately valid and the user may freely inspect or
  manipulate its value on function return, i.e.\:
272 
273 .vb
274   PetscInt *ptr;
275 
276   PetscDeviceAllocate_Private(dctx, PETSC_FALSE, PETSC_MEMTYPE_DEVICE, 20, alignof(PetscInt), (void**)&ptr);
277 
278   PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize
279 
280   ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization
281 .ve
282 
283   DAG representation:
284 .vb
285   time ->
286 
287   -> dctx - |= CALL =| -\- dctx -->
288                          \- ptr ->
289 .ve
290 
291   Level: intermediate
292 
293 .N ASYNC_API
294 
295 .seealso: `PetscDeviceMalloc()`, `PetscDeviceFree()`, `PetscDeviceDeallocate_Private()`,
296 `PetscDeviceArrayCopy()`, `PetscDeviceArrayZero()`, `PetscMemType`
297 */
298 PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext dctx, PetscBool clear, PetscMemType mtype, std::size_t n, std::size_t alignment, void **PETSC_RESTRICT ptr)
299 {
300   PetscObjectId id = 0;
301 
302   PetscFunctionBegin;
303   if (PetscDefined(USE_DEBUG)) {
304     const auto is_power_of_2 = [](std::size_t num) { return (num & (num - 1)) == 0; };
305 
306     PetscCheck(alignment != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu cannot be 0", alignment);
307     PetscCheck(is_power_of_2(alignment), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu must be a power of 2", alignment);
308   }
309   PetscValidPointer(ptr, 6);
310   *ptr = nullptr;
311   if (PetscUnlikely(!n)) PetscFunctionReturn(0);
312   PetscCall(memory_map.register_finalize());
313   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
314 
315   // get our pointer here
316   if (dctx->ops->memalloc) {
317     PetscUseTypeMethod(dctx, memalloc, clear, mtype, n, alignment, ptr);
318   } else {
319     PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "allocating"));
320     PetscCall(PetscMallocA(1, clear, __LINE__, PETSC_FUNCTION_NAME, __FILE__, n, ptr));
321   }
322   PetscCall(PetscDeviceRegisterMemory_Private(*ptr, mtype, n, &id));
323   // Note this is a "write" so that the next dctx to try and read from the pointer has to wait
324   // for the allocation to be ready
325   PetscCall(PetscDeviceContextMarkIntentFromID(dctx, id, PETSC_MEMORY_ACCESS_WRITE, "memory allocation"));
326   PetscFunctionReturn(0);
327 }
328 
329 /*
330   PetscDeviceDeallocate_Private - Free device-aware memory
331 
332   Not Collective, Asynchronous, Auto-dependency aware
333 
334   Input Parameters:
335 + dctx  - The `PetscDeviceContext` used to free the memory
336 - ptr   - The pointer to free
337 
338   Notes:
339   `ptr` must have been allocated using any of `PetscDeviceMalloc()`, `PetscDeviceCalloc()` or
340   `PetscDeviceAllocate_Private()`, or registered with the system via `PetscDeviceRegisterMemory()`.
341 
342   The user should prefer `PetscDeviceFree()` over this routine as it automatically sets `ptr`
343   to `PETSC_NULLPTR` on successful deallocation.
344 
345   `ptr` may be `NULL`.
346 
347   This routine falls back to using `PetscFree()` if PETSc was not configured with device
348   support. The user should note that `PetscFree()` frees only host memory.
349 
350   DAG representation:
351 .vb
352   time ->
353 
354   -> dctx -/- |= CALL =| - dctx ->
355   -> ptr -/
356 .ve
357 
358   Level: intermediate
359 
360 .N ASYNC_API
361 
362 .seealso: `PetscDeviceFree()`, `PetscDeviceAllocate_Private()`
363 */
364 PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext dctx, void *PETSC_RESTRICT ptr)
365 {
366   PetscFunctionBegin;
367   if (ptr) {
368     auto      &map      = memory_map.map;
369     const auto found_it = map.find(const_cast<MemoryMap::map_type::key_type>(ptr));
370 
371     if (PetscUnlikelyDebug(found_it == map.end())) {
372       // OK this is a bad pointer, now determine why
373       const auto it = memory_map.search_for(ptr);
374 
375       // if it is map.cend() then no allocation owns it, meaning it was not allocated by us!
376       PetscCheck(it != map.cend(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Pointer %p was not allocated via PetscDeviceAllocate_Private()", ptr);
377       // if we are here then we did allocate it but the user has tried to do something along
378       // the lines of:
379       //
380       // allocate(&ptr, size);
381       // deallocate(ptr+5);
382       //
383       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempting to deallocate pointer %p which is a suballocation of %p (memtype %s, id %" PetscInt64_FMT ", size %zu bytes)", ptr, it->first, PetscMemTypeToString(it->second.mtype), it->second.id,
384               it->second.size);
385     }
386 
387     PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
388     // mark intent BEFORE we free, note we mark as write so that we are made to wait on any
389     // outstanding reads (don't want to kill the pointer before they are done)
390     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, found_it->second.id, PETSC_MEMORY_ACCESS_WRITE, "memory deallocation"));
391     // do free
392     if (dctx->ops->memfree) {
393       PetscUseTypeMethod(dctx, memfree, found_it->second.mtype, (void **)&ptr);
394     } else {
395       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(found_it->second.mtype), "freeing"));
396     }
397     // if ptr still exists, then the device context could not handle it
398     if (ptr) PetscCall(PetscFree(ptr));
399     PetscCallCXX(map.erase(found_it));
400   }
401   PetscFunctionReturn(0);
402 }
403 
404 /*@C
405   PetscDeviceMemcpy - Copy memory in a device-aware manner
406 
407   Not Collective, Asynchronous, Auto-dependency aware
408 
409   Input Parameters:
410 + dctx - The `PetscDeviceContext` used to copy the memory
411 . dest - The pointer to copy to
412 . src  - The pointer to copy from
413 - n    - The amount (in bytes) to copy
414 
415   Notes:
416   Both `dest` and `src` must have been allocated by `PetscDeviceMalloc()` or
417   `PetscDeviceCalloc()`.
418 
419   `src` and `dest` cannot overlap.
420 
421   If both `src` and `dest` are on the host this routine is fully synchronous.
422 
423   The user should prefer `PetscDeviceArrayCopy()` over this routine as it automatically
424   computes the number of bytes to copy from the size of the pointer types.
425 
426   DAG representation:
427 .vb
428   time ->
429 
430   -> dctx - |= CALL =| - dctx ->
431   -> dest --------------------->
432   -> src ---------------------->
433 .ve
434 
435   Level: intermediate
436 
437 .N ASYNC_API
438 
439 .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
440 `PetscDeviceFree()`
441 @*/
442 PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext dctx, void *PETSC_RESTRICT dest, const void *PETSC_RESTRICT src, std::size_t n)
443 {
444   PetscFunctionBegin;
445   if (!n) PetscFunctionReturn(0);
446   PetscCheck(dest, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy to a NULL pointer");
447   PetscCheck(src, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy from a NULL pointer");
448   if (dest == src) PetscFunctionReturn(0);
449   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
450   {
451     const auto dest_it = memory_map.search_for(dest, true);
452     const auto src_it  = memory_map.search_for(src, true);
453     const auto mode    = PetscMemTypeToDeviceCopyMode(dest_it->second.mtype, src_it->second.mtype);
454 
455     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, src_it->second.id, PETSC_MEMORY_ACCESS_READ, "memory copy (src)"));
456     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, dest_it->second.id, PETSC_MEMORY_ACCESS_WRITE, "memory copy (dest)"));
457     // perform the copy
458     if (dctx->ops->memcopy) {
459       PetscUseTypeMethod(dctx, memcopy, dest, src, n, mode);
460       if (mode == PETSC_DEVICE_COPY_HTOD) {
461         PetscCall(PetscLogCpuToGpu(n));
462       } else if (mode == PETSC_DEVICE_COPY_DTOH) {
463         PetscCall(PetscLogGpuToCpu(n));
464       }
465     } else {
466       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
467       // (pinned) but being copied by a host dctx
468       PetscCall(PetscDeviceCheckCapable_Private(dctx, mode == PETSC_DEVICE_COPY_HTOH, "copying"));
469       PetscCall(PetscMemcpy(dest, src, n));
470     }
471   }
472   PetscFunctionReturn(0);
473 }
474 
475 /*@C
476   PetscDeviceMemset - Memset device-aware memory
477 
478   Not Collective, Asynchronous, Auto-dependency aware
479 
480   Input Parameters:
481 + dctx  - The `PetscDeviceContext` used to memset the memory
482 . ptr   - The pointer to the memory
483 . v     - The value to set
484 - n     - The amount (in bytes) to set
485 
486   Notes:
487   `ptr` must have been allocated by `PetscDeviceMalloc()` or `PetscDeviceCalloc()`.
488 
489   The user should prefer `PetscDeviceArrayZero()` over this routine as it automatically
490   computes the number of bytes to copy from the size of the pointer types, though they should
491   note that it only zeros memory.
492 
  This routine is analogous to `memset()`. That is, this routine copies the value
  `static_cast<unsigned char>(v)` into each of the first `n` bytes of the object pointed to by
  `ptr`.

  If `ptr` is on device, this routine is asynchronous.
498 
499   DAG representation:
500 .vb
501   time ->
502 
503   -> dctx - |= CALL =| - dctx ->
  -> ptr ---------------------->
505 .ve
506 
507   Level: intermediate
508 
509 .N ASYNC_API
510 
511 .seealso: `PetscDeviceArrayZero()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
512 `PetscDeviceFree()`
513 @*/
514 PetscErrorCode PetscDeviceMemset(PetscDeviceContext dctx, void *ptr, PetscInt v, std::size_t n)
515 {
516   PetscFunctionBegin;
517   if (PetscUnlikely(!n)) PetscFunctionReturn(0);
518   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to memset a NULL pointer");
519   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
520   {
521     const auto ptr_it = memory_map.search_for(ptr, true);
522     const auto mtype  = ptr_it->second.mtype;
523 
524     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, ptr_it->second.id, PETSC_MEMORY_ACCESS_WRITE, "memory set"));
525     if (dctx->ops->memset) {
526       PetscUseTypeMethod(dctx, memset, mtype, ptr, v, n);
527     } else {
528       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
529       // (pinned) but being memset by a host dctx
530       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "memsetting"));
531       std::memset(ptr, static_cast<int>(v), n);
532     }
533   }
534   PetscFunctionReturn(0);
535 }
536