xref: /petsc/src/sys/objects/device/interface/memory.cxx (revision c6013d6697bf944c1d88c636ba21f931f774b638)
1 #include <petsc/private/deviceimpl.h> /*I <petscdevice.h> I*/
2 
3 #include <petsc/private/cpp/register_finalize.hpp>
4 #include <petsc/private/cpp/type_traits.hpp> // integral_value
5 #include <petsc/private/cpp/unordered_map.hpp>
6 
7 #include <algorithm> // std::find_if
8 #include <cstring>   // std::memset
9 
10 #include <petsc/private/cpp/object_pool.hpp>
11 
12 namespace Petsc
13 {
14 
15 namespace memory
16 {
17 
18 typename PoolAllocated::allocator_type PoolAllocated::pool_{};
19 
20 } // namespace memory
21 
22 } // namespace Petsc
23 
24 const char *const PetscDeviceCopyModes[] = {"host_to_host", "device_to_host", "host_to_device", "device_to_device", "auto", "PetscDeviceCopyMode", "PETSC_DEVICE_COPY_", nullptr};
25 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOH) == 0, "");
26 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOH) == 1, "");
27 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOD) == 2, "");
28 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOD) == 3, "");
29 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_AUTO) == 4, "");
30 
31 // GCC implementation for std::hash<T*>. LLVM's libc++ is almost 2x slower because they do all
32 // kinds of complicated murmur hashing, so we make sure to enforce GCC's version.
33 struct PointerHash {
34   template <typename T>
operator ()PointerHash35   PETSC_NODISCARD std::size_t operator()(const T *ptr) const noexcept
36   {
37     return reinterpret_cast<std::size_t>(ptr);
38   }
39 };
40 
41 // ==========================================================================================
42 // PointerAttributes
43 //
44 // A set of attributes for a pointer
45 // ==========================================================================================
46 
47 struct PointerAttributes {
48   PetscMemType  mtype = PETSC_MEMTYPE_HOST; // memtype of allocation
49   PetscObjectId id    = 0;                  // id of allocation
50   std::size_t   size  = 0;                  // size of allocation (bytes)
51 
52   // even though this is a POD and can be aggregate initialized, the STL uses () constructors
53   // in unordered_map and so we need to provide a trivial constructor...
54   constexpr PointerAttributes() = default;
55   constexpr PointerAttributes(PetscMemType, PetscObjectId, std::size_t) noexcept;
56 
57   bool operator==(const PointerAttributes &) const noexcept;
58 
59   PETSC_NODISCARD bool contains(const void *, const void *) const noexcept;
60 };
61 
62 // ==========================================================================================
63 // PointerAttributes - Public API
64 // ==========================================================================================
65 
PointerAttributes(PetscMemType mtype_,PetscObjectId id_,std::size_t size_)66 inline constexpr PointerAttributes::PointerAttributes(PetscMemType mtype_, PetscObjectId id_, std::size_t size_) noexcept : mtype(mtype_), id(id_), size(size_) { }
67 
operator ==(const PointerAttributes & other) const68 inline bool PointerAttributes::operator==(const PointerAttributes &other) const noexcept
69 {
70   return (mtype == other.mtype) && (id == other.id) && (size == other.size);
71 }
72 
73 /*
74   PointerAttributes::contains - asks and answers the question, does ptr_begin contain ptr
75 
76   Input Parameters:
77 + ptr_begin - pointer to the start of the range to check
78 - ptr       - the pointer to query
79 
80   Notes:
81   Returns true if ptr falls within ptr_begins range, false otherwise.
82 */
contains(const void * ptr_begin,const void * ptr) const83 inline bool PointerAttributes::contains(const void *ptr_begin, const void *ptr) const noexcept
84 {
85   return (ptr >= ptr_begin) && (ptr < (static_cast<const char *>(ptr_begin) + size));
86 }
87 
88 // ==========================================================================================
89 // MemoryMap
90 //
91 // Since the pointers allocated via PetscDeviceAllocate_Private() may be device pointers we
92 // cannot just store meta-data within the pointer itself (as we can't dereference them). So
93 // instead we need to keep an extra map to keep track of them
94 //
95 // Each entry maps pointer -> {
96 //   PetscMemType  - The memtype of the pointer
97 //   PetscObjectId - A unique ID assigned at allocation or registration so auto-dep can
98 //                   identify the pointer
99 //   size          - The size (in bytes) of the allocation
100 // }
101 // ==========================================================================================
102 
103 class MemoryMap : public Petsc::RegisterFinalizeable<MemoryMap> {
104 public:
105   using map_type = Petsc::UnorderedMap<void *, PointerAttributes, PointerHash>;
106 
107   map_type map{};
108 
109   PETSC_NODISCARD map_type::const_iterator search_for(const void *, bool = false) const noexcept;
110 
111 private:
112   friend class Petsc::RegisterFinalizeable<MemoryMap>;
113   PetscErrorCode register_finalize_() noexcept;
114   PetscErrorCode finalize_() noexcept;
115 };
116 
117 // ==========================================================================================
118 // MemoryMap - Private API
119 // ==========================================================================================
120 
register_finalize_()121 PetscErrorCode MemoryMap::register_finalize_() noexcept
122 {
123   PetscFunctionBegin;
124   // Preallocate, this does give a modest performance bump since unordered_map is so __dog__
125   // slow if it needs to rehash. Experiments show that users tend not to have more than 5 or
126   // so concurrently live pointers lying around. 10 at most.
127   PetscCall(map.reserve(16));
128   PetscFunctionReturn(PETSC_SUCCESS);
129 }
130 
finalize_()131 PetscErrorCode MemoryMap::finalize_() noexcept
132 {
133   PetscFunctionBegin;
134   PetscCall(PetscInfo(nullptr, "Finalizing memory map\n"));
135   PetscCallCXX(map = map_type{});
136   PetscFunctionReturn(PETSC_SUCCESS);
137 }
138 
139 // ==========================================================================================
140 // MemoryMap - Public API
141 // ==========================================================================================
142 
143 /*
144   MemoryMap::search_for - retrieve an iterator to the key-value pair for a pointer in the map
145 
146   Input Parameters:
147 + ptr       - pointer to search for
148 - must_find - true if an error is raised if the pointer is not found (default: false)
149 
150   Notes:
151   Accounts for sub-regions, i.e. if ptr is contained within another pointers region, it returns
152   the iterator to the super-pointers key-value pair.
153 
154   If ptr is not found and must_find is false returns map.end(), otherwise raises an error
155 */
search_for(const void * ptr,bool must_find) const156 MemoryMap::map_type::const_iterator MemoryMap::search_for(const void *ptr, bool must_find) const noexcept
157 {
158   const auto end_it = map.end();
159   auto       it     = map.find(const_cast<map_type::key_type>(ptr));
160 
161   // ptr was found, and points to an entire block
162   PetscFunctionBegin;
163   if (it != end_it) PetscFunctionReturn(it);
164   // wasn't found, but maybe its part of a block. have to search every block for it
165   // clang-format off
166   it = std::find_if(map.begin(), end_it, [ptr](map_type::const_iterator::reference map_it) {
167     return map_it.second.contains(map_it.first, ptr);
168   });
169   // clang-format on
170   PetscCheckAbort(!must_find || it != end_it, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Pointer %p was not registered with the memory tracker, call PetscDeviceRegisterMemory() on it", ptr);
171   PetscFunctionReturn(it);
172 }
173 
// File-local registry of tracked pointers; every allocation, registration, lookup and
// deallocation routine in this file goes through this single instance.
static MemoryMap memory_map;
175 
176 // ==========================================================================================
177 // Utility functions
178 // ==========================================================================================
179 
PetscDeviceCheckCapable_Private(PetscDeviceContext dctx,bool cond,const char descr[])180 static PetscErrorCode PetscDeviceCheckCapable_Private(PetscDeviceContext dctx, bool cond, const char descr[])
181 {
182   PetscFunctionBegin;
183   PetscCheck(cond, PETSC_COMM_SELF, PETSC_ERR_SUP, "Device context (id: %" PetscInt64_FMT ", name: %s, type: %s) can only handle %s host memory", PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name, dctx->device ? PetscDeviceTypes[dctx->device->type] : "unknown", descr);
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 // A helper utility, since register is called from PetscDeviceRegisterMemory() and
188 // PetscDevicAllocate(). The latter also needs the generated id, so instead of making it search
189 // the map again we just return it here
PetscDeviceRegisterMemory_Private(const void * PETSC_RESTRICT ptr,PetscMemType mtype,std::size_t size,PetscObjectId * PETSC_RESTRICT id=nullptr)190 static PetscErrorCode PetscDeviceRegisterMemory_Private(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size, PetscObjectId *PETSC_RESTRICT id = nullptr)
191 {
192   auto      &map = memory_map.map;
193   const auto it  = memory_map.search_for(ptr);
194 
195   PetscFunctionBegin;
196   if (it == map.cend()) {
197     // pointer was never registered with the map, insert it and bail
198     const auto newid = PetscObjectNewId_Internal();
199 
200     if (PetscDefined(USE_DEBUG)) {
201       const auto tmp = PointerAttributes(mtype, newid, size);
202 
203       for (const auto &entry : map) {
204         auto &&attr = entry.second;
205 
206         // REVIEW ME: maybe this should just be handled...
207         PetscCheck(!tmp.contains(ptr, entry.first), PETSC_COMM_SELF, PETSC_ERR_ORDER, "Trying to register pointer %p (memtype %s, size %zu) but it appears you have already registered a sub-region of it (pointer %p, memtype %s, size %zu). Must register the larger region first", ptr, PetscMemTypeToString(mtype), size,
208                    entry.first, PetscMemTypeToString(attr.mtype), attr.size);
209       }
210     }
211     // clang-format off
212     if (id) *id = newid;
213     PetscCallCXX(map.emplace(
214       std::piecewise_construct,
215       std::forward_as_tuple(const_cast<MemoryMap::map_type::key_type>(ptr)),
216       std::forward_as_tuple(mtype, newid, size)
217     ));
218     // clang-format on
219     PetscFunctionReturn(PETSC_SUCCESS);
220   }
221   if (PetscDefined(USE_DEBUG)) {
222     const auto &old = it->second;
223 
224     PetscCheck(PointerAttributes(mtype, old.id, size) == old, PETSC_COMM_SELF, PETSC_ERR_LIB, "Pointer %p appears to have been previously allocated with memtype %s, size %zu and assigned id %" PetscInt64_FMT ", which does not match new values: (mtype %s, size %zu, id %" PetscInt64_FMT ")", it->first,
225                PetscMemTypeToString(old.mtype), old.size, old.id, PetscMemTypeToString(mtype), size, old.id);
226   }
227   if (id) *id = it->second.id;
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 /*@C
232   PetscDeviceRegisterMemory - Register a pointer for use with device-aware memory system
233 
234   Not Collective
235 
236   Input Parameters:
237 + ptr   - The pointer to register
238 . mtype - The `PetscMemType` of the pointer
239 - size  - The size (in bytes) of the memory region
240 
241   Notes:
242   `ptr` need not point to the beginning of the memory range, however the user should register
243   the
244 
245   It's OK to re-register the same `ptr` repeatedly (subsequent registrations do nothing)
246   however the given `mtype` and `size` must match the original registration.
247 
248   `size` may be 0 (in which case this routine does nothing).
249 
250   Level: intermediate
251 
252 .seealso: `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, `PetscDeviceFree()`,
253 `PetscDeviceArrayZero()`
254 @*/
PetscDeviceRegisterMemory(const void * PETSC_RESTRICT ptr,PetscMemType mtype,std::size_t size)255 PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size)
256 {
257   PetscFunctionBegin;
258   if (PetscMemTypeHost(mtype)) PetscAssertPointer(ptr, 1);
259   if (PetscUnlikely(!size)) PetscFunctionReturn(PETSC_SUCCESS); // there is no point registering empty range
260   PetscCall(PetscDeviceRegisterMemory_Private(ptr, mtype, size));
261   PetscFunctionReturn(PETSC_SUCCESS);
262 }
263 
264 /*
265   PetscDeviceAllocate_Private - Allocate device-aware memory
266 
267   Not Collective, Asynchronous, Auto-dependency aware
268 
269   Input Parameters:
270 + dctx      - The `PetscDeviceContext` used to allocate the memory
271 . clear     - Whether or not the memory should be zeroed
272 . mtype     - The type of memory to allocate
273 . n         - The amount (in bytes) to allocate
274 - alignment - The alignment requirement (in bytes) of the allocated pointer
275 
276   Output Parameter:
277 . ptr - The pointer to store the result in
278 
279   Notes:
280   The user should prefer `PetscDeviceMalloc()` over this routine as it automatically computes
281   the size of the allocation and alignment based on the size of the datatype.
282 
283   If the user is unsure about `alignment` -- or unable to compute it -- passing
284   `PETSC_MEMALIGN` will always work, though the user should beware that this may be quite
285   wasteful for very small allocations.
286 
287   Memory allocated with this function must be freed with `PetscDeviceFree()` (or
288   `PetscDeviceDeallocate_Private()`).
289 
290   If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`.
291 
292   This routine falls back to using `PetscMalloc1()` or `PetscCalloc1()` (depending on the value
293   of `clear`) if PETSc was not configured with device support. The user should note that
294   `mtype` and `alignment` are ignored in this case, as these routines allocate only host memory
295   aligned to `PETSC_MEMALIGN`.
296 
  Note that the result stored in `ptr` is immediately valid and the user may freely inspect or
  manipulate its value on function return, i.e.\:
299 
300 .vb
301   PetscInt *ptr;
302 
303   PetscDeviceAllocate_Private(dctx, PETSC_FALSE, PETSC_MEMTYPE_DEVICE, 20, alignof(PetscInt), (void**)&ptr);
304 
305   PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize
306 
307   ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization
308 .ve
309 
310   DAG representation:
311 .vb
312   time ->
313 
314   -> dctx - |= CALL =| -\- dctx -->
315                          \- ptr ->
316 .ve
317 
318   Level: intermediate
319 
320 .N ASYNC_API
321 
322 .seealso: `PetscDeviceMalloc()`, `PetscDeviceFree()`, `PetscDeviceDeallocate_Private()`,
323 `PetscDeviceArrayCopy()`, `PetscDeviceArrayZero()`, `PetscMemType`
324 */
PetscDeviceAllocate_Private(PetscDeviceContext dctx,PetscBool clear,PetscMemType mtype,std::size_t n,std::size_t alignment,void ** PETSC_RESTRICT ptr)325 PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext dctx, PetscBool clear, PetscMemType mtype, std::size_t n, std::size_t alignment, void **PETSC_RESTRICT ptr)
326 {
327   PetscObjectId id = 0;
328 
329   PetscFunctionBegin;
330   if (PetscDefined(USE_DEBUG)) {
331     const auto is_power_of_2 = [](std::size_t num) { return (num & (num - 1)) == 0; };
332 
333     PetscCheck(alignment != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu cannot be 0", alignment);
334     PetscCheck(is_power_of_2(alignment), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu must be a power of 2", alignment);
335   }
336   PetscAssertPointer(ptr, 6);
337   *ptr = nullptr;
338   if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS);
339   PetscCall(memory_map.register_finalize());
340   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
341 
342   // get our pointer here
343   if (dctx->ops->memalloc) {
344     PetscUseTypeMethod(dctx, memalloc, clear, mtype, n, alignment, ptr);
345   } else {
346     PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "allocating"));
347     PetscCall(PetscMallocA(1, clear, __LINE__, PETSC_FUNCTION_NAME, __FILE__, n, ptr));
348   }
349   PetscCall(PetscDeviceRegisterMemory_Private(*ptr, mtype, n, &id));
350   // Note this is a "write" so that the next dctx to try and read from the pointer has to wait
351   // for the allocation to be ready
352   PetscCall(PetscDeviceContextMarkIntentFromID(dctx, id, PETSC_MEMORY_ACCESS_WRITE, "memory allocation"));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
356 /*
357   PetscDeviceDeallocate_Private - Free device-aware memory
358 
359   Not Collective, Asynchronous, Auto-dependency aware
360 
361   Input Parameters:
362 + dctx  - The `PetscDeviceContext` used to free the memory
363 - ptr   - The pointer to free
364 
365   Level: intermediate
366 
367   Notes:
368   `ptr` must have been allocated using any of `PetscDeviceMalloc()`, `PetscDeviceCalloc()` or
369   `PetscDeviceAllocate_Private()`, or registered with the system via `PetscDeviceRegisterMemory()`.
370 
371   The user should prefer `PetscDeviceFree()` over this routine as it automatically sets `ptr`
372   to `PETSC_NULLPTR` on successful deallocation.
373 
374   `ptr` may be `NULL`.
375 
376   This routine falls back to using `PetscFree()` if PETSc was not configured with device
377   support. The user should note that `PetscFree()` frees only host memory.
378 
379   DAG representation:
380 .vb
381   time ->
382 
383   -> dctx -/- |= CALL =| - dctx ->
384   -> ptr -/
385 .ve
386 
387 .N ASYNC_API
388 
389 .seealso: `PetscDeviceFree()`, `PetscDeviceAllocate_Private()`
390 */
PetscDeviceDeallocate_Private(PetscDeviceContext dctx,void * PETSC_RESTRICT ptr)391 PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext dctx, void *PETSC_RESTRICT ptr)
392 {
393   PetscFunctionBegin;
394   if (ptr) {
395     auto      &map      = memory_map.map;
396     const auto found_it = map.find(const_cast<MemoryMap::map_type::key_type>(ptr));
397 
398     if (PetscUnlikelyDebug(found_it == map.end())) {
399       // OK this is a bad pointer, now determine why
400       const auto it = memory_map.search_for(ptr);
401 
402       // if it is map.cend() then no allocation owns it, meaning it was not allocated by us!
403       PetscCheck(it != map.cend(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Pointer %p was not allocated via PetscDeviceAllocate_Private()", ptr);
404       // if we are here then we did allocate it but the user has tried to do something along
405       // the lines of:
406       //
407       // allocate(&ptr, size);
408       // deallocate(ptr+5);
409       //
410       auto &&attr = it->second;
411       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempting to deallocate pointer %p which is a suballocation of %p (memtype %s, id %" PetscInt64_FMT ", size %zu bytes)", ptr, it->first, PetscMemTypeToString(attr.mtype), attr.id, attr.size);
412     }
413     auto &&attr = found_it->second;
414     PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
415     // mark intent BEFORE we free, note we mark as write so that we are made to wait on any
416     // outstanding reads (don't want to kill the pointer before they are done)
417     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory deallocation"));
418     // do free
419     if (dctx->ops->memfree) {
420       PetscUseTypeMethod(dctx, memfree, attr.mtype, (void **)&ptr);
421     } else {
422       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "freeing"));
423     }
424     // if ptr still exists, then the device context could not handle it
425     if (ptr) PetscCall(PetscFree(ptr));
426     PetscCallCXX(map.erase(found_it));
427   }
428   PetscFunctionReturn(PETSC_SUCCESS);
429 }
430 
431 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
432 /*@C
433   PetscDeviceMemcpy - Copy memory in a device-aware manner
434 
435   Not Collective, Asynchronous, Auto-dependency aware
436 
437   Input Parameters:
438 + dctx - The `PetscDeviceContext` used to copy the memory
439 . dest - The pointer to copy to
440 . src  - The pointer to copy from
441 - n    - The amount (in bytes) to copy
442 
443   Level: intermediate
444 
445   Notes:
446   Both `dest` and `src` must have been allocated by `PetscDeviceMalloc()` or
447   `PetscDeviceCalloc()`.
448 
449   `src` and `dest` cannot overlap.
450 
451   If both `src` and `dest` are on the host this routine is fully synchronous.
452 
453   The user should prefer `PetscDeviceArrayCopy()` over this routine as it automatically
454   computes the number of bytes to copy from the size of the pointer types.
455 
456   DAG representation:
457 .vb
458   time ->
459 
460   -> dctx - |= CALL =| - dctx ->
461   -> dest --------------------->
462   -> src ---------------------->
463 .ve
464 
465 .N ASYNC_API
466 
467 .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
468 `PetscDeviceFree()`
469 @*/
PetscDeviceMemcpy(PetscDeviceContext dctx,void * PETSC_RESTRICT dest,const void * PETSC_RESTRICT src,std::size_t n)470 PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext dctx, void *PETSC_RESTRICT dest, const void *PETSC_RESTRICT src, std::size_t n)
471 {
472   PetscFunctionBegin;
473   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
474   PetscCheck(dest, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy to a NULL pointer");
475   PetscCheck(src, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy from a NULL pointer");
476   if (dest == src) PetscFunctionReturn(PETSC_SUCCESS);
477   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
478   {
479     const auto &dest_attr = memory_map.search_for(dest, true)->second;
480     const auto &src_attr  = memory_map.search_for(src, true)->second;
481     const auto  mode      = PetscMemTypeToDeviceCopyMode(dest_attr.mtype, src_attr.mtype);
482 
483     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, src_attr.id, PETSC_MEMORY_ACCESS_READ, "memory copy (src)"));
484     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, dest_attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory copy (dest)"));
485     // perform the copy
486     if (dctx->ops->memcopy) {
487       PetscUseTypeMethod(dctx, memcopy, dest, src, n, mode);
488       if (mode == PETSC_DEVICE_COPY_HTOD) {
489         PetscCall(PetscLogCpuToGpu(n));
490       } else if (mode == PETSC_DEVICE_COPY_DTOH) {
491         PetscCall(PetscLogGpuToCpu(n));
492       }
493     } else {
494       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
495       // (pinned) but being copied by a host dctx
496       PetscCall(PetscDeviceCheckCapable_Private(dctx, mode == PETSC_DEVICE_COPY_HTOH, "copying"));
497       PetscCall(PetscMemcpy(dest, src, n));
498     }
499   }
500   PetscFunctionReturn(PETSC_SUCCESS);
501 }
502 
503 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
504 /*@C
505   PetscDeviceMemset - Memset device-aware memory
506 
507   Not Collective, Asynchronous, Auto-dependency aware
508 
509   Input Parameters:
510 + dctx - The `PetscDeviceContext` used to memset the memory
511 . ptr  - The pointer to the memory
512 . v    - The value to set
513 - n    - The amount (in bytes) to set
514 
515   Level: intermediate
516 
517   Notes:
518   `ptr` must have been allocated by `PetscDeviceMalloc()` or `PetscDeviceCalloc()`.
519 
  The user should prefer `PetscDeviceArrayZero()` over this routine as it automatically
  computes the number of bytes to set from the size of the pointer types, though they should
  note that it only zeros memory.
523 
  This routine is analogous to `memset()`. That is, this routine copies the value
  `static_cast<unsigned char>(v)` into each of the first `n` bytes of the object pointed to by
  `ptr`.

  If `ptr` is on device, this routine is asynchronous.
529 
530   DAG representation:
531 .vb
532   time ->
533 
534   -> dctx - |= CALL =| - dctx ->
  -> ptr ---------------------->
536 .ve
537 
538 .N ASYNC_API
539 
540 .seealso: `PetscDeviceArrayZero()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
541 `PetscDeviceFree()`
542 @*/
PetscDeviceMemset(PetscDeviceContext dctx,void * ptr,PetscInt v,std::size_t n)543 PetscErrorCode PetscDeviceMemset(PetscDeviceContext dctx, void *ptr, PetscInt v, std::size_t n)
544 {
545   PetscFunctionBegin;
546   if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS);
547   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to memset a NULL pointer");
548   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
549   {
550     const auto &attr = memory_map.search_for(ptr, true)->second;
551 
552     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory set"));
553     if (dctx->ops->memset) {
554       PetscUseTypeMethod(dctx, memset, attr.mtype, ptr, v, n);
555     } else {
556       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
557       // (pinned) but being memset by a host dctx
558       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "memsetting"));
559       std::memset(ptr, static_cast<int>(v), n);
560     }
561   }
562   PetscFunctionReturn(PETSC_SUCCESS);
563 }
564