1 #include <petsc/private/deviceimpl.h> /*I <petscdevice.h> I*/
2
3 #include <petsc/private/cpp/register_finalize.hpp>
4 #include <petsc/private/cpp/type_traits.hpp> // integral_value
5 #include <petsc/private/cpp/unordered_map.hpp>
6
7 #include <algorithm> // std::find_if
8 #include <cstring> // std::memset
9
10 #include <petsc/private/cpp/object_pool.hpp>
11
namespace Petsc
{

namespace memory
{

// Out-of-class definition of the static pool_ member of PoolAllocated; the member is
// value-initialized here.
typename PoolAllocated::allocator_type PoolAllocated::pool_{};

} // namespace memory

} // namespace Petsc
23
24 const char *const PetscDeviceCopyModes[] = {"host_to_host", "device_to_host", "host_to_device", "device_to_device", "auto", "PetscDeviceCopyMode", "PETSC_DEVICE_COPY_", nullptr};
25 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOH) == 0, "");
26 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOH) == 1, "");
27 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOD) == 2, "");
28 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOD) == 3, "");
29 static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_AUTO) == 4, "");
30
31 // GCC implementation for std::hash<T*>. LLVM's libc++ is almost 2x slower because they do all
32 // kinds of complicated murmur hashing, so we make sure to enforce GCC's version.
33 struct PointerHash {
34 template <typename T>
operator ()PointerHash35 PETSC_NODISCARD std::size_t operator()(const T *ptr) const noexcept
36 {
37 return reinterpret_cast<std::size_t>(ptr);
38 }
39 };
40
41 // ==========================================================================================
42 // PointerAttributes
43 //
44 // A set of attributes for a pointer
45 // ==========================================================================================
46
47 struct PointerAttributes {
48 PetscMemType mtype = PETSC_MEMTYPE_HOST; // memtype of allocation
49 PetscObjectId id = 0; // id of allocation
50 std::size_t size = 0; // size of allocation (bytes)
51
52 // even though this is a POD and can be aggregate initialized, the STL uses () constructors
53 // in unordered_map and so we need to provide a trivial constructor...
54 constexpr PointerAttributes() = default;
55 constexpr PointerAttributes(PetscMemType, PetscObjectId, std::size_t) noexcept;
56
57 bool operator==(const PointerAttributes &) const noexcept;
58
59 PETSC_NODISCARD bool contains(const void *, const void *) const noexcept;
60 };
61
62 // ==========================================================================================
63 // PointerAttributes - Public API
64 // ==========================================================================================
65
PointerAttributes(PetscMemType mtype_,PetscObjectId id_,std::size_t size_)66 inline constexpr PointerAttributes::PointerAttributes(PetscMemType mtype_, PetscObjectId id_, std::size_t size_) noexcept : mtype(mtype_), id(id_), size(size_) { }
67
operator ==(const PointerAttributes & other) const68 inline bool PointerAttributes::operator==(const PointerAttributes &other) const noexcept
69 {
70 return (mtype == other.mtype) && (id == other.id) && (size == other.size);
71 }
72
73 /*
74 PointerAttributes::contains - asks and answers the question, does ptr_begin contain ptr
75
76 Input Parameters:
77 + ptr_begin - pointer to the start of the range to check
78 - ptr - the pointer to query
79
80 Notes:
  Returns true if ptr falls within ptr_begin's range, i.e. [ptr_begin, ptr_begin + size), false otherwise.
82 */
contains(const void * ptr_begin,const void * ptr) const83 inline bool PointerAttributes::contains(const void *ptr_begin, const void *ptr) const noexcept
84 {
85 return (ptr >= ptr_begin) && (ptr < (static_cast<const char *>(ptr_begin) + size));
86 }
87
88 // ==========================================================================================
89 // MemoryMap
90 //
91 // Since the pointers allocated via PetscDeviceAllocate_Private() may be device pointers we
92 // cannot just store meta-data within the pointer itself (as we can't dereference them). So
93 // instead we need to keep an extra map to keep track of them
94 //
95 // Each entry maps pointer -> {
96 // PetscMemType - The memtype of the pointer
97 // PetscObjectId - A unique ID assigned at allocation or registration so auto-dep can
98 // identify the pointer
99 // size - The size (in bytes) of the allocation
100 // }
101 // ==========================================================================================
102
103 class MemoryMap : public Petsc::RegisterFinalizeable<MemoryMap> {
104 public:
105 using map_type = Petsc::UnorderedMap<void *, PointerAttributes, PointerHash>;
106
107 map_type map{};
108
109 PETSC_NODISCARD map_type::const_iterator search_for(const void *, bool = false) const noexcept;
110
111 private:
112 friend class Petsc::RegisterFinalizeable<MemoryMap>;
113 PetscErrorCode register_finalize_() noexcept;
114 PetscErrorCode finalize_() noexcept;
115 };
116
117 // ==========================================================================================
118 // MemoryMap - Private API
119 // ==========================================================================================
120
register_finalize_()121 PetscErrorCode MemoryMap::register_finalize_() noexcept
122 {
123 PetscFunctionBegin;
124 // Preallocate, this does give a modest performance bump since unordered_map is so __dog__
125 // slow if it needs to rehash. Experiments show that users tend not to have more than 5 or
126 // so concurrently live pointers lying around. 10 at most.
127 PetscCall(map.reserve(16));
128 PetscFunctionReturn(PETSC_SUCCESS);
129 }
130
finalize_()131 PetscErrorCode MemoryMap::finalize_() noexcept
132 {
133 PetscFunctionBegin;
134 PetscCall(PetscInfo(nullptr, "Finalizing memory map\n"));
135 PetscCallCXX(map = map_type{});
136 PetscFunctionReturn(PETSC_SUCCESS);
137 }
138
139 // ==========================================================================================
140 // MemoryMap - Public API
141 // ==========================================================================================
142
143 /*
144 MemoryMap::search_for - retrieve an iterator to the key-value pair for a pointer in the map
145
146 Input Parameters:
147 + ptr - pointer to search for
148 - must_find - true if an error is raised if the pointer is not found (default: false)
149
150 Notes:
  Accounts for sub-regions, i.e. if ptr is contained within another pointer's region, it returns
  the iterator to the enclosing pointer's key-value pair.
153
154 If ptr is not found and must_find is false returns map.end(), otherwise raises an error
155 */
search_for(const void * ptr,bool must_find) const156 MemoryMap::map_type::const_iterator MemoryMap::search_for(const void *ptr, bool must_find) const noexcept
157 {
158 const auto end_it = map.end();
159 auto it = map.find(const_cast<map_type::key_type>(ptr));
160
161 // ptr was found, and points to an entire block
162 PetscFunctionBegin;
163 if (it != end_it) PetscFunctionReturn(it);
164 // wasn't found, but maybe its part of a block. have to search every block for it
165 // clang-format off
166 it = std::find_if(map.begin(), end_it, [ptr](map_type::const_iterator::reference map_it) {
167 return map_it.second.contains(map_it.first, ptr);
168 });
169 // clang-format on
170 PetscCheckAbort(!must_find || it != end_it, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Pointer %p was not registered with the memory tracker, call PetscDeviceRegisterMemory() on it", ptr);
171 PetscFunctionReturn(it);
172 }
173
// File-local MemoryMap instance shared by the registration, allocation, deallocation, copy and
// memset routines below.
static MemoryMap memory_map;
175
176 // ==========================================================================================
177 // Utility functions
178 // ==========================================================================================
179
PetscDeviceCheckCapable_Private(PetscDeviceContext dctx,bool cond,const char descr[])180 static PetscErrorCode PetscDeviceCheckCapable_Private(PetscDeviceContext dctx, bool cond, const char descr[])
181 {
182 PetscFunctionBegin;
183 PetscCheck(cond, PETSC_COMM_SELF, PETSC_ERR_SUP, "Device context (id: %" PetscInt64_FMT ", name: %s, type: %s) can only handle %s host memory", PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name, dctx->device ? PetscDeviceTypes[dctx->device->type] : "unknown", descr);
184 PetscFunctionReturn(PETSC_SUCCESS);
185 }
186
187 // A helper utility, since register is called from PetscDeviceRegisterMemory() and
// PetscDeviceAllocate_Private(). The latter also needs the generated id, so instead of making it search
189 // the map again we just return it here
PetscDeviceRegisterMemory_Private(const void * PETSC_RESTRICT ptr,PetscMemType mtype,std::size_t size,PetscObjectId * PETSC_RESTRICT id=nullptr)190 static PetscErrorCode PetscDeviceRegisterMemory_Private(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size, PetscObjectId *PETSC_RESTRICT id = nullptr)
191 {
192 auto &map = memory_map.map;
193 const auto it = memory_map.search_for(ptr);
194
195 PetscFunctionBegin;
196 if (it == map.cend()) {
197 // pointer was never registered with the map, insert it and bail
198 const auto newid = PetscObjectNewId_Internal();
199
200 if (PetscDefined(USE_DEBUG)) {
201 const auto tmp = PointerAttributes(mtype, newid, size);
202
203 for (const auto &entry : map) {
204 auto &&attr = entry.second;
205
206 // REVIEW ME: maybe this should just be handled...
207 PetscCheck(!tmp.contains(ptr, entry.first), PETSC_COMM_SELF, PETSC_ERR_ORDER, "Trying to register pointer %p (memtype %s, size %zu) but it appears you have already registered a sub-region of it (pointer %p, memtype %s, size %zu). Must register the larger region first", ptr, PetscMemTypeToString(mtype), size,
208 entry.first, PetscMemTypeToString(attr.mtype), attr.size);
209 }
210 }
211 // clang-format off
212 if (id) *id = newid;
213 PetscCallCXX(map.emplace(
214 std::piecewise_construct,
215 std::forward_as_tuple(const_cast<MemoryMap::map_type::key_type>(ptr)),
216 std::forward_as_tuple(mtype, newid, size)
217 ));
218 // clang-format on
219 PetscFunctionReturn(PETSC_SUCCESS);
220 }
221 if (PetscDefined(USE_DEBUG)) {
222 const auto &old = it->second;
223
224 PetscCheck(PointerAttributes(mtype, old.id, size) == old, PETSC_COMM_SELF, PETSC_ERR_LIB, "Pointer %p appears to have been previously allocated with memtype %s, size %zu and assigned id %" PetscInt64_FMT ", which does not match new values: (mtype %s, size %zu, id %" PetscInt64_FMT ")", it->first,
225 PetscMemTypeToString(old.mtype), old.size, old.id, PetscMemTypeToString(mtype), size, old.id);
226 }
227 if (id) *id = it->second.id;
228 PetscFunctionReturn(PETSC_SUCCESS);
229 }
230
231 /*@C
232 PetscDeviceRegisterMemory - Register a pointer for use with device-aware memory system
233
234 Not Collective
235
236 Input Parameters:
237 + ptr - The pointer to register
238 . mtype - The `PetscMemType` of the pointer
239 - size - The size (in bytes) of the memory region
240
241 Notes:
  `ptr` need not point to the beginning of the memory range, however the user should register
  the enclosing (larger) region before registering any sub-region of it.
244
245 It's OK to re-register the same `ptr` repeatedly (subsequent registrations do nothing)
246 however the given `mtype` and `size` must match the original registration.
247
248 `size` may be 0 (in which case this routine does nothing).
249
250 Level: intermediate
251
252 .seealso: `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, `PetscDeviceFree()`,
253 `PetscDeviceArrayZero()`
254 @*/
PetscDeviceRegisterMemory(const void * PETSC_RESTRICT ptr,PetscMemType mtype,std::size_t size)255 PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size)
256 {
257 PetscFunctionBegin;
258 if (PetscMemTypeHost(mtype)) PetscAssertPointer(ptr, 1);
259 if (PetscUnlikely(!size)) PetscFunctionReturn(PETSC_SUCCESS); // there is no point registering empty range
260 PetscCall(PetscDeviceRegisterMemory_Private(ptr, mtype, size));
261 PetscFunctionReturn(PETSC_SUCCESS);
262 }
263
264 /*
265 PetscDeviceAllocate_Private - Allocate device-aware memory
266
267 Not Collective, Asynchronous, Auto-dependency aware
268
269 Input Parameters:
270 + dctx - The `PetscDeviceContext` used to allocate the memory
271 . clear - Whether or not the memory should be zeroed
272 . mtype - The type of memory to allocate
273 . n - The amount (in bytes) to allocate
274 - alignment - The alignment requirement (in bytes) of the allocated pointer
275
276 Output Parameter:
277 . ptr - The pointer to store the result in
278
279 Notes:
280 The user should prefer `PetscDeviceMalloc()` over this routine as it automatically computes
281 the size of the allocation and alignment based on the size of the datatype.
282
283 If the user is unsure about `alignment` -- or unable to compute it -- passing
284 `PETSC_MEMALIGN` will always work, though the user should beware that this may be quite
285 wasteful for very small allocations.
286
287 Memory allocated with this function must be freed with `PetscDeviceFree()` (or
288 `PetscDeviceDeallocate_Private()`).
289
290 If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`.
291
292 This routine falls back to using `PetscMalloc1()` or `PetscCalloc1()` (depending on the value
293 of `clear`) if PETSc was not configured with device support. The user should note that
294 `mtype` and `alignment` are ignored in this case, as these routines allocate only host memory
295 aligned to `PETSC_MEMALIGN`.
296
  Note that the result stored in `ptr` is immediately valid and the user may freely inspect or
  manipulate its value on function return, i.e.\:
299
300 .vb
301 PetscInt *ptr;
302
303 PetscDeviceAllocate_Private(dctx, PETSC_FALSE, PETSC_MEMTYPE_DEVICE, 20, alignof(PetscInt), (void**)&ptr);
304
305 PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize
306
307 ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization
308 .ve
309
310 DAG representation:
311 .vb
312 time ->
313
314 -> dctx - |= CALL =| -\- dctx -->
315 \- ptr ->
316 .ve
317
318 Level: intermediate
319
320 .N ASYNC_API
321
322 .seealso: `PetscDeviceMalloc()`, `PetscDeviceFree()`, `PetscDeviceDeallocate_Private()`,
323 `PetscDeviceArrayCopy()`, `PetscDeviceArrayZero()`, `PetscMemType`
324 */
PetscDeviceAllocate_Private(PetscDeviceContext dctx,PetscBool clear,PetscMemType mtype,std::size_t n,std::size_t alignment,void ** PETSC_RESTRICT ptr)325 PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext dctx, PetscBool clear, PetscMemType mtype, std::size_t n, std::size_t alignment, void **PETSC_RESTRICT ptr)
326 {
327 PetscObjectId id = 0;
328
329 PetscFunctionBegin;
330 if (PetscDefined(USE_DEBUG)) {
331 const auto is_power_of_2 = [](std::size_t num) { return (num & (num - 1)) == 0; };
332
333 PetscCheck(alignment != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu cannot be 0", alignment);
334 PetscCheck(is_power_of_2(alignment), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu must be a power of 2", alignment);
335 }
336 PetscAssertPointer(ptr, 6);
337 *ptr = nullptr;
338 if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS);
339 PetscCall(memory_map.register_finalize());
340 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
341
342 // get our pointer here
343 if (dctx->ops->memalloc) {
344 PetscUseTypeMethod(dctx, memalloc, clear, mtype, n, alignment, ptr);
345 } else {
346 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "allocating"));
347 PetscCall(PetscMallocA(1, clear, __LINE__, PETSC_FUNCTION_NAME, __FILE__, n, ptr));
348 }
349 PetscCall(PetscDeviceRegisterMemory_Private(*ptr, mtype, n, &id));
350 // Note this is a "write" so that the next dctx to try and read from the pointer has to wait
351 // for the allocation to be ready
352 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, id, PETSC_MEMORY_ACCESS_WRITE, "memory allocation"));
353 PetscFunctionReturn(PETSC_SUCCESS);
354 }
355
356 /*
357 PetscDeviceDeallocate_Private - Free device-aware memory
358
359 Not Collective, Asynchronous, Auto-dependency aware
360
361 Input Parameters:
362 + dctx - The `PetscDeviceContext` used to free the memory
363 - ptr - The pointer to free
364
365 Level: intermediate
366
367 Notes:
368 `ptr` must have been allocated using any of `PetscDeviceMalloc()`, `PetscDeviceCalloc()` or
369 `PetscDeviceAllocate_Private()`, or registered with the system via `PetscDeviceRegisterMemory()`.
370
371 The user should prefer `PetscDeviceFree()` over this routine as it automatically sets `ptr`
372 to `PETSC_NULLPTR` on successful deallocation.
373
374 `ptr` may be `NULL`.
375
376 This routine falls back to using `PetscFree()` if PETSc was not configured with device
377 support. The user should note that `PetscFree()` frees only host memory.
378
379 DAG representation:
380 .vb
381 time ->
382
383 -> dctx -/- |= CALL =| - dctx ->
384 -> ptr -/
385 .ve
386
387 .N ASYNC_API
388
389 .seealso: `PetscDeviceFree()`, `PetscDeviceAllocate_Private()`
390 */
PetscDeviceDeallocate_Private(PetscDeviceContext dctx,void * PETSC_RESTRICT ptr)391 PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext dctx, void *PETSC_RESTRICT ptr)
392 {
393 PetscFunctionBegin;
394 if (ptr) {
395 auto &map = memory_map.map;
396 const auto found_it = map.find(const_cast<MemoryMap::map_type::key_type>(ptr));
397
398 if (PetscUnlikelyDebug(found_it == map.end())) {
399 // OK this is a bad pointer, now determine why
400 const auto it = memory_map.search_for(ptr);
401
402 // if it is map.cend() then no allocation owns it, meaning it was not allocated by us!
403 PetscCheck(it != map.cend(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Pointer %p was not allocated via PetscDeviceAllocate_Private()", ptr);
404 // if we are here then we did allocate it but the user has tried to do something along
405 // the lines of:
406 //
407 // allocate(&ptr, size);
408 // deallocate(ptr+5);
409 //
410 auto &&attr = it->second;
411 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempting to deallocate pointer %p which is a suballocation of %p (memtype %s, id %" PetscInt64_FMT ", size %zu bytes)", ptr, it->first, PetscMemTypeToString(attr.mtype), attr.id, attr.size);
412 }
413 auto &&attr = found_it->second;
414 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
415 // mark intent BEFORE we free, note we mark as write so that we are made to wait on any
416 // outstanding reads (don't want to kill the pointer before they are done)
417 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory deallocation"));
418 // do free
419 if (dctx->ops->memfree) {
420 PetscUseTypeMethod(dctx, memfree, attr.mtype, (void **)&ptr);
421 } else {
422 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "freeing"));
423 }
424 // if ptr still exists, then the device context could not handle it
425 if (ptr) PetscCall(PetscFree(ptr));
426 PetscCallCXX(map.erase(found_it));
427 }
428 PetscFunctionReturn(PETSC_SUCCESS);
429 }
430
431 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
432 /*@C
433 PetscDeviceMemcpy - Copy memory in a device-aware manner
434
435 Not Collective, Asynchronous, Auto-dependency aware
436
437 Input Parameters:
438 + dctx - The `PetscDeviceContext` used to copy the memory
439 . dest - The pointer to copy to
440 . src - The pointer to copy from
441 - n - The amount (in bytes) to copy
442
443 Level: intermediate
444
445 Notes:
446 Both `dest` and `src` must have been allocated by `PetscDeviceMalloc()` or
447 `PetscDeviceCalloc()`.
448
449 `src` and `dest` cannot overlap.
450
451 If both `src` and `dest` are on the host this routine is fully synchronous.
452
453 The user should prefer `PetscDeviceArrayCopy()` over this routine as it automatically
454 computes the number of bytes to copy from the size of the pointer types.
455
456 DAG representation:
457 .vb
458 time ->
459
460 -> dctx - |= CALL =| - dctx ->
461 -> dest --------------------->
462 -> src ---------------------->
463 .ve
464
465 .N ASYNC_API
466
467 .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
468 `PetscDeviceFree()`
469 @*/
PetscDeviceMemcpy(PetscDeviceContext dctx,void * PETSC_RESTRICT dest,const void * PETSC_RESTRICT src,std::size_t n)470 PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext dctx, void *PETSC_RESTRICT dest, const void *PETSC_RESTRICT src, std::size_t n)
471 {
472 PetscFunctionBegin;
473 if (!n) PetscFunctionReturn(PETSC_SUCCESS);
474 PetscCheck(dest, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy to a NULL pointer");
475 PetscCheck(src, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy from a NULL pointer");
476 if (dest == src) PetscFunctionReturn(PETSC_SUCCESS);
477 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
478 {
479 const auto &dest_attr = memory_map.search_for(dest, true)->second;
480 const auto &src_attr = memory_map.search_for(src, true)->second;
481 const auto mode = PetscMemTypeToDeviceCopyMode(dest_attr.mtype, src_attr.mtype);
482
483 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, src_attr.id, PETSC_MEMORY_ACCESS_READ, "memory copy (src)"));
484 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, dest_attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory copy (dest)"));
485 // perform the copy
486 if (dctx->ops->memcopy) {
487 PetscUseTypeMethod(dctx, memcopy, dest, src, n, mode);
488 if (mode == PETSC_DEVICE_COPY_HTOD) {
489 PetscCall(PetscLogCpuToGpu(n));
490 } else if (mode == PETSC_DEVICE_COPY_DTOH) {
491 PetscCall(PetscLogGpuToCpu(n));
492 }
493 } else {
494 // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
495 // (pinned) but being copied by a host dctx
496 PetscCall(PetscDeviceCheckCapable_Private(dctx, mode == PETSC_DEVICE_COPY_HTOH, "copying"));
497 PetscCall(PetscMemcpy(dest, src, n));
498 }
499 }
500 PetscFunctionReturn(PETSC_SUCCESS);
501 }
502
503 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
504 /*@C
505 PetscDeviceMemset - Memset device-aware memory
506
507 Not Collective, Asynchronous, Auto-dependency aware
508
509 Input Parameters:
510 + dctx - The `PetscDeviceContext` used to memset the memory
511 . ptr - The pointer to the memory
512 . v - The value to set
513 - n - The amount (in bytes) to set
514
515 Level: intermediate
516
517 Notes:
518 `ptr` must have been allocated by `PetscDeviceMalloc()` or `PetscDeviceCalloc()`.
519
  The user should prefer `PetscDeviceArrayZero()` over this routine as it automatically
  computes the number of bytes to set from the size of the pointer types, though they should
  note that it only zeros memory.

  This routine is analogous to `memset()`. That is, this routine copies the value
  `static_cast<unsigned char>(v)` into each of the first `n` bytes of the object pointed to by
  `ptr`.

  If `ptr` is on device, this routine is asynchronous.
529
530 DAG representation:
531 .vb
532 time ->
533
534 -> dctx - |= CALL =| - dctx ->
535 -> dest --------------------->
536 .ve
537
538 .N ASYNC_API
539
540 .seealso: `PetscDeviceArrayZero()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
541 `PetscDeviceFree()`
542 @*/
PetscDeviceMemset(PetscDeviceContext dctx,void * ptr,PetscInt v,std::size_t n)543 PetscErrorCode PetscDeviceMemset(PetscDeviceContext dctx, void *ptr, PetscInt v, std::size_t n)
544 {
545 PetscFunctionBegin;
546 if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS);
547 PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to memset a NULL pointer");
548 PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
549 {
550 const auto &attr = memory_map.search_for(ptr, true)->second;
551
552 PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory set"));
553 if (dctx->ops->memset) {
554 PetscUseTypeMethod(dctx, memset, attr.mtype, ptr, v, n);
555 } else {
556 // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
557 // (pinned) but being memset by a host dctx
558 PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "memsetting"));
559 std::memset(ptr, static_cast<int>(v), n);
560 }
561 }
562 PetscFunctionReturn(PETSC_SUCCESS);
563 }
564