xref: /petsc/src/sys/objects/device/impls/cupm/cupmdevice.cxx (revision 2d30e087755efd99e28fdfe792ffbeb2ee1ea928)
1 #include "../../interface/cupmdevice.hpp"
2 #include <algorithm>
3 #include <csetjmp> // for cuda mpi awareness
4 #include <csignal> // SIGSEGV
5 #include <iterator>
6 #include <type_traits>
7 
8 #if PetscDefined(USE_LOG)
9 PETSC_INTERN PetscErrorCode PetscLogInitialize(void);
10 #else
11 #define PetscLogInitialize() 0
12 #endif
13 
14 namespace Petsc {
15 
16 namespace Device {
17 
18 namespace CUPM {
19 
20 // internal "impls" class for CUPMDevice. Each instance represents a single cupm device
21 template <DeviceType T>
22 class Device<T>::DeviceInternal {
23   const int        id_;
24   bool             devInitialized_ = false;
25   cupmDeviceProp_t dprop_; // cudaDeviceProp appears to be an actual struct, i.e. you can't
26                            // initialize it with nullptr or NULL (i've tried)
27 
28   PETSC_CXX_COMPAT_DECL(bool CUPMAwareMPI_());
29 
30 public:
31   // default constructor
32   explicit constexpr DeviceInternal(int dev) noexcept : id_(dev) { }
33 
34   // gather all relevant information for a particular device, a cupmDeviceProp_t is
35   // usually sufficient here
36   PETSC_NODISCARD PetscErrorCode initialize() noexcept;
37   PETSC_NODISCARD PetscErrorCode configure() noexcept;
38   PETSC_NODISCARD PetscErrorCode view(PetscViewer) const noexcept;
39   PETSC_NODISCARD PetscErrorCode getattribute(PetscDeviceAttribute, void *) const noexcept;
40   PETSC_NODISCARD PetscErrorCode finalize() noexcept;
41 
42   PETSC_NODISCARD auto id() const -> decltype(id_) { return id_; }
43   PETSC_NODISCARD auto initialized() const -> decltype(devInitialized_) { return devInitialized_; }
44   PETSC_NODISCARD auto prop() const -> const decltype(dprop_) & { return dprop_; }
45 
46   // factory
47 #if __cplusplus >= 201402L
48   PETSC_CXX_COMPAT_DECL(std::unique_ptr<DeviceInternal> makeDevice(int i)) {
49     return std::make_unique<DeviceInternal>(i);
50   }
51 #else
52   PETSC_CXX_COMPAT_DECL(std::unique_ptr<DeviceInternal> makeDevice(int i)) {
53     return std::unique_ptr<DeviceInternal>(new DeviceInternal(i));
54   }
55 #endif
56 };
57 
58 // the goal here is simply to get the cupm backend to create its context, not to do any type of
59 // modification of it, or create objects (since these may be affected by subsequent
60 // configuration changes)
61 template <DeviceType T>
62 PetscErrorCode Device<T>::DeviceInternal::initialize() noexcept {
63   PetscFunctionBegin;
64   if (devInitialized_) PetscFunctionReturn(0);
65   devInitialized_ = true;
66   // need to do this BEFORE device has been set, although if the user
67   // has already done this then we just ignore it
68   if (cupmSetDeviceFlags(cupmDeviceMapHost) == cupmErrorSetOnActiveProcess) {
69     // reset the error if it was cupmErrorSetOnActiveProcess
70     const auto PETSC_UNUSED unused = cupmGetLastError();
71   } else {
72     PetscCallCUPM(cupmGetLastError());
73   }
74   // cuda 5.0+ will create a context when cupmSetDevice is called
75   if (cupmSetDevice(id_) != cupmErrorDeviceAlreadyInUse) PetscCallCUPM(cupmGetLastError());
76   // forces cuda < 5.0 to initialize a context
77   PetscCallCUPM(cupmFree(nullptr));
78   // where is this variable defined and when is it set? who knows! but it is defined and set
79   // at this point. either way, each device must make this check since I guess MPI might not be
80   // aware of all of them?
81   if (use_gpu_aware_mpi) {
82     // For OpenMPI, we could do a compile time check with
83     // "defined(PETSC_HAVE_OMPI_MAJOR_VERSION) && defined(MPIX_CUDA_AWARE_SUPPORT) &&
84     // MPIX_CUDA_AWARE_SUPPORT" to see if it is CUDA-aware. However, recent versions of IBM
85     // Spectrum MPI (e.g., 10.3.1) on Summit meet above conditions, but one has to use jsrun
86     // --smpiargs=-gpu to really enable GPU-aware MPI. So we do the check at runtime with a
87     // code that works only with GPU-aware MPI.
88     if (PetscUnlikely(!CUPMAwareMPI_())) {
89       (*PetscErrorPrintf)("PETSc is configured with GPU support, but your MPI is not GPU-aware. For better performance, please use a GPU-aware MPI.\n");
90       (*PetscErrorPrintf)("If you do not care, add option -use_gpu_aware_mpi 0. To not see the message again, add the option to your .petscrc, OR add it to the env var PETSC_OPTIONS.\n");
91       (*PetscErrorPrintf)("If you do care, for IBM Spectrum MPI on OLCF Summit, you may need jsrun --smpiargs=-gpu.\n");
92       (*PetscErrorPrintf)("For OpenMPI, you need to configure it --with-cuda (https://www.open-mpi.org/faq/?category=buildcuda)\n");
93       (*PetscErrorPrintf)("For MVAPICH2-GDR, you need to set MV2_USE_CUDA=1 (http://mvapich.cse.ohio-state.edu/userguide/gdr/)\n");
94       (*PetscErrorPrintf)("For Cray-MPICH, you need to set MPICH_RDMA_ENABLED_CUDA=1 (https://www.olcf.ornl.gov/tutorials/gpudirect-mpich-enabled-cuda/)\n");
95       PETSCABORT(PETSC_COMM_SELF, PETSC_ERR_LIB);
96     }
97   }
98   PetscFunctionReturn(0);
99 }
100 
101 template <DeviceType T>
102 PetscErrorCode Device<T>::DeviceInternal::configure() noexcept {
103   PetscFunctionBegin;
104   PetscAssert(devInitialized_, PETSC_COMM_SELF, PETSC_ERR_COR, "Device %d being configured before it was initialized", id_);
105   // why on EARTH nvidia insists on making otherwise informational states into
106   // fully-fledged error codes is beyond me. Why couldn't a pointer to bool argument have
107   // sufficed?!?!?!
108   if (cupmSetDevice(id_) != cupmErrorDeviceAlreadyInUse) PetscCallCUPM(cupmGetLastError());
109   // need to update the device properties
110   PetscCallCUPM(cupmGetDeviceProperties(&dprop_, id_));
111   PetscCall(PetscInfo(nullptr, "Configured device %d\n", id_));
112   PetscFunctionReturn(0);
113 }
114 
115 template <DeviceType T>
116 PetscErrorCode Device<T>::DeviceInternal::view(PetscViewer viewer) const noexcept {
117   PetscBool iascii;
118 
119   PetscFunctionBegin;
120   PetscAssert(devInitialized_, PETSC_COMM_SELF, PETSC_ERR_COR, "Device %d being viewed before it was initialized or configured", id_);
121   PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii));
122   if (iascii) {
123     MPI_Comm    comm;
124     PetscMPIInt rank;
125     PetscViewer sviewer;
126 
127     PetscCall(PetscObjectGetComm(PetscObjectCast(viewer), &comm));
128     PetscCallMPI(MPI_Comm_rank(comm, &rank));
129     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
130     PetscCall(PetscViewerASCIIPrintf(sviewer, "[%d] device %d: %s\n", rank, id_, dprop_.name));
131     PetscCall(PetscViewerASCIIPushTab(sviewer));
132     PetscCall(PetscViewerASCIIPrintf(sviewer, "Compute capability: %d.%d\n", dprop_.major, dprop_.minor));
133     PetscCall(PetscViewerASCIIPrintf(sviewer, "Multiprocessor Count: %d\n", dprop_.multiProcessorCount));
134     PetscCall(PetscViewerASCIIPrintf(sviewer, "Maximum Grid Dimensions: %d x %d x %d\n", dprop_.maxGridSize[0], dprop_.maxGridSize[1], dprop_.maxGridSize[2]));
135     PetscCall(PetscViewerASCIIPrintf(sviewer, "Maximum Block Dimensions: %d x %d x %d\n", dprop_.maxThreadsDim[0], dprop_.maxThreadsDim[1], dprop_.maxThreadsDim[2]));
136     PetscCall(PetscViewerASCIIPrintf(sviewer, "Maximum Threads Per Block: %d\n", dprop_.maxThreadsPerBlock));
137     PetscCall(PetscViewerASCIIPrintf(sviewer, "Warp Size: %d\n", dprop_.warpSize));
138     PetscCall(PetscViewerASCIIPrintf(sviewer, "Total Global Memory (bytes): %zu\n", dprop_.totalGlobalMem));
139     PetscCall(PetscViewerASCIIPrintf(sviewer, "Total Constant Memory (bytes): %zu\n", dprop_.totalConstMem));
140     PetscCall(PetscViewerASCIIPrintf(sviewer, "Shared Memory Per Block (bytes): %zu\n", dprop_.sharedMemPerBlock));
141     PetscCall(PetscViewerASCIIPrintf(sviewer, "Multiprocessor Clock Rate (KHz): %d\n", dprop_.clockRate));
142     PetscCall(PetscViewerASCIIPrintf(sviewer, "Memory Clock Rate (KHz): %d\n", dprop_.memoryClockRate));
143     PetscCall(PetscViewerASCIIPrintf(sviewer, "Memory Bus Width (bits): %d\n", dprop_.memoryBusWidth));
144     PetscCall(PetscViewerASCIIPrintf(sviewer, "Peak Memory Bandwidth (GB/s): %f\n", 2.0 * dprop_.memoryClockRate * (dprop_.memoryBusWidth / 8) / 1.0e6));
145     PetscCall(PetscViewerASCIIPrintf(sviewer, "Can map host memory: %s\n", dprop_.canMapHostMemory ? "PETSC_TRUE" : "PETSC_FALSE"));
146     PetscCall(PetscViewerASCIIPrintf(sviewer, "Can execute multiple kernels concurrently: %s\n", dprop_.concurrentKernels ? "PETSC_TRUE" : "PETSC_FALSE"));
147     PetscCall(PetscViewerASCIIPopTab(sviewer));
148     PetscCall(PetscViewerFlush(sviewer));
149     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
150     PetscCall(PetscViewerFlush(viewer));
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 template <DeviceType T>
156 PetscErrorCode Device<T>::DeviceInternal::getattribute(PetscDeviceAttribute attr, void *value) const noexcept {
157   PetscFunctionBegin;
158   PetscAssert(initialized(), PETSC_COMM_SELF, PETSC_ERR_COR, "Device %d was not initialized", id());
159   switch (attr) {
160   case PETSC_DEVICE_ATTR_SIZE_T_SHARED_MEM_PER_BLOCK: *static_cast<std::size_t *>(value) = prop().sharedMemPerBlock;
161   case PETSC_DEVICE_ATTR_MAX: break;
162   }
163   PetscFunctionReturn(0);
164 }
165 
166 static std::jmp_buf cupmMPIAwareJumpBuffer;
167 static bool         cupmMPIAwareJumpBufferSet;
168 
169 // godspeed to anyone that attempts to call this function
170 void SilenceVariableIsNotNeededAndWillNotBeEmittedWarning_ThisFunctionShouldNeverBeCalled() {
171   PETSCABORT(MPI_COMM_NULL, INT_MAX);
172   if (cupmMPIAwareJumpBufferSet) (void)cupmMPIAwareJumpBuffer;
173 }
174 
175 #define CHKCUPMAWARE(...) \
176   do { \
177     cupmError_t cerr_ = __VA_ARGS__; \
178     if (PetscUnlikely(cerr_ != cupmSuccess)) return false; \
179   } while (0)
180 
181 template <DeviceType T>
182 PETSC_CXX_COMPAT_DEFN(bool Device<T>::DeviceInternal::CUPMAwareMPI_()) {
183   constexpr int  bufSize           = 2;
184   constexpr int  hbuf[bufSize]     = {1, 0};
185   int           *dbuf              = nullptr;
186   constexpr auto bytes             = bufSize * sizeof(*dbuf);
187   auto           awareness         = false;
188   const auto     cupmSignalHandler = [](int signal, void *ptr) -> PetscErrorCode {
189     if ((signal == SIGSEGV) && cupmMPIAwareJumpBufferSet) std::longjmp(cupmMPIAwareJumpBuffer, 1);
190     return PetscSignalHandlerDefault(signal, ptr);
191   };
192 
193   PetscFunctionBegin;
194   CHKCUPMAWARE(cupmMalloc(reinterpret_cast<void **>(&dbuf), bytes));
195   CHKCUPMAWARE(cupmMemcpy(dbuf, hbuf, bytes, cupmMemcpyHostToDevice));
196   PetscCallAbort(PETSC_COMM_SELF, PetscPushSignalHandler(cupmSignalHandler, nullptr));
197   cupmMPIAwareJumpBufferSet = true;
198   if (setjmp(cupmMPIAwareJumpBuffer)) {
199     // if a segv was triggered in the MPI_Allreduce below, it is very likely due to MPI not
200     // being GPU-aware
201     awareness = false;
202     // control flow up until this point:
203     // 1. CUPMDevice<T>::CUPMDeviceInternal::MPICUPMAware__()
204     // 2. MPI_Allreduce
205     // 3. SIGSEGV
206     // 4. PetscSignalHandler_Private
207     // 5. cupmSignalHandler (lambda function)
208     // 6. here
209     // PetscSignalHandler_Private starts with PetscFunctionBegin and is pushed onto the stack
210     // so we must undo this. This would be most naturally done in cupmSignalHandler, however
211     // the C/C++ standard dictates:
212     //
213     // After invoking longjmp(), non-volatile-qualified local objects should not be accessed if
214     // their values could have changed since the invocation of setjmp(). Their value in this
215     // case is considered indeterminate, and accessing them is undefined behavior.
216     //
217     // so for safety (since we don't know what PetscStackPop may try to read/declare) we do it
218     // outside of the longjmp control flow
219     PetscStackPop;
220   } else if (!MPI_Allreduce(dbuf, dbuf + 1, 1, MPI_INT, MPI_SUM, PETSC_COMM_SELF)) awareness = true;
221   cupmMPIAwareJumpBufferSet = false;
222   PetscCallAbort(PETSC_COMM_SELF, PetscPopSignalHandler());
223   CHKCUPMAWARE(cupmFree(dbuf));
224   PetscFunctionReturn(awareness);
225 }
226 
227 #undef CHKCUPMAWARE
228 
229 template <DeviceType T>
230 PetscErrorCode Device<T>::DeviceInternal::finalize() noexcept {
231   PetscFunctionBegin;
232   devInitialized_ = false;
233   PetscFunctionReturn(0);
234 }
235 
236 template <DeviceType T>
237 PetscErrorCode Device<T>::finalize_() noexcept {
238   PetscFunctionBegin;
239   if (!initialized_) PetscFunctionReturn(0);
240   for (auto &&device : devices_) {
241     if (device) {
242       PetscCall(device->finalize());
243       device.reset();
244     }
245   }
246   defaultDevice_ = PETSC_CUPM_DEVICE_NONE; // disabled by default
247   initialized_   = false;
248   PetscFunctionReturn(0);
249 }
250 
251 // these functions should be named identically to the option they produce where "CUPMTYPE" and
252 // "cupmtype" are the uppercase and lowercase string versions of the cupm backend respectively
253 template <DeviceType T>
254 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char *PetscDevice_CUPMTYPE_Options()) {
255   switch (T) {
256   case DeviceType::CUDA: return "PetscDevice CUDA Options";
257   case DeviceType::HIP: return "PetscDevice HIP Options";
258   }
259   PetscUnreachable();
260   return "PETSC_ERROR_PLIB";
261 }
262 
263 template <DeviceType T>
264 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char *device_enable_cupmtype()) {
265   switch (T) {
266   case DeviceType::CUDA: return "-device_enable_cuda";
267   case DeviceType::HIP: return "-device_enable_hip";
268   }
269   PetscUnreachable();
270   return "PETSC_ERROR_PLIB";
271 }
272 
273 template <DeviceType T>
274 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char *device_select_cupmtype()) {
275   switch (T) {
276   case DeviceType::CUDA: return "-device_select_cuda";
277   case DeviceType::HIP: return "-device_select_hip";
278   }
279   PetscUnreachable();
280   return "PETSC_ERROR_PLIB";
281 }
282 
283 template <DeviceType T>
284 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char *device_view_cupmtype()) {
285   switch (T) {
286   case DeviceType::CUDA: return "-device_view_cuda";
287   case DeviceType::HIP: return "-device_view_hip";
288   }
289   PetscUnreachable();
290   return "PETSC_ERROR_PLIB";
291 }
292 
293 template <DeviceType T>
294 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char *CUPM_VISIBLE_DEVICES()) {
295   switch (T) {
296   case DeviceType::CUDA: return "CUDA_VISIBLE_DEVICES";
297   case DeviceType::HIP: return "HIP_VISIBLE_DEVICES";
298   }
299   PetscUnreachable();
300   return "PETSC_ERROR_PLIB";
301 }
302 
303 template <DeviceType T>
304 PetscErrorCode Device<T>::initialize(MPI_Comm comm, PetscInt *defaultDeviceId, PetscDeviceInitType *defaultInitType) noexcept {
305   PetscInt  initTypeCUPM = *defaultInitType, id = *defaultDeviceId;
306   PetscBool view = PETSC_FALSE, flg;
307   int       ndev = 0;
308 
309   PetscFunctionBegin;
310   if (initialized_) PetscFunctionReturn(0);
311   initialized_ = true;
312   PetscCall(PetscRegisterFinalize(finalize_));
313 
314   {
315     // the functions to populate the command line strings are named after the string they return
316     PetscOptionsBegin(comm, nullptr, PetscDevice_CUPMTYPE_Options<T>(), "Sys");
317     PetscCall(PetscOptionsEList(device_enable_cupmtype<T>(), "How (or whether) to initialize a device", "CUPMDevice<CUPMDeviceType>::initialize()", PetscDeviceInitTypes, 3, PetscDeviceInitTypes[initTypeCUPM], &initTypeCUPM, nullptr));
318     PetscCall(PetscOptionsRangeInt(device_select_cupmtype<T>(), "Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device", "PetscDeviceCreate", id, &id, nullptr, PETSC_DECIDE, std::numeric_limits<decltype(defaultDevice_)>::max()));
319     PetscCall(PetscOptionsBool(device_view_cupmtype<T>(), "Display device information and assignments (forces eager initialization)", nullptr, view, &view, &flg));
320     PetscOptionsEnd();
321   }
322 
323   if (initTypeCUPM == PETSC_DEVICE_INIT_NONE) {
324     id = PETSC_CUPM_DEVICE_NONE;
325   } else if (auto cerr = cupmGetDeviceCount(&ndev)) {
326     auto PETSC_UNUSED ignored = cupmGetLastError();
327     // we won't be initializing anything anyways
328     initTypeCUPM              = PETSC_DEVICE_INIT_NONE;
329     // save the error code for later
330     id                        = -static_cast<decltype(id)>(cerr);
331 
332     if (PetscUnlikely((initTypeCUPM == PETSC_DEVICE_INIT_EAGER) || (view && flg))) {
333       const auto name    = cupmGetErrorName(cerr);
334       const auto desc    = cupmGetErrorString(cerr);
335       const auto backend = cupmName();
336       SETERRQ(comm, PETSC_ERR_USER_INPUT, "Cannot eagerly initialize %s, as doing so results in %s error %d (%s) : %s", backend, backend, static_cast<PetscErrorCode>(cerr), name, desc);
337     }
338   }
339 
340   // check again for init type, since the device count may have changed it
341   if (initTypeCUPM == PETSC_DEVICE_INIT_NONE) {
342     // id < 0 (excluding PETSC_DECIDE) indicates an error has occurred during setup
343     if ((id > 0) || (id == PETSC_DECIDE)) id = PETSC_CUPM_DEVICE_NONE;
344   } else {
345     PetscCall(PetscDeviceCheckDeviceCount_Internal(ndev));
346     if (id == PETSC_DECIDE) {
347       if (ndev) {
348         PetscMPIInt rank;
349 
350         PetscCallMPI(MPI_Comm_rank(comm, &rank));
351         id = rank % ndev;
352       } else id = 0;
353     }
354     view = static_cast<decltype(view)>(view && flg);
355     if (view) initTypeCUPM = PETSC_DEVICE_INIT_EAGER;
356   }
357 
358   static_assert(std::is_same<PetscMPIInt, decltype(defaultDevice_)>::value, "");
359   // id is PetscInt, _defaultDevice is int
360   PetscCall(PetscMPIIntCast(id, &defaultDevice_));
361   if (initTypeCUPM == PETSC_DEVICE_INIT_EAGER) {
362     devices_[defaultDevice_] = DeviceInternal::makeDevice(defaultDevice_);
363     PetscCall(devices_[defaultDevice_]->initialize());
364     PetscCall(devices_[defaultDevice_]->configure());
365     if (view) {
366       PetscViewer vwr;
367 
368       PetscCall(PetscLogInitialize());
369       PetscCall(PetscViewerASCIIGetStdout(comm, &vwr));
370       PetscCall(devices_[defaultDevice_]->view(vwr));
371     }
372   }
373 
374   // record the results of the initialization
375   *defaultInitType = static_cast<PetscDeviceInitType>(initTypeCUPM);
376   *defaultDeviceId = id;
377   PetscFunctionReturn(0);
378 }
379 
380 template <DeviceType T>
381 PetscErrorCode Device<T>::getDevice(PetscDevice device, PetscInt id) const noexcept {
382   const auto cerr = static_cast<cupmError_t>(-defaultDevice_);
383 
384   PetscFunctionBegin;
385   PetscCheck(defaultDevice_ != PETSC_CUPM_DEVICE_NONE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Trying to retrieve a %s PetscDevice when it has been disabled", cupmName());
386   PetscCheck(defaultDevice_ >= 0, PETSC_COMM_SELF, PETSC_ERR_GPU, "Cannot lazily initialize PetscDevice: %s error %d (%s) : %s", cupmName(), static_cast<PetscErrorCode>(cerr), cupmGetErrorName(cerr), cupmGetErrorString(cerr));
387   if (id == PETSC_DECIDE) id = defaultDevice_;
388   PetscAssert(static_cast<decltype(devices_.size())>(id) < devices_.size(), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only supports %zu number of devices but trying to get device with id %" PetscInt_FMT, devices_.size(), id);
389   if (devices_[id]) {
390     PetscAssert(id == devices_[id]->id(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Entry %" PetscInt_FMT " contains device with mismatching id %d", id, devices_[id]->id());
391   } else devices_[id] = DeviceInternal::makeDevice(id);
392   PetscCall(devices_[id]->initialize());
393   device->deviceId           = devices_[id]->id(); // technically id = _devices[id]->_id here
394   device->ops->createcontext = create_;
395   device->ops->configure     = this->configureDevice;
396   device->ops->view          = this->viewDevice;
397   device->ops->getattribute  = this->getAttribute;
398   PetscFunctionReturn(0);
399 }
400 
401 template <DeviceType T>
402 PetscErrorCode Device<T>::configureDevice(PetscDevice device) noexcept {
403   PetscFunctionBegin;
404   PetscCall(devices_[device->deviceId]->configure());
405   PetscFunctionReturn(0);
406 }
407 
408 template <DeviceType T>
409 PetscErrorCode Device<T>::viewDevice(PetscDevice device, PetscViewer viewer) noexcept {
410   PetscFunctionBegin;
411   // now this __shouldn't__ reconfigure the device, but there is a petscinfo call to indicate
412   // it is being reconfigured
413   PetscCall(devices_[device->deviceId]->configure());
414   PetscCall(devices_[device->deviceId]->view(viewer));
415   PetscFunctionReturn(0);
416 }
417 
418 template <DeviceType T>
419 PetscErrorCode Device<T>::getAttribute(PetscDevice device, PetscDeviceAttribute attr, void *value) noexcept {
420   PetscFunctionBegin;
421   PetscCall(devices_[device->deviceId]->getattribute(attr, value));
422   PetscFunctionReturn(0);
423 }
424 
425 // explicitly instantiate the classes
426 #if PetscDefined(HAVE_CUDA)
427 template class Device<DeviceType::CUDA>;
428 #endif
429 #if PetscDefined(HAVE_HIP)
430 template class Device<DeviceType::HIP>;
431 #endif
432 
433 } // namespace CUPM
434 
435 } // namespace Device
436 
437 } // namespace Petsc
438