xref: /petsc/src/sys/objects/device/interface/device.cxx (revision d756bedd70a89ca052be956bccd75c5761cb2ab4)
1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2 #include <petsc/private/petscadvancedmacros.h>
3 
4 #include <petsc/private/cpp/register_finalize.hpp>
5 
6 #include "../impls/host/hostdevice.hpp"
7 #if PetscDefined(HAVE_CUPM)
8   #include "../impls/cupm/cupmdevice.hpp"
9 #endif
10 #if PetscDefined(HAVE_SYCL)
11   #include "../impls/sycl/sycldevice.hpp"
12 #endif
13 
14 #include <utility> // std::make_pair
15 
16 using namespace Petsc::device;
17 
18 #if defined(PETSC_HAVE_CUPM)
19 int PetscDeviceCUPMRuntimeArch = 0;
20 #endif
21 
22 namespace
23 {
24 
25 /*
26   note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to
27   be picked up by the switch-case macros below
28 */
29 host::Device HOSTDevice{PetscDeviceContextCreate_HOST};
30 #if PetscDefined(HAVE_CUDA)
31 cupm::Device<cupm::DeviceType::CUDA> CUDADevice{PetscDeviceContextCreate_CUDA};
32 #endif
33 #if PetscDefined(HAVE_HIP)
34 cupm::Device<cupm::DeviceType::HIP> HIPDevice{PetscDeviceContextCreate_HIP};
35 #endif
36 #if PetscDefined(HAVE_SYCL)
37 sycl::Device SYCLDevice{PetscDeviceContextCreate_SYCL};
38 #endif
39 
40 } // namespace
41 
42 #define PETSC_DEVICE_CASE(IMPLS, func, ...) \
43   case PetscConcat_(PETSC_DEVICE_, IMPLS): { \
44     PetscCall(PetscConcat_(IMPLS, Device).func(__VA_ARGS__)); \
45   } break
46 
47 #define PETSC_VOID_0(...) ((void)0)
48 
49 /*
50   Suppose you have:
51 
52   CUDADevice.myFunction(arg1,arg2)
53 
54   that you would like to conditionally define and call in a switch-case:
55 
56   switch(PetscDeviceType) {
57   #if PetscDefined(HAVE_CUDA)
58   case PETSC_DEVICE_CUDA: {
59     PetscCall(CUDADevice.myFunction(arg1,arg2));
60   } break;
61   #endif
62   }
63 
64   then calling this macro:
65 
66   PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2)
67 
68   will expand to the following case statement:
69 
70   case PETSC_DEVICE_CUDA: {
71     PetscCall(CUDADevice.myFunction(arg1,arg2));
72   } break
73 
74   if PetscDefined(HAVE_CUDA) evaluates to 1, and expand to nothing otherwise
75 */
76 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS, func, ...) PetscIfPetscDefined(PetscConcat_(HAVE_, IMPLS), PETSC_DEVICE_CASE, PETSC_VOID_0)(IMPLS, func, __VA_ARGS__)
77 
78 /*@C
79   PetscDeviceCreate - Get a new handle for a particular device (often a GPU) type
80 
81   Not Collective
82 
83   Input Parameters:
84 + type  - The type of `PetscDevice`
85 - devid - The numeric ID# of the device (pass `PETSC_DECIDE` to assign automatically)
86 
87   Output Parameter:
88 . device - The `PetscDevice`
89 
90   Level: beginner
91 
92   Notes:
93   This routine may initialize `PetscDevice`. If this is the case, it may cause some sort of
94   device synchronization.
95 
96   `devid` is what you might pass to `cudaSetDevice()` for example.
97 
98 .seealso: `PetscDevice`, `PetscDeviceInitType`,
99 `PetscDeviceInitialize()`, `PetscDeviceInitialized()`, `PetscDeviceConfigure()`,
100 `PetscDeviceView()`, `PetscDeviceDestroy()`
101 @*/
102 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device)
103 {
104   static PetscInt PetscDeviceCounter = 0;
105 
106   PetscFunctionBegin;
107   PetscValidDeviceType(type, 1);
108   PetscAssertPointer(device, 3);
109   PetscCall(PetscDeviceInitializePackage());
110 
111   PetscCall(PetscNew(device));
112   (*device)->id     = PetscDeviceCounter++;
113   (*device)->type   = type;
114   (*device)->refcnt = 1;
115   /*
116     if you are adding a device, you also need to add its initialization in
117     PetscDeviceInitializeTypeFromOptions_Private() below
118   */
119   switch (type) {
120     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HOST, getDevice, *device, devid);
121     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA, getDevice, *device, devid);
122     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP, getDevice, *device, devid);
123     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL, getDevice, *device, devid);
124   default:
125     /* in case the above macros expand to nothing this silences any unused variable warnings */
126     (void)devid;
127     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[type]);
128   }
129   PetscFunctionReturn(PETSC_SUCCESS);
130 }
131 
132 /*@C
133   PetscDeviceDestroy - Free a `PetscDevice`
134 
135   Not Collective
136 
137   Input Parameter:
138 . device - The `PetscDevice`
139 
140   Level: beginner
141 
142 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceView()`,
143 `PetscDeviceGetType()`, `PetscDeviceGetDeviceId()`
144 @*/
145 PetscErrorCode PetscDeviceDestroy(PetscDevice *device)
146 {
147   PetscFunctionBegin;
148   PetscAssertPointer(device, 1);
149   if (!*device) PetscFunctionReturn(PETSC_SUCCESS);
150   PetscValidDevice(*device, 1);
151   PetscCall(PetscDeviceDereference_Internal(*device));
152   if ((*device)->refcnt) {
153     *device = nullptr;
154     PetscFunctionReturn(PETSC_SUCCESS);
155   }
156   PetscCall(PetscFree((*device)->data));
157   PetscCall(PetscFree(*device));
158   PetscFunctionReturn(PETSC_SUCCESS);
159 }
160 
161 /*@C
162   PetscDeviceConfigure - Configure a particular `PetscDevice`
163 
164   Not Collective
165 
166   Input Parameter:
167 . device - The `PetscDevice` to configure
168 
169   Level: beginner
170 
171   Notes:
172   The user should not assume that this is a cheap operation.
173 
174 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceView()`, `PetscDeviceDestroy()`,
175 `PetscDeviceGetType()`, `PetscDeviceGetDeviceId()`
176 @*/
177 PetscErrorCode PetscDeviceConfigure(PetscDevice device)
178 {
179   PetscFunctionBegin;
180   PetscValidDevice(device, 1);
181   /*
182     if no available configuration is available, this cascades all the way down to default
183     and error
184   */
185   switch (const auto dtype = device->type) {
186   case PETSC_DEVICE_HOST:
187     if (PetscDefined(HAVE_HOST)) break; // always true
188   case PETSC_DEVICE_CUDA:
189     if (PetscDefined(HAVE_CUDA)) break;
190     goto error;
191   case PETSC_DEVICE_HIP:
192     if (PetscDefined(HAVE_HIP)) break;
193     goto error;
194   case PETSC_DEVICE_SYCL:
195     if (PetscDefined(HAVE_SYCL)) break;
196     goto error;
197   default:
198   error:
199     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PETSc was not configured for PetscDeviceType %s", PetscDeviceTypes[dtype]);
200   }
201   PetscUseTypeMethod(device, configure);
202   PetscFunctionReturn(PETSC_SUCCESS);
203 }
204 
205 /*@
206   PetscDeviceView - View a `PetscDevice`
207 
208   Collective on viewer
209 
210   Input Parameters:
211 + device - The `PetscDevice` to view
212 - viewer - The `PetscViewer` to view the device with (`NULL` for `PETSC_VIEWER_STDOUT_WORLD`)
213 
214   Level: beginner
215 
216 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`,
217 `PetscDeviceDestroy()`, `PetscDeviceGetType()`, `PetscDeviceGetDeviceId()`
218 @*/
219 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer)
220 {
221   auto      sub = viewer;
222   PetscBool isascii;
223 
224   PetscFunctionBegin;
225   PetscValidDevice(device, 1);
226   if (viewer) {
227     PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
228     PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &isascii));
229   } else {
230     PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
231     isascii = PETSC_TRUE;
232   }
233 
234   if (isascii) {
235     auto        dtype = PETSC_DEVICE_HOST;
236     MPI_Comm    comm;
237     PetscMPIInt size;
238     PetscInt    id = 0;
239 
240     PetscCall(PetscObjectGetComm(PetscObjectCast(viewer), &comm));
241     PetscCallMPI(MPI_Comm_size(comm, &size));
242 
243     PetscCall(PetscDeviceGetDeviceId(device, &id));
244     PetscCall(PetscDeviceGetType(device, &dtype));
245     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
246     PetscCall(PetscViewerASCIIPrintf(sub, "PetscDevice Object: %d MPI %s\n", size, size == 1 ? "process" : "processes"));
247     PetscCall(PetscViewerASCIIPushTab(sub));
248     PetscCall(PetscViewerASCIIPrintf(sub, "type: %s\n", PetscDeviceTypes[dtype]));
249     PetscCall(PetscViewerASCIIPrintf(sub, "id: %" PetscInt_FMT "\n", id));
250   }
251 
252   // see if impls has extra viewer stuff
253   PetscTryTypeMethod(device, view, sub);
254 
255   if (isascii) {
256     // undo the ASCII specific stuff
257     PetscCall(PetscViewerASCIIPopTab(sub));
258     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
259   }
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 /*@
264   PetscDeviceGetType - Get the type of device
265 
266   Not Collective
267 
268   Input Parameter:
269 . device - The `PetscDevice`
270 
271   Output Parameter:
272 . type - The `PetscDeviceType`
273 
274   Level: beginner
275 
276 .seealso: `PetscDevice`, `PetscDeviceType`, `PetscDeviceSetDefaultDeviceType()`,
277 `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()`,
278 `PetscDeviceGetDeviceId()`, `PETSC_DEVICE_DEFAULT()`
279 @*/
280 PetscErrorCode PetscDeviceGetType(PetscDevice device, PetscDeviceType *type)
281 {
282   PetscFunctionBegin;
283   PetscValidDevice(device, 1);
284   PetscAssertPointer(type, 2);
285   *type = device->type;
286   PetscFunctionReturn(PETSC_SUCCESS);
287 }
288 
289 /*@C
290   PetscDeviceGetDeviceId - Get the device ID for a `PetscDevice`
291 
292   Not Collective
293 
294   Input Parameter:
295 . device - The `PetscDevice`
296 
297   Output Parameter:
298 . id - The id
299 
300   Level: beginner
301 
302   Notes:
303   The returned ID may have been assigned by the underlying device backend. For example if the
304   backend is CUDA then `id` is exactly the value returned by `cudaGetDevice()` at the time when
305   this device was configured.
306 
307 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceGetType()`
308 @*/
309 PetscErrorCode PetscDeviceGetDeviceId(PetscDevice device, PetscInt *id)
310 {
311   PetscFunctionBegin;
312   PetscValidDevice(device, 1);
313   PetscAssertPointer(id, 2);
314   *id = device->deviceId;
315   PetscFunctionReturn(PETSC_SUCCESS);
316 }
317 
318 namespace
319 {
320 
321 struct DefaultDeviceType : public Petsc::RegisterFinalizeable<DefaultDeviceType> {
322   PetscDeviceType type = PETSC_DEVICE_HARDWARE_DEFAULT_TYPE;
323 
324   PetscErrorCode finalize_() noexcept
325   {
326     PetscFunctionBegin;
327     type = PETSC_DEVICE_HARDWARE_DEFAULT_TYPE;
328     PetscFunctionReturn(PETSC_SUCCESS);
329   }
330 };
331 
332 auto default_device_type = DefaultDeviceType();
333 
334 } // namespace
335 
336 /*@C
337   PETSC_DEVICE_DEFAULT - Retrieve the current default `PetscDeviceType`
338 
339   Not Collective
340 
341   Level: beginner
342 
343   Notes:
344   Unless selected by the user, the default device is selected in the following order\:
345   `PETSC_DEVICE_HIP`, `PETSC_DEVICE_CUDA`, `PETSC_DEVICE_SYCL`, `PETSC_DEVICE_HOST`.
346 
347 .seealso: `PetscDeviceType`, `PetscDeviceSetDefaultDeviceType()`, `PetscDeviceGetType()`
348 @*/
349 PetscDeviceType PETSC_DEVICE_DEFAULT(void)
350 {
351   return default_device_type.type;
352 }
353 
354 /*@C
355   PetscDeviceSetDefaultDeviceType - Set the default device type for `PetscDevice`
356 
357   Not Collective
358 
359   Input Parameter:
360 . type - the new default device type
361 
362   Level: beginner
363 
364   Notes:
365   This sets the `PetscDeviceType` returned by `PETSC_DEVICE_DEFAULT()`.
366 
.seealso: `PetscDeviceType`, `PetscDeviceGetType()`, `PETSC_DEVICE_DEFAULT()`
368 @*/
369 PetscErrorCode PetscDeviceSetDefaultDeviceType(PetscDeviceType type)
370 {
371   PetscFunctionBegin;
372   PetscValidDeviceType(type, 1);
373   if (default_device_type.type != type) {
374     // no need to waste a PetscRegisterFinalize() slot if we don't change it
375     default_device_type.type = type;
376     PetscCall(default_device_type.register_finalize());
377   }
378   PetscFunctionReturn(PETSC_SUCCESS);
379 }
380 
381 namespace
382 {
383 
384 std::array<std::pair<PetscDevice, bool>, PETSC_DEVICE_MAX> defaultDevices = {};
385 
386 /*
387   Actual initialization function; any functions claiming to initialize PetscDevice or
388   PetscDeviceContext will have to run through this one
389 */
390 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId)
391 {
392   PetscFunctionBegin;
393   PetscValidDeviceType(type, 1);
394   if (PetscUnlikely(!PetscDeviceInitialized(type))) {
395     auto &dev  = defaultDevices[type].first;
396     auto &init = defaultDevices[type].second;
397 
398     PetscAssert(!dev, PETSC_COMM_SELF, PETSC_ERR_MEM, "Trying to overwrite existing default device of type %s", PetscDeviceTypes[type]);
399     PetscCall(PetscDeviceCreate(type, defaultDeviceId, &dev));
400     PetscCall(PetscDeviceConfigure(dev));
401     init = true;
402   }
403   PetscFunctionReturn(PETSC_SUCCESS);
404 }
405 
406 } // namespace
407 
408 /*@C
409   PetscDeviceInitialize - Initialize `PetscDevice`
410 
411   Not Collective
412 
413   Input Parameter:
414 . type - The `PetscDeviceType` to initialize
415 
416   Level: beginner
417 
418   Notes:
419   Eagerly initializes the corresponding `PetscDeviceType` if needed. If this is the case it may
420   result in device synchronization.
421 
422 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialized()`,
423 `PetscDeviceCreate()`, `PetscDeviceDestroy()`
424 @*/
425 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type)
426 {
427   PetscFunctionBegin;
428   PetscValidDeviceType(type, 1);
429   PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type, PETSC_DECIDE));
430   PetscFunctionReturn(PETSC_SUCCESS);
431 }
432 
433 /*@C
434   PetscDeviceInitialized - Determines whether `PetscDevice` is initialized for a particular
435   `PetscDeviceType`
436 
437   Not Collective
438 
439   Input Parameter:
440 . type - The `PetscDeviceType` to check
441 
442   Level: beginner
443 
444   Notes:
445   Returns `PETSC_TRUE` if `type` is initialized, `PETSC_FALSE` otherwise.
446 
447   If one has not configured PETSc for a particular `PetscDeviceType` then this routine will
448   return `PETSC_FALSE` for that `PetscDeviceType`.
449 
450 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`,
451 `PetscDeviceCreate()`, `PetscDeviceDestroy()`
452 @*/
453 PetscBool PetscDeviceInitialized(PetscDeviceType type)
454 {
455   return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && defaultDevices[type].second);
456 }
457 
458 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */
459 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device)
460 {
461   PetscFunctionBegin;
462   PetscAssertPointer(device, 2);
463   PetscCall(PetscDeviceInitialize(type));
464   *device = defaultDevices[type].first;
465   PetscFunctionReturn(PETSC_SUCCESS);
466 }
467 
468 /*@C
469   PetscDeviceGetAttribute - Query a particular attribute of a `PetscDevice`
470 
471   Not Collective
472 
473   Input Parameters:
474 + device - The `PetscDevice`
475 - attr   - The attribute
476 
477   Output Parameter:
478 . value - The value of the attribute
479 
480   Level: intermediate
481 
482   Notes:
483   Since different attributes are often different types `value` is a `void *` to accommodate
484   them all. The underlying type of the attribute is therefore included in the name of the
485   `PetscDeviceAttribute` responsible for querying it. For example,
486   `PETSC_DEVICE_ATTR_SIZE_T_SHARED_MEM_PER_BLOCK` is of type `size_t`.
487 
488 .seealso: `PetscDeviceAtrtibute`, `PetscDeviceConfigure()`, `PetscDevice`
489 @*/
490 PetscErrorCode PetscDeviceGetAttribute(PetscDevice device, PetscDeviceAttribute attr, void *value)
491 {
492   PetscFunctionBegin;
493   PetscValidDevice(device, 1);
494   PetscValidDeviceAttribute(attr, 2);
495   PetscAssertPointer(value, 3);
496   PetscUseTypeMethod(device, getattribute, attr, value);
497   PetscFunctionReturn(PETSC_SUCCESS);
498 }
499 
500 namespace
501 {
502 
503 PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType)
504 {
505   PetscFunctionBegin;
506   if (!PetscDeviceConfiguredFor_Internal(type)) {
507     PetscCall(PetscInfo(nullptr, "PetscDeviceType %s not available\n", PetscDeviceTypes[type]));
508     defaultDevices[type].first = nullptr;
509     PetscFunctionReturn(PETSC_SUCCESS);
510   }
511   PetscCall(PetscInfo(nullptr, "PetscDeviceType %s available, initializing\n", PetscDeviceTypes[type]));
512   /* ugly switch needed to pick the right global variable... could maybe do this as a union? */
513   switch (type) {
514     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HOST, initialize, comm, &defaultDeviceId, &defaultView, defaultInitType);
515     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA, initialize, comm, &defaultDeviceId, &defaultView, defaultInitType);
516     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP, initialize, comm, &defaultDeviceId, &defaultView, defaultInitType);
517     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL, initialize, comm, &defaultDeviceId, &defaultView, defaultInitType);
518   default:
519     SETERRQ(comm, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[type]);
520   }
521   PetscCall(PetscInfo(nullptr, "PetscDevice %s initialized, default device id %" PetscInt_FMT ", view %s, init type %s\n", PetscDeviceTypes[type], defaultDeviceId, PetscBools[defaultView], PetscDeviceInitTypes[Petsc::util::to_underlying(*defaultInitType)]));
522   /*
523     defaultInitType, defaultView  and defaultDeviceId now represent what the individual TYPES
524     have decided to initialize as
525   */
526   if ((*defaultInitType == PETSC_DEVICE_INIT_EAGER) || defaultView) {
527     PetscCall(PetscInfo(nullptr, "Eagerly initializing %s PetscDevice\n", PetscDeviceTypes[type]));
528     PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type, defaultDeviceId));
529     if (defaultView) PetscCall(PetscDeviceView(defaultDevices[type].first, nullptr));
530   }
531   PetscFunctionReturn(PETSC_SUCCESS);
532 }
533 
534 PetscErrorCode PetscDeviceInitializeQueryOptions_Private(MPI_Comm comm, PetscDeviceType *deviceContextInitDevice, PetscDeviceInitType *defaultInitType, PetscInt *defaultDeviceId, PetscBool *defaultDeviceIdSet, PetscBool *defaultView)
535 {
536   PetscInt initIdx       = PETSC_DEVICE_INIT_LAZY;
537   auto     initDeviceIdx = static_cast<PetscInt>(*deviceContextInitDevice);
538   auto     flg           = PETSC_FALSE;
539 
540   PetscFunctionBegin;
541   PetscCall(PetscOptionsHasName(nullptr, nullptr, "-log_view_gpu_time", &flg));
542   if (flg) PetscCall(PetscLogGpuTime());
543   PetscCall(PetscOptionsHasName(nullptr, nullptr, "-log_view_gpu_energy_meter", &flg));
544   if (flg) PetscCall(PetscLogGpuEnergyMeter());
545   PetscCall(PetscOptionsHasName(nullptr, nullptr, "-log_view_gpu_energy", &flg));
546   if (flg) PetscCall(PetscLogGpuEnergy());
547 
548   PetscOptionsBegin(comm, nullptr, "PetscDevice Options", "Sys");
549   PetscCall(PetscOptionsEList("-device_enable", "How (or whether) to initialize PetscDevices", "PetscDeviceInitialize()", PetscDeviceInitTypes, 3, PetscDeviceInitTypes[initIdx], &initIdx, nullptr));
550   PetscCall(PetscOptionsEList("-default_device_type", "Set the PetscDeviceType returned by PETSC_DEVICE_DEFAULT()", "PetscDeviceSetDefaultDeviceType()", PetscDeviceTypes, PETSC_DEVICE_MAX, PetscDeviceTypes[initDeviceIdx], &initDeviceIdx, defaultDeviceIdSet));
551   PetscCall(PetscOptionsRangeInt("-device_select", "Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-" PetscStringize(PETSC_DEVICE_MAX_DEVICES) ") for a specific device", "PetscDeviceCreate()", *defaultDeviceId, defaultDeviceId, nullptr, PETSC_DECIDE, PETSC_DEVICE_MAX_DEVICES));
552   PetscCall(PetscOptionsBool("-device_view", "Display device information and assignments (forces eager initialization)", "PetscDeviceView()", *defaultView, defaultView, &flg));
553   PetscOptionsEnd();
554 
555   if (initIdx == PETSC_DEVICE_INIT_NONE) {
556     /* disabled all device initialization if devices are globally disabled */
557     PetscCheck(*defaultDeviceId == PETSC_DECIDE, comm, PETSC_ERR_USER_INPUT, "You have disabled devices but also specified a particular device to use, these options are mutually exclusive");
558     *defaultView  = PETSC_FALSE;
559     initDeviceIdx = PETSC_DEVICE_HOST;
560   } else {
561     *defaultView = static_cast<PetscBool>(*defaultView && flg);
562     if (*defaultView) initIdx = PETSC_DEVICE_INIT_EAGER;
563   }
564   *defaultInitType         = PetscDeviceInitTypeCast(initIdx);
565   *deviceContextInitDevice = PetscDeviceTypeCast(initDeviceIdx);
566   PetscFunctionReturn(PETSC_SUCCESS);
567 }
568 
569 /* called from PetscFinalize() do not call yourself! */
570 PetscErrorCode PetscDeviceFinalize_Private()
571 {
572   PetscFunctionBegin;
573   if (PetscDefined(USE_DEBUG)) {
574     /*
575       you might be thinking, why on earth are you registered yet another finalizer in a
576       function already called during PetscRegisterFinalizeAll()? If this seems stupid it's
577       because it is.
578 
579       The crux of the problem is that the initializer (and therefore the ~finalizer~) of
580       PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context had
581       a default PetscDevice attached, that PetscDevice will have a reference count >0 and hence
582       won't be destroyed yet. So we need to repeat the check that all devices have been
583       destroyed again ~after~ the global context is destroyed. In summary:
584 
585       1. This finalizer runs and destroys all devices, except it may not because the global
586          context may still hold a reference!
587       2. The global context finalizer runs and does the final reference count decrement
588          required, which actually destroys the held device.
589       3. Our newly added finalizer runs and checks that all is well.
590     */
591     PetscCall(PetscRegisterFinalize([] {
592       PetscFunctionBegin;
593       for (auto &&device : defaultDevices) {
594         const auto dev = device.first;
595 
596         PetscCheck(!dev, PETSC_COMM_WORLD, PETSC_ERR_COR, "Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()", PetscDeviceTypes[dev->type], dev->refcnt);
597       }
598       PetscFunctionReturn(PETSC_SUCCESS);
599     }));
600   }
601   for (auto &&device : defaultDevices) {
602     PetscCall(PetscDeviceDestroy(&device.first));
603     device.second = false;
604   }
605   PetscFunctionReturn(PETSC_SUCCESS);
606 }
607 
608 } // namespace
609 
610 /*
  Begins the init proceedings for the entire PetscDevice stack. There are 2 stages of
  initialization types:
613 
614   1. defaultInitType - how does PetscDevice as a whole expect to initialize?
615   2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize?
616      e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but
617      have all CUDA devices still initialize.
618 
619   All told the following happens:
620 
621   0. defaultInitType -> LAZY
622   1. Check for log_view/log_summary, if yes defaultInitType -> EAGER
623   2. PetscDevice initializes each sub type with deviceDefaultInitType.
624   2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition
625       to checking for specific device init. if view or specific device init
626       subTypeDefaultInitType -> EAGER. disabled once again overrides all.
627 */
628 
629 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm)
630 {
631   auto defaultView                    = PETSC_FALSE;
632   auto initializeDeviceContextEagerly = PETSC_FALSE;
633   auto defaultDeviceIdSet             = PETSC_FALSE;
634   auto defaultDeviceId                = PetscInt{PETSC_DECIDE};
635   auto deviceContextInitDevice        = PETSC_DEVICE_DEFAULT();
636   auto defaultInitType                = PETSC_DEVICE_INIT_LAZY;
637 
638   PetscFunctionBegin;
639   if (PetscDefined(USE_DEBUG)) {
640     int result;
641 
642     PetscCallMPI(MPI_Comm_compare(comm, PETSC_COMM_WORLD, &result));
643     /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the
644      * global space */
645     if (PetscUnlikely(result != MPI_IDENT)) {
646       char name[MPI_MAX_OBJECT_NAME] = {};
647       int  len; /* unused */
648 
649       PetscCallMPI(MPI_Comm_get_name(comm, name, &len));
650       SETERRQ(comm, PETSC_ERR_MPI, "Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD", name);
651     }
652   }
653   comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */
654   PetscCall(PetscRegisterFinalize(PetscDeviceFinalize_Private));
655 
656   PetscCall(PetscDeviceInitializeQueryOptions_Private(comm, &deviceContextInitDevice, &defaultInitType, &defaultDeviceId, &defaultDeviceIdSet, &defaultView));
657 
658   // the precise values don't matter here, so long as they are sequential
659   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_HOST) == 0, "");
660   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_CUDA) == 1, "");
661   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_HIP) == 2, "");
662   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_SYCL) == 3, "");
663   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_MAX) == 4, "");
664   for (int i = PETSC_DEVICE_HOST; i < PETSC_DEVICE_MAX; ++i) {
665     const auto deviceType = PetscDeviceTypeCast(i);
666     auto       initType   = defaultInitType;
667 
668     PetscCall(PetscDeviceInitializeTypeFromOptions_Private(comm, deviceType, defaultDeviceId, defaultView, &initType));
669     if (PetscDeviceConfiguredFor_Internal(deviceType)) {
670       if (initType == PETSC_DEVICE_INIT_EAGER) {
671         initializeDeviceContextEagerly = PETSC_TRUE;
672         // only update the default device if the user hasn't set it previously
673         if (!defaultDeviceIdSet) {
674           deviceContextInitDevice = deviceType;
675           PetscCall(PetscInfo(nullptr, "PetscDevice %s set as default device type due to eager initialization\n", PetscDeviceTypes[deviceType]));
676         }
677       } else if (initType == PETSC_DEVICE_INIT_NONE) {
678         if (deviceType != PETSC_DEVICE_HOST) PetscCheck(!defaultDeviceIdSet || (deviceType != deviceContextInitDevice), comm, PETSC_ERR_USER_INPUT, "Cannot explicitly disable the device set as default device type (%s)", PetscDeviceTypes[deviceType]);
679       }
680     }
681   }
682 
683   PetscCall(PetscDeviceSetDefaultDeviceType(deviceContextInitDevice));
684   PetscCall(PetscDeviceContextSetRootDeviceType_Internal(PETSC_DEVICE_DEFAULT()));
685   /*                       PetscDevice is now fully initialized                          */
686   {
687     /*
688       query the options db to get the root settings from the user (if any).
689 
690       This section is a bit of a hack. We have to reach across to dcontext.cxx to all but call
691       PetscDeviceContextSetFromOptions() before we even have one, then set a few static
692       variables in that file with the results.
693     */
694     auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
695     auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
696 
697     PetscOptionsBegin(comm, "root_", "Root PetscDeviceContext Options", "Sys");
698     PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
699     PetscOptionsEnd();
700 
701     if (dtype.second) PetscCall(PetscDeviceContextSetRootDeviceType_Internal(dtype.first));
702     if (stype.second) PetscCall(PetscDeviceContextSetRootStreamType_Internal(stype.first));
703   }
704 
705   if (initializeDeviceContextEagerly) {
706     PetscDeviceContext dctx;
707 
708     PetscCall(PetscInfo(nullptr, "Eagerly initializing PetscDeviceContext with %s device\n", PetscDeviceTypes[deviceContextInitDevice]));
709     /* instantiates the device context */
710     PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
711     PetscCall(PetscDeviceContextSetUp(dctx));
712   }
713   PetscFunctionReturn(PETSC_SUCCESS);
714 }
715