xref: /petsc/src/sys/objects/device/interface/device.cxx (revision bb4f2003add1a5f6cc89fa7c885e06e586ebae74)
1 #include "petscdevice_interface_internal.hpp" /*I <petscdevice.h> I*/
2 #include <petsc/private/petscadvancedmacros.h>
3 
4 #include <petsc/private/cpp/register_finalize.hpp>
5 
6 #include "../impls/host/hostdevice.hpp"
7 #if PetscDefined(HAVE_CUPM)
8   #include "../impls/cupm/cupmdevice.hpp"
9 #endif
10 #if PetscDefined(HAVE_SYCL)
11   #include "../impls/sycl/sycldevice.hpp"
12 #endif
13 
14 #include <utility> // std::make_pair
15 
16 using namespace Petsc::device;
17 
// Global (external linkage) integer, only defined when PETSc is built with CUPM support, and
// zero until something assigns it.
// NOTE(review): presumably set by the CUDA/HIP backend to the architecture detected at
// runtime -- confirm against the cupm implementation.
#if defined(PETSC_HAVE_CUPM)
int PetscDeviceCUPMRuntimeArch = 0;
#endif
21 
namespace
{

/*
  One file-local instance of every device implementation compiled into this build, each
  constructed with its matching PetscDeviceContextCreate_XXX routine.

  Note to anyone adding more classes: the variable name must be exactly
  ALL_CAPS_SHORT_NAME + Device to be picked up by the switch-case macros below, since they
  build the identifier by token-pasting the short name and "Device".
*/
host::Device HOSTDevice{PetscDeviceContextCreate_HOST};
#if PetscDefined(HAVE_CUDA)
cupm::Device<cupm::DeviceType::CUDA> CUDADevice{PetscDeviceContextCreate_CUDA};
#endif
#if PetscDefined(HAVE_HIP)
cupm::Device<cupm::DeviceType::HIP> HIPDevice{PetscDeviceContextCreate_HIP};
#endif
#if PetscDefined(HAVE_SYCL)
sycl::Device SYCLDevice{PetscDeviceContextCreate_SYCL};
#endif

} // namespace
41 
/*
  Expands to a complete `case PETSC_DEVICE_<IMPLS>:` clause that calls
  <IMPLS>Device.func(__VA_ARGS__) wrapped in PetscCall() and then breaks out of the switch.
  The semicolon terminating the `break` is supplied at the point of use.
*/
#define PETSC_DEVICE_CASE(IMPLS, func, ...) \
  case PetscConcat_(PETSC_DEVICE_, IMPLS): { \
    PetscCall(PetscConcat_(IMPLS, Device).func(__VA_ARGS__)); \
  } break

/* Swallows any arguments and expands to a no-op expression statement */
#define PETSC_VOID_0(...) ((void)0)
48 
/*
  Suppose you have:

  CUDADevice.myFunction(arg1,arg2)

  that you would like to conditionally define and call in a switch-case:

  switch(PetscDeviceType) {
  #if PetscDefined(HAVE_CUDA)
  case PETSC_DEVICE_CUDA: {
    PetscCall(CUDADevice.myFunction(arg1,arg2));
  } break;
  #endif
  }

  then calling this macro:

  PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2)

  will expand to the following case statement:

  case PETSC_DEVICE_CUDA: {
    PetscCall(CUDADevice.myFunction(arg1,arg2));
  } break

  if PetscDefined(HAVE_CUDA) evaluates to 1. Otherwise PETSC_VOID_0 is selected instead, so
  the whole invocation collapses to the no-op expression ((void)0) and no case label is
  emitted for that type.
*/
#define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS, func, ...) PetscIfPetscDefined(PetscConcat_(HAVE_, IMPLS), PETSC_DEVICE_CASE, PETSC_VOID_0)(IMPLS, func, __VA_ARGS__)
77 
78 /*@C
79   PetscDeviceCreate - Get a new handle for a particular device (often a GPU) type
80 
81   Not Collective
82 
83   Input Parameters:
84 + type  - The type of `PetscDevice`
85 - devid - The numeric ID# of the device (pass `PETSC_DECIDE` to assign automatically)
86 
87   Output Parameter:
88 . device - The `PetscDevice`
89 
90   Level: beginner
91 
92   Notes:
93   This routine may initialize `PetscDevice`. If this is the case, it may cause some sort of
94   device synchronization.
95 
96   `devid` is what you might pass to `cudaSetDevice()` for example.
97 
98 .seealso: `PetscDevice`, `PetscDeviceInitType`,
99 `PetscDeviceInitialize()`, `PetscDeviceInitialized()`, `PetscDeviceConfigure()`,
100 `PetscDeviceView()`, `PetscDeviceDestroy()`
101 @*/
102 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device)
103 {
104   static PetscInt PetscDeviceCounter = 0;
105 
106   PetscFunctionBegin;
107   PetscValidDeviceType(type, 1);
108   PetscAssertPointer(device, 3);
109   PetscCall(PetscDeviceInitializePackage());
110   PetscCall(PetscNew(device));
111   (*device)->id     = PetscDeviceCounter++;
112   (*device)->type   = type;
113   (*device)->refcnt = 1;
114   /*
115     if you are adding a device, you also need to add its initialization in
116     PetscDeviceInitializeTypeFromOptions_Private() below
117   */
118   switch (type) {
119     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HOST, getDevice, *device, devid);
120     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA, getDevice, *device, devid);
121     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP, getDevice, *device, devid);
122     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL, getDevice, *device, devid);
123   default:
124     /* in case the above macros expand to nothing this silences any unused variable warnings */
125     (void)(devid);
126     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[type]);
127   }
128   PetscFunctionReturn(PETSC_SUCCESS);
129 }
130 
131 /*@C
132   PetscDeviceDestroy - Free a `PetscDevice`
133 
134   Not Collective
135 
136   Input Parameter:
137 . device - The `PetscDevice`
138 
139   Level: beginner
140 
141 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceView()`,
142 `PetscDeviceGetType()`, `PetscDeviceGetDeviceId()`
143 @*/
144 PetscErrorCode PetscDeviceDestroy(PetscDevice *device)
145 {
146   PetscFunctionBegin;
147   PetscAssertPointer(device, 1);
148   if (!*device) PetscFunctionReturn(PETSC_SUCCESS);
149   PetscValidDevice(*device, 1);
150   PetscCall(PetscDeviceDereference_Internal(*device));
151   if ((*device)->refcnt) {
152     *device = nullptr;
153     PetscFunctionReturn(PETSC_SUCCESS);
154   }
155   PetscCall(PetscFree((*device)->data));
156   PetscCall(PetscFree(*device));
157   PetscFunctionReturn(PETSC_SUCCESS);
158 }
159 
160 /*@C
161   PetscDeviceConfigure - Configure a particular `PetscDevice`
162 
163   Not Collective
164 
165   Input Parameter:
166 . device - The `PetscDevice` to configure
167 
168   Level: beginner
169 
170   Notes:
171   The user should not assume that this is a cheap operation.
172 
173 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceView()`, `PetscDeviceDestroy()`,
174 `PetscDeviceGetType()`, `PetscDeviceGetDeviceId()`
175 @*/
176 PetscErrorCode PetscDeviceConfigure(PetscDevice device)
177 {
178   PetscFunctionBegin;
179   PetscValidDevice(device, 1);
180   /*
181     if no available configuration is available, this cascades all the way down to default
182     and error
183   */
184   switch (const auto dtype = device->type) {
185   case PETSC_DEVICE_HOST:
186     if (PetscDefined(HAVE_HOST)) break; // always true
187   case PETSC_DEVICE_CUDA:
188     if (PetscDefined(HAVE_CUDA)) break;
189     goto error;
190   case PETSC_DEVICE_HIP:
191     if (PetscDefined(HAVE_HIP)) break;
192     goto error;
193   case PETSC_DEVICE_SYCL:
194     if (PetscDefined(HAVE_SYCL)) break;
195     goto error;
196   default:
197   error:
198     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PETSc was not configured for PetscDeviceType %s", PetscDeviceTypes[dtype]);
199   }
200   PetscUseTypeMethod(device, configure);
201   PetscFunctionReturn(PETSC_SUCCESS);
202 }
203 
204 /*@C
205   PetscDeviceView - View a `PetscDevice`
206 
207   Collective on viewer
208 
209   Input Parameters:
210 + device - The `PetscDevice` to view
211 - viewer - The `PetscViewer` to view the device with (`NULL` for `PETSC_VIEWER_STDOUT_WORLD`)
212 
213   Level: beginner
214 
215 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`,
216 `PetscDeviceDestroy()`, `PetscDeviceGetType()`, `PetscDeviceGetDeviceId()`
217 @*/
218 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer)
219 {
220   auto      sub = viewer;
221   PetscBool iascii;
222 
223   PetscFunctionBegin;
224   PetscValidDevice(device, 1);
225   if (viewer) {
226     PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
227     PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer), PETSCVIEWERASCII, &iascii));
228   } else {
229     PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
230     iascii = PETSC_TRUE;
231   }
232 
233   if (iascii) {
234     auto        dtype = PETSC_DEVICE_HOST;
235     MPI_Comm    comm;
236     PetscMPIInt size;
237     PetscInt    id = 0;
238 
239     PetscCall(PetscObjectGetComm(PetscObjectCast(viewer), &comm));
240     PetscCallMPI(MPI_Comm_size(comm, &size));
241 
242     PetscCall(PetscDeviceGetDeviceId(device, &id));
243     PetscCall(PetscDeviceGetType(device, &dtype));
244     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sub));
245     PetscCall(PetscViewerASCIIPrintf(sub, "PetscDevice Object: %d MPI %s\n", size, size == 1 ? "process" : "processes"));
246     PetscCall(PetscViewerASCIIPushTab(sub));
247     PetscCall(PetscViewerASCIIPrintf(sub, "type: %s\n", PetscDeviceTypes[dtype]));
248     PetscCall(PetscViewerASCIIPrintf(sub, "id: %" PetscInt_FMT "\n", id));
249   }
250 
251   // see if impls has extra viewer stuff
252   PetscTryTypeMethod(device, view, sub);
253 
254   if (iascii) {
255     // undo the ASCII specific stuff
256     PetscCall(PetscViewerASCIIPopTab(sub));
257     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sub));
258   }
259   PetscFunctionReturn(PETSC_SUCCESS);
260 }
261 
262 /*@C
263   PetscDeviceGetType - Get the type of device
264 
265   Not Collective
266 
267   Input Parameter:
268 . device - The `PetscDevice`
269 
270   Output Parameter:
271 . type - The `PetscDeviceType`
272 
273   Level: beginner
274 
275 .seealso: `PetscDevice`, `PetscDeviceType`, `PetscDeviceSetDefaultDeviceType()`,
276 `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()`,
277 `PetscDeviceGetDeviceId()`, `PETSC_DEVICE_DEFAULT()`
278 @*/
PetscErrorCode PetscDeviceGetType(PetscDevice device, PetscDeviceType *type)
{
  PetscFunctionBegin;
  // validate both arguments before touching either
  PetscValidDevice(device, 1);
  PetscAssertPointer(type, 2);
  // plain field read; the implementation is not consulted
  *type = device->type;
  PetscFunctionReturn(PETSC_SUCCESS);
}
287 
288 /*@C
289   PetscDeviceGetDeviceId - Get the device ID for a `PetscDevice`
290 
291   Not Collective
292 
293   Input Parameter:
294 . device - The `PetscDevice`
295 
296   Output Parameter:
297 . id - The id
298 
299   Level: beginner
300 
301   Notes:
302   The returned ID may have been assigned by the underlying device backend. For example if the
303   backend is CUDA then `id` is exactly the value returned by `cudaGetDevice()` at the time when
304   this device was configured.
305 
306 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceGetType()`
307 @*/
PetscErrorCode PetscDeviceGetDeviceId(PetscDevice device, PetscInt *id)
{
  PetscFunctionBegin;
  // validate both arguments before touching either
  PetscValidDevice(device, 1);
  PetscAssertPointer(id, 2);
  // this reports the `deviceId` field, which is distinct from the per-handle `id` counter
  // stamped on the object by PetscDeviceCreate()
  *id = device->deviceId;
  PetscFunctionReturn(PETSC_SUCCESS);
}
316 
317 namespace
318 {
319 
320 struct DefaultDeviceType : public Petsc::RegisterFinalizeable<DefaultDeviceType> {
321   PetscDeviceType type = PETSC_DEVICE_HARDWARE_DEFAULT_TYPE;
322 
323   PetscErrorCode finalize_() noexcept
324   {
325     PetscFunctionBegin;
326     type = PETSC_DEVICE_HARDWARE_DEFAULT_TYPE;
327     PetscFunctionReturn(PETSC_SUCCESS);
328   }
329 };
330 
331 auto default_device_type = DefaultDeviceType();
332 
333 } // namespace
334 
335 /*@C
336   PETSC_DEVICE_DEFAULT - Retrieve the current default `PetscDeviceType`
337 
338   Not Collective
339 
340   Level: beginner
341 
342   Notes:
343   Unless selected by the user, the default device is selected in the following order\:
344   `PETSC_DEVICE_HIP`, `PETSC_DEVICE_CUDA`, `PETSC_DEVICE_SYCL`, `PETSC_DEVICE_HOST`.
345 
346 .seealso: `PetscDeviceType`, `PetscDeviceSetDefaultDeviceType()`, `PetscDeviceGetType()`
347 @*/
PetscDeviceType PETSC_DEVICE_DEFAULT(void)
{
  // simply reports the value held by the file-local default_device_type object
  return default_device_type.type;
}
352 
353 /*@C
354   PetscDeviceSetDefaultDeviceType - Set the default device type for `PetscDevice`
355 
356   Not Collective
357 
358   Input Parameter:
359 . type - the new default device type
360 
361   Level: beginner
362 
363   Notes:
364   This sets the `PetscDeviceType` returned by `PETSC_DEVICE_DEFAULT()`.
365 
.seealso: `PetscDeviceType`, `PetscDeviceGetType()`, `PETSC_DEVICE_DEFAULT()`
367 @*/
368 PetscErrorCode PetscDeviceSetDefaultDeviceType(PetscDeviceType type)
369 {
370   PetscFunctionBegin;
371   PetscValidDeviceType(type, 1);
372   if (default_device_type.type != type) {
373     // no need to waster a PetscRegisterFinalize() slot if we don't change it
374     default_device_type.type = type;
375     PetscCall(default_device_type.register_finalize());
376   }
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
380 namespace
381 {
382 
383 std::array<std::pair<PetscDevice, bool>, PETSC_DEVICE_MAX> defaultDevices = {};
384 
385 /*
386   Actual initialization function; any functions claiming to initialize PetscDevice or
387   PetscDeviceContext will have to run through this one
388 */
389 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId)
390 {
391   PetscFunctionBegin;
392   PetscValidDeviceType(type, 1);
393   if (PetscUnlikely(!PetscDeviceInitialized(type))) {
394     auto &dev  = defaultDevices[type].first;
395     auto &init = defaultDevices[type].second;
396 
397     PetscAssert(!dev, PETSC_COMM_SELF, PETSC_ERR_MEM, "Trying to overwrite existing default device of type %s", PetscDeviceTypes[type]);
398     PetscCall(PetscDeviceCreate(type, defaultDeviceId, &dev));
399     PetscCall(PetscDeviceConfigure(dev));
400     init = true;
401   }
402   PetscFunctionReturn(PETSC_SUCCESS);
403 }
404 
405 } // namespace
406 
407 /*@C
408   PetscDeviceInitialize - Initialize `PetscDevice`
409 
410   Not Collective
411 
412   Input Parameter:
413 . type - The `PetscDeviceType` to initialize
414 
415   Level: beginner
416 
417   Notes:
418   Eagerly initializes the corresponding `PetscDeviceType` if needed. If this is the case it may
419   result in device synchronization.
420 
421 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialized()`,
422 `PetscDeviceCreate()`, `PetscDeviceDestroy()`
423 @*/
PetscErrorCode PetscDeviceInitialize(PetscDeviceType type)
{
  PetscFunctionBegin;
  PetscValidDeviceType(type, 1);
  // PETSC_DECIDE requests automatic assignment of the device id (see PetscDeviceCreate())
  PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type, PETSC_DECIDE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
431 
432 /*@C
433   PetscDeviceInitialized - Determines whether `PetscDevice` is initialized for a particular
434   `PetscDeviceType`
435 
436   Not Collective
437 
438   Input Parameter:
439 . type - The `PetscDeviceType` to check
440 
441   Level: beginner
442 
443   Notes:
444   Returns `PETSC_TRUE` if `type` is initialized, `PETSC_FALSE` otherwise.
445 
446   If one has not configured PETSc for a particular `PetscDeviceType` then this routine will
447   return `PETSC_FALSE` for that `PetscDeviceType`.
448 
449 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`,
450 `PetscDeviceCreate()`, `PetscDeviceDestroy()`
451 @*/
452 PetscBool PetscDeviceInitialized(PetscDeviceType type)
453 {
454   return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && defaultDevices[type].second);
455 }
456 
/*
  Get the default PetscDevice for a particular type, constructing it first if it has not been
  initialized yet. The returned handle is the one stored in the defaultDevices table; this
  routine does not change its reference count.
*/
PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device)
{
  PetscFunctionBegin;
  PetscAssertPointer(device, 2);
  // validates type and creates the default device on first use
  PetscCall(PetscDeviceInitialize(type));
  *device = defaultDevices[type].first;
  PetscFunctionReturn(PETSC_SUCCESS);
}
466 
467 /*@C
468   PetscDeviceGetAttribute - Query a particular attribute of a `PetscDevice`
469 
470   Not Collective
471 
472   Input Parameters:
473 + device - The `PetscDevice`
474 - attr   - The attribute
475 
476   Output Parameter:
477 . value - The value of the attribute
478 
479   Level: intermediate
480 
481   Notes:
482   Since different attributes are often different types `value` is a `void *` to accommodate
483   them all. The underlying type of the attribute is therefore included in the name of the
484   `PetscDeviceAttribute` responsible for querying it. For example,
485   `PETSC_DEVICE_ATTR_SIZE_T_SHARED_MEM_PER_BLOCK` is of type `size_t`.
486 
.seealso: `PetscDeviceAttribute`, `PetscDeviceConfigure()`, `PetscDevice`
488 @*/
PetscErrorCode PetscDeviceGetAttribute(PetscDevice device, PetscDeviceAttribute attr, void *value)
{
  PetscFunctionBegin;
  PetscValidDevice(device, 1);
  PetscValidDeviceAttribute(attr, 2);
  PetscAssertPointer(value, 3);
  // the query itself is answered entirely by the implementation's getattribute method
  // NOTE(review): presumably this errors if the implementation provides none -- confirm
  PetscUseTypeMethod(device, getattribute, attr, value);
  PetscFunctionReturn(PETSC_SUCCESS);
}
498 
499 namespace
500 {
501 
502 PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType)
503 {
504   PetscFunctionBegin;
505   if (!PetscDeviceConfiguredFor_Internal(type)) {
506     PetscCall(PetscInfo(nullptr, "PetscDeviceType %s not available\n", PetscDeviceTypes[type]));
507     defaultDevices[type].first = nullptr;
508     PetscFunctionReturn(PETSC_SUCCESS);
509   }
510   PetscCall(PetscInfo(nullptr, "PetscDeviceType %s available, initializing\n", PetscDeviceTypes[type]));
511   /* ugly switch needed to pick the right global variable... could maybe do this as a union? */
512   switch (type) {
513     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HOST, initialize, comm, &defaultDeviceId, &defaultView, defaultInitType);
514     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA, initialize, comm, &defaultDeviceId, &defaultView, defaultInitType);
515     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP, initialize, comm, &defaultDeviceId, &defaultView, defaultInitType);
516     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL, initialize, comm, &defaultDeviceId, &defaultView, defaultInitType);
517   default:
518     SETERRQ(comm, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[type]);
519   }
520   PetscCall(PetscInfo(nullptr, "PetscDevice %s initialized, default device id %" PetscInt_FMT ", view %s, init type %s\n", PetscDeviceTypes[type], defaultDeviceId, PetscBools[defaultView], PetscDeviceInitTypes[Petsc::util::to_underlying(*defaultInitType)]));
521   /*
522     defaultInitType, defaultView  and defaultDeviceId now represent what the individual TYPES
523     have decided to initialize as
524   */
525   if ((*defaultInitType == PETSC_DEVICE_INIT_EAGER) || defaultView) {
526     PetscCall(PetscInfo(nullptr, "Eagerly initializing %s PetscDevice\n", PetscDeviceTypes[type]));
527     PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type, defaultDeviceId));
528     if (defaultView) PetscCall(PetscDeviceView(defaultDevices[type].first, nullptr));
529   }
530   PetscFunctionReturn(PETSC_SUCCESS);
531 }
532 
533 PetscErrorCode PetscDeviceInitializeQueryOptions_Private(MPI_Comm comm, PetscDeviceType *deviceContextInitDevice, PetscDeviceInitType *defaultInitType, PetscInt *defaultDevice, PetscBool *defaultDeviceSet, PetscBool *defaultView)
534 {
535   PetscInt initIdx       = PETSC_DEVICE_INIT_LAZY;
536   auto     initDeviceIdx = static_cast<PetscInt>(*deviceContextInitDevice);
537   auto     flg           = PETSC_FALSE;
538 
539   PetscFunctionBegin;
540   PetscCall(PetscOptionsHasName(nullptr, nullptr, "-log_view_gpu_time", &flg));
541   if (flg) PetscCall(PetscLogGpuTime());
542 
543   PetscOptionsBegin(comm, nullptr, "PetscDevice Options", "Sys");
544   PetscCall(PetscOptionsEList("-device_enable", "How (or whether) to initialize PetscDevices", "PetscDeviceInitialize()", PetscDeviceInitTypes, 3, PetscDeviceInitTypes[initIdx], &initIdx, nullptr));
545   PetscCall(PetscOptionsEList("-default_device_type", "Set the PetscDeviceType returned by PETSC_DEVICE_DEFAULT()", "PetscDeviceSetDefaultDeviceType()", PetscDeviceTypes, PETSC_DEVICE_MAX, PetscDeviceTypes[initDeviceIdx], &initDeviceIdx, defaultDeviceSet));
546   PetscCall(PetscOptionsRangeInt("-device_select", "Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-" PetscStringize(PETSC_DEVICE_MAX_DEVICES) ") for a specific device", "PetscDeviceCreate()", *defaultDevice, defaultDevice, nullptr, PETSC_DECIDE, PETSC_DEVICE_MAX_DEVICES));
547   PetscCall(PetscOptionsBool("-device_view", "Display device information and assignments (forces eager initialization)", "PetscDeviceView()", *defaultView, defaultView, &flg));
548   PetscOptionsEnd();
549 
550   if (initIdx == PETSC_DEVICE_INIT_NONE) {
551     /* disabled all device initialization if devices are globally disabled */
552     PetscCheck(*defaultDevice == PETSC_DECIDE, comm, PETSC_ERR_USER_INPUT, "You have disabled devices but also specified a particular device to use, these options are mutually exclusive");
553     *defaultView  = PETSC_FALSE;
554     initDeviceIdx = PETSC_DEVICE_HOST;
555   } else {
556     *defaultView = static_cast<PetscBool>(*defaultView && flg);
557     if (*defaultView) initIdx = PETSC_DEVICE_INIT_EAGER;
558   }
559   *defaultInitType         = PetscDeviceInitTypeCast(initIdx);
560   *deviceContextInitDevice = PetscDeviceTypeCast(initDeviceIdx);
561   PetscFunctionReturn(PETSC_SUCCESS);
562 }
563 
564 /* called from PetscFinalize() do not call yourself! */
565 PetscErrorCode PetscDeviceFinalize_Private()
566 {
567   PetscFunctionBegin;
568   if (PetscDefined(USE_DEBUG)) {
569     /*
570       you might be thinking, why on earth are you registered yet another finalizer in a
571       function already called during PetscRegisterFinalizeAll()? If this seems stupid it's
572       because it is.
573 
574       The crux of the problem is that the initializer (and therefore the ~finalizer~) of
575       PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context had
576       a default PetscDevice attached, that PetscDevice will have a reference count >0 and hence
577       won't be destroyed yet. So we need to repeat the check that all devices have been
578       destroyed again ~after~ the global context is destroyed. In summary:
579 
580       1. This finalizer runs and destroys all devices, except it may not because the global
581          context may still hold a reference!
582       2. The global context finalizer runs and does the final reference count decrement
583          required, which actually destroys the held device.
584       3. Our newly added finalizer runs and checks that all is well.
585     */
586     PetscCall(PetscRegisterFinalize([] {
587       PetscFunctionBegin;
588       for (auto &&device : defaultDevices) {
589         const auto dev = device.first;
590 
591         PetscCheck(!dev, PETSC_COMM_WORLD, PETSC_ERR_COR, "Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()", PetscDeviceTypes[dev->type], dev->refcnt);
592       }
593       PetscFunctionReturn(PETSC_SUCCESS);
594     }));
595   }
596   for (auto &&device : defaultDevices) {
597     PetscCall(PetscDeviceDestroy(&device.first));
598     device.second = false;
599   }
600   PetscFunctionReturn(PETSC_SUCCESS);
601 }
602 
603 } // namespace
604 
605 /*
  Begins the init proceedings for the entire PetscDevice stack. There are 3 stages of
607   initialization types:
608 
609   1. defaultInitType - how does PetscDevice as a whole expect to initialize?
610   2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize?
611      e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but
612      have all CUDA devices still initialize.
613 
614   All told the following happens:
615 
616   0. defaultInitType -> LAZY
617   1. Check for log_view/log_summary, if yes defaultInitType -> EAGER
618   2. PetscDevice initializes each sub type with deviceDefaultInitType.
619   2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition
620       to checking for specific device init. if view or specific device init
621       subTypeDefaultInitType -> EAGER. disabled once again overrides all.
622 */
623 
624 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm)
625 {
626   auto defaultView                    = PETSC_FALSE;
627   auto initializeDeviceContextEagerly = PETSC_FALSE;
628   auto defaultDeviceSet               = PETSC_FALSE;
629   auto defaultDevice                  = PetscInt{PETSC_DECIDE};
630   auto deviceContextInitDevice        = PETSC_DEVICE_DEFAULT();
631   auto defaultInitType                = PETSC_DEVICE_INIT_LAZY;
632 
633   PetscFunctionBegin;
634   if (PetscDefined(USE_DEBUG)) {
635     int result;
636 
637     PetscCallMPI(MPI_Comm_compare(comm, PETSC_COMM_WORLD, &result));
638     /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the
639      * global space */
640     if (PetscUnlikely(result != MPI_IDENT)) {
641       char name[MPI_MAX_OBJECT_NAME] = {};
642       int  len; /* unused */
643 
644       PetscCallMPI(MPI_Comm_get_name(comm, name, &len));
645       SETERRQ(comm, PETSC_ERR_MPI, "Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD", name);
646     }
647   }
648   comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */
649   PetscCall(PetscRegisterFinalize(PetscDeviceFinalize_Private));
650 
651   PetscCall(PetscDeviceInitializeQueryOptions_Private(comm, &deviceContextInitDevice, &defaultInitType, &defaultDevice, &defaultDeviceSet, &defaultView));
652 
653   // the precise values don't matter here, so long as they are sequential
654   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_HOST) == 0, "");
655   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_CUDA) == 1, "");
656   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_HIP) == 2, "");
657   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_SYCL) == 3, "");
658   static_assert(Petsc::util::to_underlying(PETSC_DEVICE_MAX) == 4, "");
659   for (int i = PETSC_DEVICE_HOST; i < PETSC_DEVICE_MAX; ++i) {
660     const auto deviceType = PetscDeviceTypeCast(i);
661     auto       initType   = defaultInitType;
662 
663     PetscCall(PetscDeviceInitializeTypeFromOptions_Private(comm, deviceType, defaultDevice, defaultView, &initType));
664     if (PetscDeviceConfiguredFor_Internal(deviceType)) {
665       if (initType == PETSC_DEVICE_INIT_EAGER) {
666         initializeDeviceContextEagerly = PETSC_TRUE;
667         // only update the default device if the user hasn't set it previously
668         if (!defaultDeviceSet) {
669           deviceContextInitDevice = deviceType;
670           PetscCall(PetscInfo(nullptr, "PetscDevice %s set as default device type due to eager initialization\n", PetscDeviceTypes[deviceType]));
671         }
672       } else if (initType == PETSC_DEVICE_INIT_NONE) {
673         if (deviceType != PETSC_DEVICE_HOST) PetscCheck(!defaultDeviceSet || (deviceType != deviceContextInitDevice), comm, PETSC_ERR_USER_INPUT, "Cannot explicitly disable the device set as default device type (%s)", PetscDeviceTypes[deviceType]);
674       }
675     }
676   }
677 
678   PetscCall(PetscDeviceSetDefaultDeviceType(deviceContextInitDevice));
679   PetscCall(PetscDeviceContextSetRootDeviceType_Internal(PETSC_DEVICE_DEFAULT()));
680   /* ----------------------------------------------------------------------------------- */
681   /*                       PetscDevice is now fully initialized                          */
682   /* ----------------------------------------------------------------------------------- */
683   {
684     /*
685       query the options db to get the root settings from the user (if any).
686 
687       This section is a bit of a hack. We have to reach across to dcontext.cxx to all but call
688       PetscDeviceContextSetFromOptions() before we even have one, then set a few static
689       variables in that file with the results.
690     */
691     auto dtype = std::make_pair(PETSC_DEVICE_DEFAULT(), PETSC_FALSE);
692     auto stype = std::make_pair(PETSC_DEVICE_CONTEXT_DEFAULT_STREAM_TYPE, PETSC_FALSE);
693 
694     PetscOptionsBegin(comm, "root_", "Root PetscDeviceContext Options", "Sys");
695     PetscCall(PetscDeviceContextQueryOptions_Internal(PetscOptionsObject, dtype, stype));
696     PetscOptionsEnd();
697 
698     if (dtype.second) PetscCall(PetscDeviceContextSetRootDeviceType_Internal(dtype.first));
699     if (stype.second) PetscCall(PetscDeviceContextSetRootStreamType_Internal(stype.first));
700   }
701 
702   if (initializeDeviceContextEagerly) {
703     PetscDeviceContext dctx;
704 
705     PetscCall(PetscInfo(nullptr, "Eagerly initializing PetscDeviceContext with %s device\n", PetscDeviceTypes[deviceContextInitDevice]));
706     /* instantiates the device context */
707     PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
708     PetscCall(PetscDeviceContextSetUp(dctx));
709   }
710   PetscFunctionReturn(PETSC_SUCCESS);
711 }
712