xref: /petsc/src/sys/objects/device/interface/device.cxx (revision 58d68138c660dfb4e9f5b03334792cd4f2ffd7cc)
1 #include "cupmdevice.hpp" /* I "petscdevice.h" */
2 #include <petsc/private/petscadvancedmacros.h>
3 
4 using namespace Petsc::Device;
5 
6 /*
7   note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to
8   be picked up by the switch-case macros below
9 */
10 #if PetscDefined(HAVE_CUDA)
11 static CUPM::Device<CUPM::DeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA);
12 #endif
13 #if PetscDefined(HAVE_HIP)
14 static CUPM::Device<CUPM::DeviceType::HIP> HIPDevice(PetscDeviceContextCreate_HIP);
15 #endif
16 #if PetscDefined(HAVE_SYCL)
17 #include "sycldevice.hpp"
18 static SYCL::Device SYCLDevice(PetscDeviceContextCreate_SYCL);
19 #endif
20 
21 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INVALID) == 0, "");
22 static_assert(Petsc::util::integral_value(PETSC_DEVICE_CUDA) == 1, "");
23 static_assert(Petsc::util::integral_value(PETSC_DEVICE_HIP) == 2, "");
24 static_assert(Petsc::util::integral_value(PETSC_DEVICE_SYCL) == 3, "");
25 static_assert(Petsc::util::integral_value(PETSC_DEVICE_MAX) == 4, "");
26 const char *const PetscDeviceTypes[] = {"invalid", "cuda", "hip", "sycl", "max", "PetscDeviceType", "PETSC_DEVICE_", PETSC_NULLPTR};
27 
28 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_NONE) == 0, "");
29 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_LAZY) == 1, "");
30 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_EAGER) == 2, "");
31 const char *const PetscDeviceInitTypes[] = {"none", "lazy", "eager", "PetscDeviceInitType", "PETSC_DEVICE_INIT_", PETSC_NULLPTR};
32 static_assert(sizeof(PetscDeviceInitTypes) / sizeof(*PetscDeviceInitTypes) == 6, "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!");
33 
/*
  Expands to a single switch case for PETSC_DEVICE_<IMPLS> whose body calls
  <IMPLS>Device.func(...) with the remaining arguments and error-checks the result with
  PetscCall(). The expansion ends in "break" without a semicolon; the call site supplies it.
*/
#define PETSC_DEVICE_CASE(IMPLS, func, ...) \
  case PetscConcat_(PETSC_DEVICE_, IMPLS): { \
    PetscCall(PetscConcat_(IMPLS, Device).func(__VA_ARGS__)); \
  } break

/*
  Conditional variant of PETSC_DEVICE_CASE. It replaces the hand-written pattern

  switch(PetscDeviceType) {
  #if PetscDefined(HAVE_CUDA)
  case PETSC_DEVICE_CUDA: {
    PetscCall(CUDADevice.myFunction(arg1,arg2));
  } break;
  #endif
  }

  with a single line inside the switch:

  PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2);

  When PetscDefined(HAVE_CUDA) evaluates to 1 this expands to

  case PETSC_DEVICE_CUDA: {
    PetscCall(CUDADevice.myFunction(arg1,arg2));
  } break

  and otherwise it expands to nothing.
*/
#define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS, func, ...) PetscIfPetscDefined(PetscConcat_(HAVE_, IMPLS), PETSC_DEVICE_CASE, PetscExpandToNothing)(IMPLS, func, __VA_ARGS__)
67 
68 /*@C
69   PetscDeviceCreate - Get a new handle for a particular device type
70 
71   Not Collective, Possibly Synchronous
72 
73   Input Parameters:
74 + type  - The type of PetscDevice
75 - devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically)
76 
77   Output Parameter:
78 . device - The PetscDevice
79 
80   Notes:
81   This routine may initialize PetscDevice. If this is the case, this will most likely cause
82   some sort of device synchronization.
83 
84   devid is what you might pass to cudaSetDevice() for example.
85 
86   Level: beginner
87 
88 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`,
89           `PetscDeviceInitialized()`, `PetscDeviceConfigure()`, `PetscDeviceView()`, `PetscDeviceDestroy()`
90 @*/
91 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device) {
92   static PetscInt PetscDeviceCounter = 0;
93   PetscDevice     dev;
94 
95   PetscFunctionBegin;
96   PetscValidDeviceType(type, 1);
97   PetscValidPointer(device, 3);
98   PetscCall(PetscDeviceInitializePackage());
99   PetscCall(PetscNew(&dev));
100   dev->id     = PetscDeviceCounter++;
101   dev->type   = type;
102   dev->refcnt = 1;
103   /*
104     if you are adding a device, you also need to add it's initialization in
105     PetscDeviceInitializeTypeFromOptions_Private() below
106   */
107   switch (type) {
108     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA, getDevice, dev, devid);
109     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP, getDevice, dev, devid);
110     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL, getDevice, dev, devid);
111   default:
112     /* in case the above macros expand to nothing this silences any unused variable warnings */
113     (void)(devid);
114     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[type]);
115   }
116   *device = dev;
117   PetscFunctionReturn(0);
118 }
119 
120 /*@C
121   PetscDeviceDestroy - Free a PetscDevice
122 
123   Not Collective, Asynchronous
124 
125   Input Parameter:
126 . device - The PetscDevice
127 
128   Level: beginner
129 
130 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceView()`
131 @*/
132 PetscErrorCode PetscDeviceDestroy(PetscDevice *device) {
133   PetscFunctionBegin;
134   if (!*device) PetscFunctionReturn(0);
135   PetscValidDevice(*device, 1);
136   PetscCall(PetscDeviceDereference_Internal(*device));
137   if ((*device)->refcnt) {
138     *device = PETSC_NULLPTR;
139     PetscFunctionReturn(0);
140   }
141   PetscCall(PetscFree((*device)->data));
142   PetscCall(PetscFree(*device));
143   PetscFunctionReturn(0);
144 }
145 
146 /*@C
147   PetscDeviceConfigure - Configure a particular PetscDevice
148 
149   Not Collective, Asynchronous
150 
151   Input Parameter:
152 . device - The PetscDevice to configure
153 
154   Notes:
155   The user should not assume that this is a cheap operation
156 
157   Level: beginner
158 
159 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceView()`, `PetscDeviceDestroy()`
160 @*/
161 PetscErrorCode PetscDeviceConfigure(PetscDevice device) {
162   PetscFunctionBegin;
163   PetscValidDevice(device, 1);
164   if (PetscDefined(USE_DEBUG)) {
165     /*
166       if no available configuration is available, this cascades all the way down to default
167       and error
168     */
169     switch (device->type) {
170     case PETSC_DEVICE_CUDA:
171       if (PetscDefined(HAVE_CUDA)) break;
172     case PETSC_DEVICE_HIP:
173       if (PetscDefined(HAVE_HIP)) break;
174     case PETSC_DEVICE_SYCL:
175       if (PetscDefined(HAVE_SYCL)) break;
176     default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[device->type]);
177     }
178   }
179   PetscUseTypeMethod(device, configure);
180   PetscFunctionReturn(0);
181 }
182 
183 /*@C
184   PetscDeviceView - View a PetscDevice
185 
186   Collective on viewer, Asynchronous
187 
188   Input Parameters:
189 + device - The PetscDevice to view
190 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD)
191 
192   Level: beginner
193 
194 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()`
195 @*/
196 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer) {
197   PetscFunctionBegin;
198   PetscValidDevice(device, 1);
199   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer));
200   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
201   PetscUseTypeMethod(device, view, viewer);
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206   PetscDeviceGetDeviceId - Get the device id
207 
208   Not collective
209 
210   Input Parameter:
211 . device - The PetscDevice
212 
213   Output Parameter:
214 . id - The device id
215 
216   Level: beginner
217 
218 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()`
219 @*/
220 PetscErrorCode PetscDeviceGetDeviceId(PetscDevice device, PetscInt *id) {
221   PetscFunctionBegin;
222   PetscValidDevice(device, 1);
223   PetscValidIntPointer(id, 2);
224   *id = device->deviceId;
225   PetscFunctionReturn(0);
226 }
227 
228 static std::array<bool, PETSC_DEVICE_MAX>        initializedDevice = {};
229 static std::array<PetscDevice, PETSC_DEVICE_MAX> defaultDevices    = {};
230 static_assert(initializedDevice.size() == defaultDevices.size(), "");
231 
232 /*@C
233   PetscDeviceInitialize - Initialize PetscDevice
234 
235   Not Collective, Possibly Synchronous
236 
237   Input Parameter:
238 . type - The PetscDeviceType to initialize
239 
240   Notes:
241   Eagerly initializes the corresponding PetscDeviceType if needed.
242 
243   Level: beginner
244 
245 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialized()`, `PetscDeviceCreate()`, `PetscDeviceDestroy()`
246 @*/
247 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type) {
248   PetscFunctionBegin;
249   PetscValidDeviceType(type, 1);
250   PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type, PETSC_DECIDE));
251   PetscFunctionReturn(0);
252 }
253 
254 /*@C
255   PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular
256   PetscDeviceType
257 
258   Not Collective, Asynchronous
259 
260   Input Parameter:
261 . type - The PetscDeviceType to check
262 
263   Output Parameter:
264 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise
265 
266   Notes:
267   If one has not configured PETSc for a particular PetscDeviceType then this routine will
268   return PETSC_FALSE for that PetscDeviceType.
269 
270   Level: beginner
271 
272 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`, `PetscDeviceCreate()`, `PetscDeviceDestroy()`
273 @*/
274 PetscBool PetscDeviceInitialized(PetscDeviceType type) {
275   return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]);
276 }
277 
278 /*
279   Actual intialization function; any functions claiming to initialize PetscDevice or
280   PetscDeviceContext will have to run through this one
281 */
282 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId) {
283   PetscFunctionBegin;
284   PetscValidDeviceType(type, 1);
285   if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0);
286   PetscAssert(!defaultDevices[type], PETSC_COMM_SELF, PETSC_ERR_MEM, "Trying to overwrite existing default device of type %s", PetscDeviceTypes[type]);
287   PetscCall(PetscDeviceCreate(type, defaultDeviceId, &defaultDevices[type]));
288   PetscCall(PetscDeviceConfigure(defaultDevices[type]));
289   initializedDevice[type] = true;
290   PetscFunctionReturn(0);
291 }
292 
293 #if PetscDefined(USE_LOG)
294 PETSC_INTERN PetscErrorCode PetscLogInitialize(void);
295 #else
296 #define PetscLogInitialize() 0
297 #endif
298 
299 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType) {
300   PetscFunctionBegin;
301   if (!PetscDeviceConfiguredFor_Internal(type)) {
302     PetscCall(PetscInfo(PETSC_NULLPTR, "PetscDeviceType %s not supported\n", PetscDeviceTypes[type]));
303     defaultDevices[type] = PETSC_NULLPTR;
304     PetscFunctionReturn(0);
305   }
306   PetscCall(PetscInfo(PETSC_NULLPTR, "PetscDeviceType %s supported, initializing\n", PetscDeviceTypes[type]));
307   /* ugly switch needed to pick the right global variable... could maybe do this as a union? */
308   switch (type) {
309     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA, initialize, comm, &defaultDeviceId, defaultInitType);
310     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP, initialize, comm, &defaultDeviceId, defaultInitType);
311     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL, initialize, comm, &defaultDeviceId, defaultInitType);
312   default: SETERRQ(comm, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[type]);
313   }
314   /*
315     defaultInitType and defaultDeviceId now represent what the individual TYPES have decided to
316     initialize as
317   */
318   if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) {
319     PetscCall(PetscLogInitialize());
320     PetscCall(PetscInfo(PETSC_NULLPTR, "Eagerly initializing %s PetscDevice\n", PetscDeviceTypes[type]));
321     PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type, defaultDeviceId));
322     if (defaultView) {
323       PetscViewer vwr;
324 
325       PetscCall(PetscViewerASCIIGetStdout(comm, &vwr));
326       PetscCall(PetscDeviceView(defaultDevices[type], vwr));
327     }
328   }
329   PetscFunctionReturn(0);
330 }
331 
332 /* called from PetscFinalize() do not call yourself! */
333 static PetscErrorCode PetscDeviceFinalize_Private(void) {
334   PetscFunctionBegin;
335   if (PetscDefined(USE_DEBUG)) {
336     const auto PetscDeviceCheckAllDestroyedAfterFinalize = [] {
337       PetscFunctionBegin;
338       for (auto &&device : defaultDevices)
339         PetscCheck(!device, PETSC_COMM_WORLD, PETSC_ERR_COR, "Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()", PetscDeviceTypes[device->type], device->refcnt);
340       PetscFunctionReturn(0);
341     };
342     /*
343       you might be thinking, why on earth are you registered yet another finalizer in a
344       function already called during PetscRegisterFinalizeAll()? If this seems stupid it's
345       because it is.
346 
347       The crux of the problem is that the initializer (and therefore the ~finalizer~) of
348       PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context had
349       a default PetscDevice attached, that PetscDevice will have a reference count >0 and hence
350       won't be destroyed yet. So we need to repeat the check that all devices have been
351       destroyed again ~after~ the global context is destroyed. In summary:
352 
353       1. This finalizer runs and destroys all devices, except it may not because the global
354          context may still hold a reference!
355       2. The global context finalizer runs and does the final reference count decrement
356          required, which actually destroys the held device.
357       3. Our newly added finalizer runs and checks that all is well.
358     */
359     PetscCall(PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize));
360   }
361   for (auto &&device : defaultDevices) PetscCall(PetscDeviceDestroy(&device));
362   PetscCallCXX(initializedDevice.fill(false));
363   PetscFunctionReturn(0);
364 }
365 
366 /*
367   Begins the init proceeedings for the entire PetscDevice stack. there are 3 stages of
368   initialization types:
369 
370   1. defaultInitType - how does PetscDevice as a whole expect to initialize?
371   2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize?
372      e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but
373      have all CUDA devices still initialize.
374 
375   All told the following happens:
376 
377   0. defaultInitType -> LAZY
378   1. Check for log_view/log_summary, if yes defaultInitType -> EAGER
379   2. PetscDevice initializes each sub type with deviceDefaultInitType.
380   2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition
381       to checking for specific device init. if view or specific device init
382       subTypeDefaultInitType -> EAGER. disabled once again overrides all.
383 */
384 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm) {
385   auto                defaultView                    = PETSC_FALSE;
386   auto                initializeDeviceContextEagerly = PETSC_FALSE;
387   auto                defaultDevice                  = PetscInt{PETSC_DECIDE};
388   auto                deviceContextInitDevice        = PETSC_DEVICE_DEFAULT;
389   PetscDeviceInitType defaultInitType;
390 
391   PetscFunctionBegin;
392   if (PetscDefined(USE_DEBUG)) {
393     int result;
394 
395     PetscCallMPI(MPI_Comm_compare(comm, PETSC_COMM_WORLD, &result));
396     /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the
397      * global space */
398     if (PetscUnlikely(result != MPI_IDENT)) {
399       char name[MPI_MAX_OBJECT_NAME] = {};
400       int  len; /* unused */
401 
402       PetscCallMPI(MPI_Comm_get_name(comm, name, &len));
403       SETERRQ(comm, PETSC_ERR_MPI, "Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD", name);
404     }
405   }
406   comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */
407   PetscCall(PetscRegisterFinalize(PetscDeviceFinalize_Private));
408 
409   {
410     PetscInt  initIdx = PETSC_DEVICE_INIT_LAZY;
411     PetscBool flg;
412 
413     PetscCall(PetscOptionsHasName(PETSC_NULLPTR, PETSC_NULLPTR, "-log_view_gpu_time", &flg));
414     if (flg) PetscCall(PetscLogGpuTime());
415 
416     /* ----------------------------------------------------------------------------------- */
417     /*                              Global PetscDevice Options                             */
418     /* ----------------------------------------------------------------------------------- */
419     PetscOptionsBegin(comm, PETSC_NULLPTR, "PetscDevice Options", "Sys");
420     PetscCall(PetscOptionsEList("-device_enable", "How (or whether) to initialize PetscDevices", "PetscDeviceInitializeFromOptions_Internal()", PetscDeviceInitTypes, 3, PetscDeviceInitTypes[initIdx], &initIdx, PETSC_NULLPTR));
421     PetscCall(PetscOptionsRangeInt("-device_select", "Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device", "PetscDeviceCreate()", defaultDevice, &defaultDevice, PETSC_NULLPTR, PETSC_DECIDE, std::numeric_limits<int>::max()));
422     PetscCall(PetscOptionsBool("-device_view", "Display device information and assignments (forces eager initialization)", PETSC_NULLPTR, defaultView, &defaultView, &flg));
423     PetscOptionsEnd();
424 
425     if (initIdx == PETSC_DEVICE_INIT_NONE) {
426       /* disabled all device initialization if devices are globally disabled */
427       PetscCheck(defaultDevice == PETSC_DECIDE, comm, PETSC_ERR_USER_INPUT, "You have disabled devices but also specified a particular device to use, these options are mutually exlusive");
428       defaultView = PETSC_FALSE;
429     } else {
430       defaultView = static_cast<decltype(defaultView)>(defaultView && flg);
431       if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER;
432     }
433     defaultInitType = static_cast<decltype(defaultInitType)>(initIdx);
434   }
435   static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()), "");
436   for (int i = 1; i < PETSC_DEVICE_MAX; ++i) {
437     const auto deviceType = static_cast<PetscDeviceType>(i);
438     auto       initType   = defaultInitType;
439 
440     PetscCall(PetscDeviceInitializeTypeFromOptions_Private(comm, deviceType, defaultDevice, defaultView, &initType));
441     if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) {
442       initializeDeviceContextEagerly = PETSC_TRUE;
443       deviceContextInitDevice        = deviceType;
444       PetscCall(PetscInfo(PETSC_NULLPTR, "PetscDevice %s set as default device type due to eager initialization\n", PetscDeviceTypes[deviceType]));
445     }
446   }
447   if (initializeDeviceContextEagerly) {
448     PetscDeviceContext dctx;
449 
450     /*
451       somewhat inefficient here as the device context is potentially fully set up twice (once
452       when retrieved then the second time if setfromoptions makes changes)
453     */
454     PetscCall(PetscInfo(PETSC_NULLPTR, "Eagerly initializing PetscDeviceContext with %s device\n", PetscDeviceTypes[deviceContextInitDevice]));
455     PetscCall(PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice));
456     PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
457     PetscCall(PetscDeviceContextSetFromOptions(comm, "root_", dctx));
458     PetscCall(PetscDeviceContextSetUp(dctx));
459   }
460   PetscFunctionReturn(0);
461 }
462 
463 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */
464 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device) {
465   PetscFunctionBegin;
466   PetscValidPointer(device, 2);
467   PetscCall(PetscDeviceInitialize(type));
468   *device = defaultDevices[type];
469   PetscFunctionReturn(0);
470 }
471