xref: /petsc/src/sys/objects/device/interface/device.cxx (revision f97672e55eacc8688507b9471cd7ec2664d7f203)
1 #include "cupmdevice.hpp" /* I "petscdevice.h" */
2 #include <petsc/private/petscadvancedmacros.h>
3 
4 using namespace Petsc::Device;
5 
/*
  File-local backend objects, one per device type PETSc was configured with. The switch-case
  macros below call their getDevice() and initialize() members.

  Note to anyone adding more classes: the object name must be exactly ALL_CAPS_SHORT_NAME +
  Device (e.g. CUDADevice) to be picked up by the switch-case macros below, which build the
  object name by token-pasting the short name onto "Device".
*/
#if PetscDefined(HAVE_CUDA)
static CUPM::Device<CUPM::DeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA);
#endif
#if PetscDefined(HAVE_HIP)
static CUPM::Device<CUPM::DeviceType::HIP>  HIPDevice(PetscDeviceContextCreate_HIP);
#endif
#if PetscDefined(HAVE_SYCL)
/* the SYCL backend class is only pulled in when PETSc is configured with SYCL */
#include "sycldevice.hpp"
static SYCL::Device                         SYCLDevice(PetscDeviceContextCreate_SYCL);
#endif
20 
21 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INVALID) == 0,"");
22 static_assert(Petsc::util::integral_value(PETSC_DEVICE_CUDA)    == 1,"");
23 static_assert(Petsc::util::integral_value(PETSC_DEVICE_HIP)     == 2,"");
24 static_assert(Petsc::util::integral_value(PETSC_DEVICE_SYCL)    == 3,"");
25 static_assert(Petsc::util::integral_value(PETSC_DEVICE_MAX)     == 4,"");
26 const char *const PetscDeviceTypes[] = {
27   "invalid",
28   "cuda",
29   "hip",
30   "sycl",
31   "max",
32   "PetscDeviceType",
33   "PETSC_DEVICE_",
34   PETSC_NULLPTR
35 };
36 
37 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_NONE)  == 0,"");
38 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_LAZY)  == 1,"");
39 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_EAGER) == 2,"");
40 const char *const PetscDeviceInitTypes[] = {
41   "none",
42   "lazy",
43   "eager",
44   "PetscDeviceInitType",
45   "PETSC_DEVICE_INIT_",
46   PETSC_NULLPTR
47 };
48 static_assert(
49   sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6,
50   "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!"
51 );
52 
/*
  Expands to a single switch case for PETSC_DEVICE_<BACKEND> that calls
  <BACKEND>Device.method(...) under PetscCall(). The trailing semicolon after "break" is
  supplied at the point of use.
*/
#define PETSC_DEVICE_CASE(BACKEND,method,...)                        \
  case PetscConcat_(PETSC_DEVICE_,BACKEND): {                        \
    PetscCall(PetscConcat_(BACKEND,Device).method(__VA_ARGS__));     \
  } break
57 
/*
  Conditionally emits a switch case for one backend.

  Written by hand, a guarded case looks like this:

  switch (type) {
  #if PetscDefined(HAVE_CUDA)
  case PETSC_DEVICE_CUDA: {
    PetscCall(CUDADevice.myFunction(arg1,arg2));
  } break;
  #endif
  }

  The same thing is obtained, without the preprocessor guard, from

  PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2);

  When PetscDefined(HAVE_CUDA) evaluates to 1 this becomes

  case PETSC_DEVICE_CUDA: {
    PetscCall(CUDADevice.myFunction(arg1,arg2));
  } break

  and when it does not, the macro expands to nothing at all.
*/
#define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(BACKEND,method,...) \
  PetscIfPetscDefined(PetscConcat_(HAVE_,BACKEND),PETSC_DEVICE_CASE,PetscExpandToNothing)(BACKEND,method,__VA_ARGS__)
87 
88 /*@C
89   PetscDeviceCreate - Get a new handle for a particular device type
90 
91   Not Collective, Possibly Synchronous
92 
93   Input Parameters:
94 + type  - The type of PetscDevice
95 - devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically)
96 
97   Output Parameter:
98 . device - The PetscDevice
99 
100   Notes:
101   This routine may initialize PetscDevice. If this is the case, this will most likely cause
102   some sort of device synchronization.
103 
104   devid is what you might pass to cudaSetDevice() for example.
105 
106   Level: beginner
107 
108 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`,
109           `PetscDeviceInitialized()`, `PetscDeviceConfigure()`, `PetscDeviceView()`, `PetscDeviceDestroy()`
110 @*/
111 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device)
112 {
113   static PetscInt PetscDeviceCounter = 0;
114   PetscDevice     dev;
115 
116   PetscFunctionBegin;
117   PetscValidDeviceType(type,1);
118   PetscValidPointer(device,3);
119   PetscCall(PetscDeviceInitializePackage());
120   PetscCall(PetscNew(&dev));
121   dev->id     = PetscDeviceCounter++;
122   dev->type   = type;
123   dev->refcnt = 1;
124   /*
125     if you are adding a device, you also need to add it's initialization in
126     PetscDeviceInitializeTypeFromOptions_Private() below
127   */
128   switch (type) {
129     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid);
130     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid);
131     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,getDevice,dev,devid);
132   default:
133     /* in case the above macros expand to nothing this silences any unused variable warnings */
134     (void)(devid);
135     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]);
136   }
137   *device = dev;
138   PetscFunctionReturn(0);
139 }
140 
141 /*@C
142   PetscDeviceDestroy - Free a PetscDevice
143 
144   Not Collective, Asynchronous
145 
146   Input Parameter:
147 . device - The PetscDevice
148 
149   Level: beginner
150 
151 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceView()`
152 @*/
153 PetscErrorCode PetscDeviceDestroy(PetscDevice *device)
154 {
155   PetscFunctionBegin;
156   if (!*device) PetscFunctionReturn(0);
157   PetscValidDevice(*device,1);
158   PetscCall(PetscDeviceDereference_Internal(*device));
159   if ((*device)->refcnt) {
160     *device = PETSC_NULLPTR;
161     PetscFunctionReturn(0);
162   }
163   PetscCall(PetscFree((*device)->data));
164   PetscCall(PetscFree(*device));
165   PetscFunctionReturn(0);
166 }
167 
168 /*@C
169   PetscDeviceConfigure - Configure a particular PetscDevice
170 
171   Not Collective, Asynchronous
172 
173   Input Parameter:
174 . device - The PetscDevice to configure
175 
176   Notes:
177   The user should not assume that this is a cheap operation
178 
179   Level: beginner
180 
181 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceView()`, `PetscDeviceDestroy()`
182 @*/
183 PetscErrorCode PetscDeviceConfigure(PetscDevice device)
184 {
185   PetscFunctionBegin;
186   PetscValidDevice(device,1);
187   if (PetscDefined(USE_DEBUG)) {
188     /*
189       if no available configuration is available, this cascades all the way down to default
190       and error
191     */
192     switch (device->type) {
193     case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break;
194     case PETSC_DEVICE_HIP:  if (PetscDefined(HAVE_HIP))  break;
195     case PETSC_DEVICE_SYCL: if (PetscDefined(HAVE_SYCL)) break;
196     default:
197       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[device->type]);
198     }
199   }
200   PetscCall((*device->ops->configure)(device));
201   PetscFunctionReturn(0);
202 }
203 
204 /*@C
205   PetscDeviceView - View a PetscDevice
206 
207   Collective on viewer, Asynchronous
208 
209   Input Parameters:
210 + device - The PetscDevice to view
211 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD)
212 
213   Level: beginner
214 
215 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()`
216 @*/
217 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer)
218 {
219   PetscFunctionBegin;
220   PetscValidDevice(device,1);
221   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer));
222   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
223   PetscCall((*device->ops->view)(device,viewer));
224   PetscFunctionReturn(0);
225 }
226 
227 /*@C
228   PetscDeviceGetDeviceId - Get the device id
229 
230   Not collective
231 
232   Input Parameter:
233 . device - The PetscDevice
234 
235   Output Parameter:
236 . id - The device id
237 
238   Level: beginner
239 
240 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()`
241 @*/
242 PetscErrorCode PetscDeviceGetDeviceId(PetscDevice device, PetscInt *id)
243 {
244   PetscFunctionBegin;
245   PetscValidDevice(device,1);
246   PetscValidIntPointer(id,2);
247   *id = device->deviceId;
248   PetscFunctionReturn(0);
249 }
250 
251 static std::array<bool,PETSC_DEVICE_MAX>        initializedDevice = {};
252 static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices    = {};
253 static_assert(initializedDevice.size() == defaultDevices.size(),"");
254 
255 /*@C
256   PetscDeviceInitialize - Initialize PetscDevice
257 
258   Not Collective, Possibly Synchronous
259 
260   Input Parameter:
261 . type - The PetscDeviceType to initialize
262 
263   Notes:
264   Eagerly initializes the corresponding PetscDeviceType if needed.
265 
266   Level: beginner
267 
268 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialized()`, `PetscDeviceCreate()`, `PetscDeviceDestroy()`
269 @*/
270 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type)
271 {
272   PetscFunctionBegin;
273   PetscValidDeviceType(type,1);
274   PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE));
275   PetscFunctionReturn(0);
276 }
277 
278 /*@C
279   PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular
280   PetscDeviceType
281 
282   Not Collective, Asynchronous
283 
284   Input Parameter:
285 . type - The PetscDeviceType to check
286 
287   Output Parameter:
288 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise
289 
290   Notes:
291   If one has not configured PETSc for a particular PetscDeviceType then this routine will
292   return PETSC_FALSE for that PetscDeviceType.
293 
294   Level: beginner
295 
296 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`, `PetscDeviceCreate()`, `PetscDeviceDestroy()`
297 @*/
298 PetscBool PetscDeviceInitialized(PetscDeviceType type)
299 {
300   return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]);
301 }
302 
303 /*
304   Actual intialization function; any functions claiming to initialize PetscDevice or
305   PetscDeviceContext will have to run through this one
306 */
307 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId)
308 {
309   PetscFunctionBegin;
310   PetscValidDeviceType(type,1);
311   if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0);
312   PetscAssert(!defaultDevices[type],PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]);
313   PetscCall(PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type]));
314   PetscCall(PetscDeviceConfigure(defaultDevices[type]));
315   initializedDevice[type] = true;
316   PetscFunctionReturn(0);
317 }
318 
319 #if PetscDefined(USE_LOG)
320 PETSC_INTERN PetscErrorCode PetscLogInitialize(void);
321 #else
322 #define PetscLogInitialize() 0
323 #endif
324 
325 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType)
326 {
327   PetscFunctionBegin;
328   if (!PetscDeviceConfiguredFor_Internal(type)) {
329     PetscCall(PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type]));
330     defaultDevices[type] = PETSC_NULLPTR;
331     PetscFunctionReturn(0);
332   }
333   PetscCall(PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type]));
334   /* ugly switch needed to pick the right global variable... could maybe do this as a union? */
335   switch (type) {
336     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType);
337     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType);
338     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,initialize,comm,&defaultDeviceId,defaultInitType);
339   default:
340     SETERRQ(comm,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]);
341   }
342   /*
343     defaultInitType and defaultDeviceId now represent what the individual TYPES have decided to
344     initialize as
345   */
346   if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) {
347     PetscCall(PetscInfo(PETSC_NULLPTR,"Eagerly initializing %s PetscDevice\n",PetscDeviceTypes[type]));
348     PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId));
349     if (defaultView) {
350       PetscViewer vwr;
351 
352       PetscCall(PetscLogInitialize());
353       PetscCall(PetscViewerASCIIGetStdout(comm,&vwr));
354       PetscCall(PetscDeviceView(defaultDevices[type],vwr));
355     }
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 /* called from PetscFinalize() do not call yourself! */
361 static PetscErrorCode PetscDeviceFinalize_Private(void)
362 {
363   PetscFunctionBegin;
364   if (PetscDefined(USE_DEBUG)) {
365     const auto PetscDeviceCheckAllDestroyedAfterFinalize = []{
366       PetscFunctionBegin;
367       for (auto&& device : defaultDevices) PetscCheck(!device,PETSC_COMM_WORLD,PETSC_ERR_COR,"Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()",PetscDeviceTypes[device->type],device->refcnt);
368       PetscFunctionReturn(0);
369     };
370     /*
371       you might be thinking, why on earth are you registered yet another finalizer in a
372       function already called during PetscRegisterFinalizeAll()? If this seems stupid it's
373       because it is.
374 
375       The crux of the problem is that the initializer (and therefore the ~finalizer~) of
376       PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context had
377       a default PetscDevice attached, that PetscDevice will have a reference count >0 and hence
378       won't be destroyed yet. So we need to repeat the check that all devices have been
379       destroyed again ~after~ the global context is destroyed. In summary:
380 
381       1. This finalizer runs and destroys all devices, except it may not because the global
382          context may still hold a reference!
383       2. The global context finalizer runs and does the final reference count decrement
384          required, which actually destroys the held device.
385       3. Our newly added finalizer runs and checks that all is well.
386     */
387     PetscCall(PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize));
388   }
389   for (auto &&device : defaultDevices) PetscCall(PetscDeviceDestroy(&device));
390   PetscCallCXX(initializedDevice.fill(false));
391   PetscFunctionReturn(0);
392 }
393 
/*
  Begins the init proceedings for the entire PetscDevice stack. There are 2 stages of
  initialization types:

  1. defaultInitType - how does PetscDevice as a whole expect to initialize?
  2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize?
     e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but
     have all CUDA devices still initialize.

  All told the following happens:

  0. defaultInitType -> LAZY
  1. Check for log_view/log_summary, if yes defaultInitType -> EAGER
  2. PetscDevice initializes each sub-type with defaultInitType.
  2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition
      to checking for specific device init. If view or specific device init is requested,
      subTypeDefaultInitType -> EAGER. Disabled once again overrides all.
*/
412 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm)
413 {
414   PetscBool           flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE;
415   PetscInt            defaultDevice   = PETSC_DECIDE;
416   PetscDeviceType     deviceContextInitDevice = PETSC_DEVICE_DEFAULT;
417   PetscDeviceInitType defaultInitType;
418 
419   PetscFunctionBegin;
420   if (PetscDefined(USE_DEBUG)) {
421     int result;
422 
423     PetscCallMPI(MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result));
424     /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the
425      * global space */
426     if (PetscUnlikely(result != MPI_IDENT)) {
427       char name[MPI_MAX_OBJECT_NAME] = {};
428       int  len; /* unused */
429 
430       PetscCallMPI(MPI_Comm_get_name(comm,name,&len));
431       SETERRQ(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name);
432     }
433   }
434   comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */
435   PetscCall(PetscRegisterFinalize(PetscDeviceFinalize_Private));
436   PetscCall(PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg));
437   if (!flg) PetscCall(PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg));
438 #if defined(PETSC_HAVE_DEVICE)
439   PetscBool gtime;
440   PetscCall(PetscOptionsHasName(NULL,NULL,"-log_view_gpu_time",&gtime));
441   if (gtime) PetscCall(PetscLogGpuTime());
442 #endif
443   {
444     PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY;
445 
446     PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys");
447     PetscCall(PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR));
448     PetscCall(PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate()",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max()));
449     PetscCall(PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg));
450     PetscOptionsEnd();
451     if (initIdx == PETSC_DEVICE_INIT_NONE) {
452       /* disabled all device initialization if devices are globally disabled */
453       PetscCheck(defaultDevice == PETSC_DECIDE,comm,PETSC_ERR_USER_INPUT,"You have disabled devices but also specified a particular device to use, these options are mutually exlusive");
454       defaultView = PETSC_FALSE;
455     } else {
456       defaultView = static_cast<decltype(defaultView)>(defaultView && flg);
457       if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER;
458     }
459     defaultInitType = static_cast<decltype(defaultInitType)>(initIdx);
460   }
461   static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),"");
462   for (int i = 1; i < PETSC_DEVICE_MAX; ++i) {
463     const auto deviceType = static_cast<PetscDeviceType>(i);
464     auto initType         = defaultInitType;
465 
466     PetscCall(PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType));
467     if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) {
468       initializeDeviceContextEagerly = PETSC_TRUE;
469       deviceContextInitDevice        = deviceType;
470     }
471   }
472   if (initializeDeviceContextEagerly) {
473     PetscDeviceContext dctx;
474 
475     /*
476       somewhat inefficient here as the device context is potentially fully set up twice (once
477       when retrieved then the second time if setfromoptions makes changes)
478     */
479     PetscCall(PetscInfo(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice]));
480     PetscCall(PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice));
481     PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
482     PetscCall(PetscDeviceContextSetFromOptions(comm,"root_",dctx));
483     PetscCall(PetscDeviceContextSetUp(dctx));
484   }
485   PetscFunctionReturn(0);
486 }
487 
488 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */
489 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device)
490 {
491   PetscFunctionBegin;
492   PetscValidPointer(device,2);
493   PetscCall(PetscDeviceInitialize(type));
494   *device = defaultDevices[type];
495   PetscFunctionReturn(0);
496 }
497