xref: /petsc/src/sys/objects/device/interface/device.cxx (revision 2292213e75f50eade7dffbb625f9e7b8550bf661)
1 #include "cupmdevice.hpp" /* I "petscdevice.h" */
2 #include <petsc/private/petscadvancedmacros.h>
3 
4 using namespace Petsc::Device;
5 
6 /*
7   note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to
8   be picked up by the switch-case macros below
9 */
10 #if PetscDefined(HAVE_CUDA)
11 static CUPM::Device<CUPM::DeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA);
12 #endif
13 #if PetscDefined(HAVE_HIP)
14 static CUPM::Device<CUPM::DeviceType::HIP>  HIPDevice(PetscDeviceContextCreate_HIP);
15 #endif
16 #if PetscDefined(HAVE_SYCL)
17 #include "sycldevice.hpp"
18 static SYCL::Device                         SYCLDevice(PetscDeviceContextCreate_SYCL);
19 #endif
20 
21 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INVALID) == 0,"");
22 static_assert(Petsc::util::integral_value(PETSC_DEVICE_CUDA)    == 1,"");
23 static_assert(Petsc::util::integral_value(PETSC_DEVICE_HIP)     == 2,"");
24 static_assert(Petsc::util::integral_value(PETSC_DEVICE_SYCL)    == 3,"");
25 static_assert(Petsc::util::integral_value(PETSC_DEVICE_MAX)     == 4,"");
26 const char *const PetscDeviceTypes[] = {
27   "invalid",
28   "cuda",
29   "hip",
30   "sycl",
31   "max",
32   "PetscDeviceType",
33   "PETSC_DEVICE_",
34   PETSC_NULLPTR
35 };
36 
37 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_NONE)  == 0,"");
38 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_LAZY)  == 1,"");
39 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_EAGER) == 2,"");
40 const char *const PetscDeviceInitTypes[] = {
41   "none",
42   "lazy",
43   "eager",
44   "PetscDeviceInitType",
45   "PETSC_DEVICE_INIT_",
46   PETSC_NULLPTR
47 };
48 static_assert(
49   sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6,
50   "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!"
51 );
52 
/*
  Expands to one complete switch-case for the PetscDeviceType PETSC_DEVICE_<IMPLS>: the case
  calls <IMPLS>Device.func(...) on the matching file-scope device object, passes the returned
  error code through CHKERRQ() and then breaks out of the switch. The terminating semicolon is
  supplied where the macro is used.
*/
#define PETSC_DEVICE_CASE(IMPLS,func,...)                               \
  case PetscConcat_(PETSC_DEVICE_,IMPLS): {                             \
    auto deviceCaseErr_ = PetscConcat_(IMPLS,Device).func(__VA_ARGS__); \
    CHKERRQ(deviceCaseErr_);                                            \
  } break

/*
  Conditional flavour of PETSC_DEVICE_CASE. Say you want to call

  CUDADevice.myFunction(arg1,arg2)

  from a switch-case, but only when PETSc was built with CUDA, i.e. you would otherwise write:

  switch(PetscDeviceType) {
  #if PetscDefined(HAVE_CUDA)
  case PETSC_DEVICE_CUDA: {
    auto ierr = CUDADevice.myFunction(arg1,arg2);CHKERRQ(ierr);
  } break;
  #endif
  }

  Writing

  PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2)

  inside the switch instead yields the case statement

  case PETSC_DEVICE_CUDA: {
    auto ierr = CUDADevice.myFunction(arg1,arg2);CHKERRQ(ierr);
  } break

  when PetscDefined(HAVE_CUDA) evaluates to 1, and yields nothing at all otherwise.
*/
#define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS,func,...) \
  PetscIfPetscDefined(PetscConcat_(HAVE_,IMPLS),PETSC_DEVICE_CASE,PetscExpandToNothing)(IMPLS,func,__VA_ARGS__)
87 
88 /*@C
89   PetscDeviceCreate - Get a new handle for a particular device type
90 
91   Not Collective, Possibly Synchronous
92 
93   Input Parameters:
94 + type  - The type of PetscDevice
95 - devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically)
96 
97   Output Parameter:
98 . device - The PetscDevice
99 
100   Notes:
101   This routine may initialize PetscDevice. If this is the case, this will most likely cause
102   some sort of device synchronization.
103 
104   devid is what you might pass to cudaSetDevice() for example.
105 
106   Level: beginner
107 
108 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(),
109 PetscDeviceInitialized(), PetscDeviceConfigure(), PetscDeviceView(), PetscDeviceDestroy()
110 @*/
111 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device)
112 {
113   static PetscInt PetscDeviceCounter = 0;
114   PetscDevice     dev;
115   PetscErrorCode  ierr;
116 
117   PetscFunctionBegin;
118   PetscValidDeviceType(type,1);
119   PetscValidPointer(device,3);
120   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
121   ierr = PetscNew(&dev);CHKERRQ(ierr);
122   dev->id     = PetscDeviceCounter++;
123   dev->type   = type;
124   dev->refcnt = 1;
125   /*
126     if you are adding a device, you also need to add it's initialization in
127     PetscDeviceInitializeTypeFromOptions_Private() below
128   */
129   switch (type) {
130     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid);
131     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid);
132     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,getDevice,dev,devid);
133   default:
134     /* in case the above macros expand to nothing this silences any unused variable warnings */
135     (void)(devid);
136     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]);
137   }
138   *device = dev;
139   PetscFunctionReturn(0);
140 }
141 
142 /*@C
143   PetscDeviceDestroy - Free a PetscDevice
144 
145   Not Collective, Asynchronous
146 
147   Input Parameter:
148 . device - The PetscDevice
149 
150   Level: beginner
151 
152 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceView()
153 @*/
154 PetscErrorCode PetscDeviceDestroy(PetscDevice *device)
155 {
156   PetscErrorCode ierr;
157 
158   PetscFunctionBegin;
159   if (!*device) PetscFunctionReturn(0);
160   PetscValidDevice(*device,1);
161   ierr = PetscDeviceDereference_Internal(*device);CHKERRQ(ierr);
162   if ((*device)->refcnt) {
163     *device = PETSC_NULLPTR;
164     PetscFunctionReturn(0);
165   }
166   ierr = PetscFree((*device)->data);CHKERRQ(ierr);
167   ierr = PetscFree(*device);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 /*@C
172   PetscDeviceConfigure - Configure a particular PetscDevice
173 
174   Not Collective, Asynchronous
175 
176   Input Parameter:
177 . device - The PetscDevice to configure
178 
179   Notes:
180   The user should not assume that this is a cheap operation
181 
182   Level: beginner
183 
184 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceView(), PetscDeviceDestroy()
185 @*/
186 PetscErrorCode PetscDeviceConfigure(PetscDevice device)
187 {
188   PetscErrorCode ierr;
189 
190   PetscFunctionBegin;
191   PetscValidDevice(device,1);
192   if (PetscDefined(USE_DEBUG)) {
193     /*
194       if no available configuration is available, this cascades all the way down to default
195       and error
196     */
197     switch (device->type) {
198     case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break;
199     case PETSC_DEVICE_HIP:  if (PetscDefined(HAVE_HIP))  break;
200     case PETSC_DEVICE_SYCL: if (PetscDefined(HAVE_SYCL)) break;
201     default:
202       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[device->type]);
203     }
204   }
205   ierr = (*device->ops->configure)(device);CHKERRQ(ierr);
206   PetscFunctionReturn(0);
207 }
208 
209 /*@C
210   PetscDeviceView - View a PetscDevice
211 
212   Collective on viewer, Asynchronous
213 
214   Input Parameter:
215 + device - The PetscDevice to view
216 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD)
217 
218   Level: beginner
219 
220 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy()
221 @*/
222 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer)
223 {
224   PetscErrorCode ierr;
225 
226   PetscFunctionBegin;
227   PetscValidDevice(device,1);
228   if (!viewer) {ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr);}
229   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
230   ierr = (*device->ops->view)(device,viewer);CHKERRQ(ierr);
231   PetscFunctionReturn(0);
232 }
233 
234 static std::array<bool,PETSC_DEVICE_MAX>        initializedDevice = {};
235 static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices    = {};
236 static_assert(initializedDevice.size() == defaultDevices.size(),"");
237 
238 /*@C
239   PetscDeviceInitialize - Initialize PetscDevice
240 
241   Not Collective, Possibly Synchronous
242 
243   Input Parameter:
244 . type - The PetscDeviceType to initialize
245 
246   Notes:
247   Eagerly initializes the corresponding PetscDeviceType if needed.
248 
249   Level: beginner
250 
251 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialized(), PetscDeviceCreate(), PetscDeviceDestroy()
252 @*/
253 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type)
254 {
255   PetscErrorCode ierr;
256 
257   PetscFunctionBegin;
258   PetscValidDeviceType(type,1);
259   ierr = PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 
263 /*@C
264   PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular
265   PetscDeviceType
266 
267   Not Collective, Asynchronous
268 
269   Input Parameter:
270 . type - The PetscDeviceType to check
271 
272   Output Parameter:
273 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise
274 
275   Notes:
276   If one has not configured PETSc for a particular PetscDeviceType then this routine will
277   return PETSC_FALSE for that PetscDeviceType.
278 
279   Level: beginner
280 
281 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), PetscDeviceCreate(), PetscDeviceDestroy()
282 @*/
283 PetscBool PetscDeviceInitialized(PetscDeviceType type)
284 {
285   return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]);
286 }
287 
288 /*
289   Actual intialization function; any functions claiming to initialize PetscDevice or
290   PetscDeviceContext will have to run through this one
291 */
292 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId)
293 {
294   PetscErrorCode ierr;
295 
296   PetscFunctionBegin;
297   PetscValidDeviceType(type,1);
298   if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0);
299   PetscAssert(!defaultDevices[type],PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]);
300   ierr = PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type]);CHKERRQ(ierr);
301   ierr = PetscDeviceConfigure(defaultDevices[type]);CHKERRQ(ierr);
302   initializedDevice[type] = true;
303   PetscFunctionReturn(0);
304 }
305 
306 #if PetscDefined(USE_LOG)
307 PETSC_INTERN PetscErrorCode PetscLogInitialize(void);
308 #else
309 #define PetscLogInitialize() 0
310 #endif
311 
312 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType)
313 {
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   if (!PetscDeviceConfiguredFor_Internal(type)) {
318     ierr = PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type]);CHKERRQ(ierr);
319     defaultDevices[type] = PETSC_NULLPTR;
320     PetscFunctionReturn(0);
321   }
322   ierr = PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type]);CHKERRQ(ierr);
323   /* ugly switch needed to pick the right global variable... could maybe do this as a union? */
324   switch (type) {
325     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType);
326     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType);
327     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,initialize,comm,&defaultDeviceId,defaultInitType);
328   default:
329     SETERRQ(comm,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]);
330   }
331   /*
332     defaultInitType and defaultDeviceId now represent what the individual TYPES have decided to
333     initialize as
334   */
335   if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) {
336     ierr = PetscInfo(PETSC_NULLPTR,"Eagerly initializing %s PetscDevice\n",PetscDeviceTypes[type]);CHKERRQ(ierr);
337     ierr = PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId);CHKERRQ(ierr);
338     if (defaultView) {
339       PetscViewer vwr;
340 
341       ierr = PetscLogInitialize();CHKERRQ(ierr);
342       ierr = PetscViewerASCIIGetStdout(comm,&vwr);CHKERRQ(ierr);
343       ierr = PetscDeviceView(defaultDevices[type],vwr);CHKERRQ(ierr);
344     }
345   }
346   PetscFunctionReturn(0);
347 }
348 
349 /* called from PetscFinalize() do not call yourself! */
350 static PetscErrorCode PetscDeviceFinalize_Private(void)
351 {
352   PetscErrorCode ierr;
353 
354   PetscFunctionBegin;
355   if (PetscDefined(USE_DEBUG)) {
356     const auto PetscDeviceCheckAllDestroyedAfterFinalize = []{
357       PetscFunctionBegin;
358       for (auto&& device : defaultDevices) PetscCheck(!device,PETSC_COMM_WORLD,PETSC_ERR_COR,"Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()",PetscDeviceTypes[device->type],device->refcnt);
359       PetscFunctionReturn(0);
360     };
361     /*
362       you might be thinking, why on earth are you registered yet another finalizer in a
363       function already called during PetscRegisterFinalizeAll()? If this seems stupid it's
364       because it is.
365 
366       The crux of the problem is that the initializer (and therefore the ~finalizer~) of
367       PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context had
368       a default PetscDevice attached, that PetscDevice will have a reference count >0 and hence
369       won't be destroyed yet. So we need to repeat the check that all devices have been
370       destroyed again ~after~ the global context is destroyed. In summary:
371 
372       1. This finalizer runs and destroys all devices, except it may not because the global
373          context may still hold a reference!
374       2. The global context finalizer runs and does the final reference count decrement
375          required, which actually destroys the held device.
376       3. Our newly added finalizer runs and checks that all is well.
377     */
378     ierr = PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize);CHKERRQ(ierr);
379   }
380   for (auto &&device : defaultDevices) {ierr = PetscDeviceDestroy(&device);CHKERRQ(ierr);}
381   CHKERRCXX(initializedDevice.fill(false));
382   PetscFunctionReturn(0);
383 }
384 
385 /*
386   Begins the init proceeedings for the entire PetscDevice stack. there are 3 stages of
387   initialization types:
388 
389   1. defaultInitType - how does PetscDevice as a whole expect to initialize?
390   2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize?
391      e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but
392      have all CUDA devices still initialize.
393 
394   All told the following happens:
395 
396   0. defaultInitType -> LAZY
397   1. Check for log_view/log_summary, if yes defaultInitType -> EAGER
398   2. PetscDevice initializes each sub type with deviceDefaultInitType.
399   2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition
400       to checking for specific device init. if view or specific device init
401       subTypeDefaultInitType -> EAGER. disabled once again overrides all.
402 */
403 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm)
404 {
405   PetscBool           flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE;
406   PetscInt            defaultDevice   = PETSC_DECIDE;
407   PetscDeviceType     deviceContextInitDevice = PETSC_DEVICE_DEFAULT;
408   PetscDeviceInitType defaultInitType;
409   PetscErrorCode      ierr;
410 
411   PetscFunctionBegin;
412   if (PetscDefined(USE_DEBUG)) {
413     int result;
414 
415     ierr = MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result);CHKERRMPI(ierr);
416     /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the
417      * global space */
418     if (PetscUnlikely(result != MPI_IDENT)) {
419       char name[MPI_MAX_OBJECT_NAME] = {};
420       int  len; /* unused */
421 
422       ierr = MPI_Comm_get_name(comm,name,&len);CHKERRMPI(ierr);
423       SETERRQ(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name);
424     }
425   }
426   comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */
427   ierr = PetscRegisterFinalize(PetscDeviceFinalize_Private);CHKERRQ(ierr);
428   ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg);CHKERRQ(ierr);
429   if (!flg) {
430     ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg);CHKERRQ(ierr);
431   }
432   {
433     PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY;
434 
435     ierr = PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys");CHKERRQ(ierr);
436     ierr = PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR);CHKERRQ(ierr);
437     ierr = PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate()",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max());CHKERRQ(ierr);
438     ierr = PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg);CHKERRQ(ierr);
439     ierr = PetscOptionsEnd();CHKERRQ(ierr);
440     if (initIdx == PETSC_DEVICE_INIT_NONE) {
441       /* disabled all device initialization if devices are globally disabled */
442       PetscCheck(defaultDevice == PETSC_DECIDE,comm,PETSC_ERR_USER_INPUT,"You have disabled devices but also specified a particular device to use, these options are mutually exlusive");
443       defaultView = PETSC_FALSE;
444     } else {
445       defaultView = static_cast<decltype(defaultView)>(defaultView && flg);
446       if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER;
447     }
448     defaultInitType = static_cast<decltype(defaultInitType)>(initIdx);
449   }
450   static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),"");
451   for (int i = 1; i < PETSC_DEVICE_MAX; ++i) {
452     const auto deviceType = static_cast<PetscDeviceType>(i);
453     auto initType         = defaultInitType;
454 
455     ierr = PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType);CHKERRQ(ierr);
456     if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) {
457       initializeDeviceContextEagerly = PETSC_TRUE;
458       deviceContextInitDevice        = deviceType;
459     }
460   }
461   if (initializeDeviceContextEagerly) {
462     PetscDeviceContext dctx;
463 
464     /*
465       somewhat inefficient here as the device context is potentially fully set up twice (once
466       when retrieved then the second time if setfromoptions makes changes)
467     */
468     ierr = PetscInfo(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice]);CHKERRQ(ierr);
469     ierr = PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice);CHKERRQ(ierr);
470     ierr = PetscDeviceContextGetCurrentContext(&dctx);CHKERRQ(ierr);
471     ierr = PetscDeviceContextSetFromOptions(comm,"root_",dctx);CHKERRQ(ierr);
472     ierr = PetscDeviceContextSetUp(dctx);CHKERRQ(ierr);
473   }
474   PetscFunctionReturn(0);
475 }
476 
477 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */
478 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device)
479 {
480   PetscErrorCode ierr;
481 
482   PetscFunctionBegin;
483   PetscValidPointer(device,2);
484   ierr = PetscDeviceInitialize(type);CHKERRQ(ierr);
485   *device = defaultDevices[type];
486   PetscFunctionReturn(0);
487 }
488