xref: /petsc/src/sys/objects/device/interface/device.cxx (revision 17f48955e3445b92d4f06cdd133e9c1d2d0ea0c8)
1 #include "cupmdevice.hpp" /* I "petscdevice.h" */
2 #include <petsc/private/petscadvancedmacros.h>
3 
4 using namespace Petsc::Device;
5 
/* note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to
 * be picked up by the switch-case macros below. */
/* One file-local dispatcher object per backend that PETSc was configured with. Each object is
 * constructed with the matching PetscDeviceContextCreate_<IMPL> routine, and is only ever
 * reached through the PETSC_DEVICE_CASE macros, which paste the backend name in front of
 * "Device" to form the variable name. */
#if PetscDefined(HAVE_CUDA)
static CUPM::Device<CUPM::DeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA);
#endif
#if PetscDefined(HAVE_HIP)
static CUPM::Device<CUPM::DeviceType::HIP>  HIPDevice(PetscDeviceContextCreate_HIP);
#endif
#if PetscDefined(HAVE_SYCL)
#include "sycldevice.hpp"
static SYCL::Device                         SYCLDevice(PetscDeviceContextCreate_SYCL);
#endif
18 
/* Names of the PetscDeviceType enumerators. The static_asserts pin every enumerator to the
 * index of its name in the table, so reordering the enum without updating the table fails to
 * compile. After the value names, the table carries the enum type name, the common enumerator
 * prefix, and a terminating null pointer. */
static_assert(Petsc::util::integral_value(PETSC_DEVICE_INVALID) == 0,"");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_CUDA)    == 1,"");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_HIP)     == 2,"");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_SYCL)    == 3,"");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_MAX)     == 4,"");
const char *const PetscDeviceTypes[] = {
  "invalid",
  "cuda",
  "hip",
  "sycl",
  "max",
  "PetscDeviceType",
  "PETSC_DEVICE_",
  PETSC_NULLPTR
};
34 
/* Names of the PetscDeviceInitType enumerators, laid out like PetscDeviceTypes[]: one name per
 * enumerator (index == enumerator value, enforced by the static_asserts), followed by the enum
 * type name, the common enumerator prefix, and a terminating null pointer. */
static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_NONE)  == 0,"");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_LAZY)  == 1,"");
static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_EAGER) == 2,"");
const char *const PetscDeviceInitTypes[] = {
  "none",
  "lazy",
  "eager",
  "PetscDeviceInitType",
  "PETSC_DEVICE_INIT_",
  PETSC_NULLPTR
};
/* 6 entries == 3 value names + 3 trailing entries; code elsewhere hard-codes the number of
 * value names, so a change in table size must be propagated by hand */
static_assert(
  sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6,
  "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!"
);
50 
/* Emits a "case PETSC_DEVICE_<IMPLS>:" label whose body calls <IMPLS>Device.func(args...) and
 * checks the returned error code with CHKERRQ(). The expansion ends in "break" without a
 * semicolon; the semicolon is supplied at the point of use. */
#define PETSC_DEVICE_CASE(IMPLS,func,...)                                     \
  case PetscConcat_(PETSC_DEVICE_,IMPLS): {                                   \
    auto ierr_ = PetscConcat_(IMPLS,Device).func(__VA_ARGS__);CHKERRQ(ierr_); \
  } break
55 
/* Suppose you have:
 *
 * CUDADevice.myFunction(arg1,arg2)
 *
 * that you would like to conditionally define and call in a switch-case:
 *
 * switch(PetscDeviceType) {
 * #if PetscDefined(HAVE_CUDA)
 * case PETSC_DEVICE_CUDA: {
 *   auto ierr = CUDADevice.myFunction(arg1,arg2);CHKERRQ(ierr);
 * } break;
 * #endif
 * }
 *
 * then calling this macro:
 *
 * PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2)
 *
 * will expand to the following case statement:
 *
 * case PETSC_DEVICE_CUDA: {
 *   auto ierr_ = CUDADevice.myFunction(arg1,arg2);CHKERRQ(ierr_);
 * } break
 *
 * if PetscDefined(HAVE_CUDA) evaluates to 1, and expand to nothing otherwise. Note that the
 * expansion carries no trailing semicolon, so the macro invocation must be followed by one.
 */
#define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS,func,...)                                     \
  PetscIfPetscDefined(PetscConcat_(HAVE_,IMPLS),PETSC_DEVICE_CASE,PetscExpandToNothing)(IMPLS,func,__VA_ARGS__)
84 
85 /*@C
86   PetscDeviceCreate - Get a new handle for a particular device type
87 
88   Not Collective, Possibly Synchronous
89 
90   Input Parameter:
91 . type  - The type of PetscDevice
92 . devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically)
93 
94   Output Parameter:
95 . device - The PetscDevice
96 
97   Notes:
98   This routine may initialize PetscDevice. If this is the case, this will most likely cause
99   some sort of device synchronization.
100 
101   devid is what you might pass to cudaSetDevice() for example.
102 
103   Level: beginner
104 
105 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(),
106 PetscDeviceInitialized(), PetscDeviceConfigure(), PetscDeviceView(), PetscDeviceDestroy()
107 @*/
108 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device)
109 {
110   static PetscInt PetscDeviceCounter = 0;
111   PetscDevice     dev;
112   PetscErrorCode  ierr;
113 
114   PetscFunctionBegin;
115   PetscValidDeviceType(type,1);
116   PetscValidPointer(device,3);
117   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
118   ierr = PetscNew(&dev);CHKERRQ(ierr);
119   dev->id     = PetscDeviceCounter++;
120   dev->type   = type;
121   dev->refcnt = 1;
122   /* if you are adding a device, you also need to add it's initialization in
123    * PetscDeviceInitializeTypeFromOptions_Private() below */
124   switch (type) {
125     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid);
126     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid);
127     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,getDevice,dev,devid);
128   default:
129     /* in case the above macros expand to nothing this silences any unused variable warnings */
130     (void)(devid);
131     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]);
132   }
133   *device = dev;
134   PetscFunctionReturn(0);
135 }
136 
137 /*@C
138   PetscDeviceDestroy - Free a PetscDevice
139 
140   Not Collective, Asynchronous
141 
142   Input Parameter:
143 . device - The PetscDevice
144 
145   Level: beginner
146 
147 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceView()
148 @*/
149 PetscErrorCode PetscDeviceDestroy(PetscDevice *device)
150 {
151   PetscErrorCode ierr;
152 
153   PetscFunctionBegin;
154   if (!*device) PetscFunctionReturn(0);
155   PetscValidDevice(*device,1);
156   ierr = PetscDeviceDereference_Internal(*device);CHKERRQ(ierr);
157   if ((*device)->refcnt) {
158     *device = PETSC_NULLPTR;
159     PetscFunctionReturn(0);
160   }
161   ierr = PetscFree((*device)->data);CHKERRQ(ierr);
162   ierr = PetscFree(*device);CHKERRQ(ierr);
163   PetscFunctionReturn(0);
164 }
165 
166 /*@C
167   PetscDeviceConfigure - Configure a particular PetscDevice
168 
169   Not Collective, Asynchronous
170 
171   Input Parameter:
172 . device - The PetscDevice to configure
173 
174   Notes:
175   The user should not assume that this is a cheap operation
176 
177   Level: beginner
178 
179 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceView(), PetscDeviceDestroy()
180 @*/
181 PetscErrorCode PetscDeviceConfigure(PetscDevice device)
182 {
183   PetscErrorCode ierr;
184 
185   PetscFunctionBegin;
186   PetscValidDevice(device,1);
187   if (PetscDefined(USE_DEBUG)) {
188     /* if no available configuration is available, this cascades all the way down to default
189      * and error */
190     switch (device->type) {
191     case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break;
192     case PETSC_DEVICE_HIP:  if (PetscDefined(HAVE_HIP))  break;
193     case PETSC_DEVICE_SYCL: if (PetscDefined(HAVE_SYCL)) break;
194     default:
195       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[device->type]);
196     }
197   }
198   ierr = (*device->ops->configure)(device);CHKERRQ(ierr);
199   PetscFunctionReturn(0);
200 }
201 
202 /*@C
203   PetscDeviceView - View a PetscDevice
204 
205   Collective on viewer, Asynchronous
206 
207   Input Parameter:
208 + device - The PetscDevice to view
209 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD)
210 
211   Level: beginner
212 
213 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy()
214 @*/
215 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer)
216 {
217   PetscErrorCode ierr;
218 
219   PetscFunctionBegin;
220   PetscValidDevice(device,1);
221   if (!viewer) {ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr);}
222   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
223   ierr = (*device->ops->view)(device,viewer);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
/* Per-PetscDeviceType bookkeeping, both indexed by the PetscDeviceType value:
 * initializedDevice[t] records whether the default device of type t has been created and
 * configured, defaultDevices[t] holds that default device (null until it is created). */
static std::array<bool,PETSC_DEVICE_MAX>        initializedDevice = {};
static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices    = {};
static_assert(initializedDevice.size() == defaultDevices.size(),"");
230 
231 /*@C
232   PetscDeviceInitialize - Initialize PetscDevice
233 
234   Not Collective, Possibly Synchronous
235 
236   Input Parameter:
237 . type - The PetscDeviceType to initialize
238 
239   Notes:
240   Eagerly initializes the corresponding PetscDeviceType if needed.
241 
242   Level: beginner
243 
244 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialized(), PetscDeviceCreate(), PetscDeviceDestroy()
245 @*/
246 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type)
247 {
248   PetscErrorCode ierr;
249 
250   PetscFunctionBegin;
251   PetscValidDeviceType(type,1);
252   ierr = PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 /*@C
257   PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular
258   PetscDeviceType
259 
260   Not Collective, Asynchronous
261 
262   Input Parameter:
263 . type - The PetscDeviceType to check
264 
265   Output Parameter:
266 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise
267 
268   Notes:
269   If one has not configured PETSc for a particular PetscDeviceType then this routine will
270   return PETSC_FALSE for that PetscDeviceType.
271 
272   Level: beginner
273 
274 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), PetscDeviceCreate(), PetscDeviceDestroy()
275 @*/
276 PetscBool PetscDeviceInitialized(PetscDeviceType type)
277 {
278   return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]);
279 }
280 
281 /* Actual intialization function; any functions claiming to initialize PetscDevice or
282  * PetscDeviceContext will have to run through this one */
283 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId)
284 {
285   PetscErrorCode ierr;
286 
287   PetscFunctionBegin;
288   PetscValidDeviceType(type,1);
289   if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0);
290   if (PetscUnlikelyDebug(defaultDevices[type])) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]);
291   ierr = PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type]);CHKERRQ(ierr);
292   ierr = PetscDeviceConfigure(defaultDevices[type]);CHKERRQ(ierr);
293   initializedDevice[type] = true;
294   PetscFunctionReturn(0);
295 }
296 
297 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType)
298 {
299   PetscErrorCode ierr;
300 
301   PetscFunctionBegin;
302   if (!PetscDeviceConfiguredFor_Internal(type)) {
303     ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type]);CHKERRQ(ierr);
304     defaultDevices[type] = PETSC_NULLPTR;
305     PetscFunctionReturn(0);
306   }
307   ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type]);CHKERRQ(ierr);
308   /* ugly switch needed to pick the right global variable... could maybe do this as a union? */
309   switch (type) {
310     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType);
311     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType);
312     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,initialize,comm,&defaultDeviceId,defaultInitType);
313   default:
314     SETERRQ1(comm,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]);
315   }
316   /* defaultInitType and defaultDeviceId now represent what the individual TYPES have decided
317    * to initialize as */
318   if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) {
319     ierr = PetscInfo1(PETSC_NULLPTR,"Eagerly initializing %s PetscDevice\n",PetscDeviceTypes[type]);CHKERRQ(ierr);
320     ierr = PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId);CHKERRQ(ierr);
321     if (defaultView) {
322       PetscViewer vwr;
323 
324       ierr = PetscViewerASCIIGetStdout(comm,&vwr);CHKERRQ(ierr);
325       ierr = PetscDeviceView(defaultDevices[type],vwr);CHKERRQ(ierr);
326     }
327   }
328   PetscFunctionReturn(0);
329 }
330 
331 /* called from PetscFinalize() do not call yourself! */
332 static PetscErrorCode PetscDeviceFinalize_Private(void)
333 {
334   PetscErrorCode ierr;
335 
336   PetscFunctionBegin;
337   if (PetscDefined(USE_DEBUG)) {
338     const auto PetscDeviceCheckAllDestroyedAfterFinalize = [](){
339       PetscFunctionBegin;
340       for (auto&& device : defaultDevices) {
341         if (PetscUnlikely(device)) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_COR,"Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()",PetscDeviceTypes[device->type],device->refcnt);
342       }
343       PetscFunctionReturn(0);
344     };
345     /* you might be thinking, why on earth are you registered yet another finalizer in a
346      * function already called during PetscRegisterFinalizeAll()? If this seems stupid it's
347      * because it is.
348      *
349      * The crux of the problem is that the initializer (and therefore the ~finalizer~) of
350      * PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context
351      * had a default PetscDevice attached, that PetscDevice will have a reference count >0 and
352      * hence won't be destroyed yet. So we need to repeat the check that all devices have been
353      * destroyed again ~after~ the global context is destroyed. In summary:
354      *
355      * 1. This finalizer runs and destroys all devices, except it may not because the global
356      *    context may still hold a reference!
357      * 2. The global context finalizer runs and does the final reference count decrement
358      *    required, which actually destroys the held device.
359      * 3. Our newly added finalizer runs and checks that all is well.
360      */
361     ierr = PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize);CHKERRQ(ierr);
362   }
363   for (auto &&device : defaultDevices) {ierr = PetscDeviceDestroy(&device);CHKERRQ(ierr);}
364   CHKERRCXX(initializedDevice.fill(false));
365   PetscFunctionReturn(0);
366 }
367 
368 /* begins the init proceeedings for the entire PetscDevice stack. there are 3 stages of
369  * initialization types:
370  1. defaultInitType - how does PetscDevice as a whole expect to initialize?
371  2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize?
372     e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but
373     have all CUDA devices still initialize.
374 
375  All told the following happens:
376  0. defaultInitType -> LAZY
377  1. Check for log_view/log_summary, if yes defaultInitType -> EAGER
378  2. PetscDevice initializes each sub type with deviceDefaultInitType.
379  2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition
380      to checking for specific device init. if view or specific device init
381      subTypeDefaultInitType -> EAGER. disabled once again overrides all.
382  */
383 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm)
384 {
385   PetscBool           flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE;
386   PetscInt            defaultDevice   = PETSC_DECIDE;
387   PetscDeviceType     deviceContextInitDevice = PETSC_DEVICE_DEFAULT;
388   PetscDeviceInitType defaultInitType;
389   PetscErrorCode      ierr;
390 
391   PetscFunctionBegin;
392   if (PetscDefined(USE_DEBUG)) {
393     int result;
394 
395     ierr = MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result);CHKERRMPI(ierr);
396     /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the
397      * global space */
398     if (PetscUnlikely(result != MPI_IDENT)) {
399       char name[MPI_MAX_OBJECT_NAME] = {};
400       int  len; /* unused */
401 
402       ierr = MPI_Comm_get_name(comm,name,&len);CHKERRMPI(ierr);
403       SETERRQ1(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name);
404     }
405   }
406   comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */
407   ierr = PetscRegisterFinalize(PetscDeviceFinalize_Private);CHKERRQ(ierr);
408   ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg);CHKERRQ(ierr);
409   if (!flg) {
410     ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg);CHKERRQ(ierr);
411   }
412   {
413     PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY;
414 
415     ierr = PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys");CHKERRQ(ierr);
416     ierr = PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR);CHKERRQ(ierr);
417     ierr = PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate()",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max());CHKERRQ(ierr);
418     ierr = PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg);CHKERRQ(ierr);
419     ierr = PetscOptionsEnd();CHKERRQ(ierr);
420     if (initIdx == PETSC_DEVICE_INIT_NONE) {
421       /* disabled all device initialization if devices are globally disabled */
422       if (PetscUnlikely(defaultDevice != PETSC_DECIDE)) SETERRQ(comm,PETSC_ERR_USER_INPUT,"You have disabled devices but also specified a particular device to use, these options are mutually exlusive");
423       defaultView = PETSC_FALSE;
424     } else {
425       defaultView = static_cast<decltype(defaultView)>(defaultView && flg);
426       if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER;
427     }
428     defaultInitType = static_cast<decltype(defaultInitType)>(initIdx);
429   }
430   static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),"");
431   for (int i = 1; i < PETSC_DEVICE_MAX; ++i) {
432     const auto deviceType = static_cast<PetscDeviceType>(i);
433     auto initType         = defaultInitType;
434 
435     ierr = PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType);CHKERRQ(ierr);
436     if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) {
437       initializeDeviceContextEagerly = PETSC_TRUE;
438       deviceContextInitDevice        = deviceType;
439     }
440   }
441   if (initializeDeviceContextEagerly) {
442     PetscDeviceContext dctx;
443 
444     /* somewhat inefficient here as the device context is potentially fully set up twice (once
445      * when retrieved then the second time if setfromoptions makes changes) */
446     ierr = PetscInfo1(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice]);CHKERRQ(ierr);
447     ierr = PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice);CHKERRQ(ierr);
448     ierr = PetscDeviceContextGetCurrentContext(&dctx);CHKERRQ(ierr);
449     ierr = PetscDeviceContextSetFromOptions(comm,"root_",dctx);CHKERRQ(ierr);
450     ierr = PetscDeviceContextSetUp(dctx);CHKERRQ(ierr);
451   }
452   PetscFunctionReturn(0);
453 }
454 
455 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */
456 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device)
457 {
458   PetscErrorCode ierr;
459 
460   PetscFunctionBegin;
461   PetscValidPointer(device,2);
462   ierr = PetscDeviceInitialize(type);CHKERRQ(ierr);
463   *device = defaultDevices[type];
464   PetscFunctionReturn(0);
465 }
466