xref: /petsc/src/sys/objects/device/interface/device.cxx (revision 8ebe3e4e9e00d86ece2e9fcd0cc84910b0ad437c)
1 #include "cupmdevice.hpp" /* I "petscdevice.h" */
2 
3 using namespace Petsc;
4 
5 /* note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to
6  * be picked up by the switch-case macros below. */
7 #if PetscDefined(HAVE_CUDA)
8 static CUPMDevice<CUPMDeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA);
9 #endif
10 #if PetscDefined(HAVE_HIP)
11 static CUPMDevice<CUPMDeviceType::HIP>  HIPDevice(PetscDeviceContextCreate_HIP);
12 #endif
13 #if PetscDefined(HAVE_SYCL)
14 #include "sycldevice.hpp"
15 static SyclDevice                       SYCLDevice(PetscDeviceContextCreate_SYCL);
16 #endif
17 
18 const char *const PetscDeviceTypes[] = {
19   "invalid",
20   "cuda",
21   "hip",
22   "sycl",
23   "max",
24   "PetscDeviceType",
25   "PETSC_DEVICE_",
26   PETSC_NULLPTR
27 };
28 
29 const char *const PetscDeviceInitTypes[] = {
30   "none",
31   "lazy",
32   "eager",
33   "PetscDeviceInitType",
34   "PETSC_DEVICE_INIT_",
35   PETSC_NULLPTR
36 };
37 static_assert(
38   sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6,
39   "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!"
40 );
41 
/* Raises PETSC_ERR_PLIB from the calling function. Meant for the default: label of a switch
 * over PetscDeviceType when none of the configured back ends matched devtype. */
#define PETSC_DEVICE_DEFAULT_CASE(mpicomm,devtype)                      \
  SETERRQ1((mpicomm),PETSC_ERR_PLIB,                                    \
           "PETSc was seemingly configured for PetscDeviceType %s but " \
           "we've fallen through all cases in a switch",                \
           PetscDeviceTypes[devtype])

/* Token pasting through one level of indirection, so that the arguments handed to CAT() are
 * macro-expanded before they are glued together */
#define CAT_(head,...) head ## __VA_ARGS__
#define CAT(head,...)  CAT_(head,__VA_ARGS__)

/* The two possible expansions of PETSC_DEVICE_CASE_IF_PETSC_DEFINED(); the trailing 0 or 1 is
 * the value of PetscDefined(HAVE_<IMPL>).
 *
 * 0: back end not configured, emit nothing.
 * 1: emit "case PETSC_DEVICE_<IMPL>:" which calls <IMPL>Device.method(args...), checks the
 *    returned error code and breaks out of the switch. */
#define PETSC_DEVICE_CASE_IF_PETSC_DEFINED__0(IMPL,method,...)
#define PETSC_DEVICE_CASE_IF_PETSC_DEFINED__1(IMPL,method,...)          \
  case CAT(PETSC_DEVICE_,IMPL):                                         \
  {                                                                     \
    auto ierr = CAT(IMPL,Device).method(__VA_ARGS__);CHKERRQ(ierr);     \
    break;                                                              \
  }

/* Selects the __0 or __1 variant above by pasting on the value of PetscDefined(HAVE_<IMPL>) */
#define PETSC_DEVICE_CASE_IF_PETSC_DEFINED_(IMPL,...)                   \
  CAT(PETSC_DEVICE_CASE_IF_PETSC_DEFINED__,PetscDefined(CAT(HAVE_,IMPL)))(IMPL,__VA_ARGS__)

/* Entry point, use inside a switch over PetscDeviceType:
 *
 *   PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid);
 */
#define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPL,...)            \
  PETSC_DEVICE_CASE_IF_PETSC_DEFINED_(IMPL,__VA_ARGS__)

/* Marks a variable as used; needed when every case macro above expands to nothing */
#define PETSC_DEVICE_UNUSED_IF_NO_DEVICE(unused) (void)(unused)
66 
67 /*@C
68   PetscDeviceCreate - Get a new handle for a particular device type
69 
70   Not Collective, Possibly Synchronous
71 
72   Input Parameter:
73 . type  - The type of PetscDevice
74 . devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically)
75 
76   Output Parameter:
77 . device - The PetscDevice
78 
79   Notes:
80   This routine may initialize PetscDevice. If this is the case, this will most likely cause
81   some sort of device synchronization.
82 
83   devid is what you might pass to cudaSetDevice() for example.
84 
85   Level: beginner
86 
87 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(),
88 PetscDeviceInitialized(), PetscDeviceConfigure(), PetscDeviceView(), PetscDeviceDestroy()
89 @*/
90 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device)
91 {
92   static PetscInt PetscDeviceCounter = 0;
93   PetscDevice     dev;
94   PetscErrorCode  ierr;
95 
96   PetscFunctionBegin;
97   PetscValidDeviceType(type,1);
98   PetscValidPointer(device,3);
99   ierr = PetscDeviceInitializePackage();CHKERRQ(ierr);
100   ierr = PetscNew(&dev);CHKERRQ(ierr);
101   dev->id     = PetscDeviceCounter++;
102   dev->type   = type;
103   dev->refcnt = 1;
104   /* if you are adding a device, you also need to add it's initialization in
105      PetscDeviceInitializeTypeFromOptions_Private() below */
106   switch (type) {
107     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid);
108     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid);
109     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,getDevice,dev,devid);
110   default:
111     PETSC_DEVICE_UNUSED_IF_NO_DEVICE(devid);
112     PETSC_DEVICE_DEFAULT_CASE(PETSC_COMM_SELF,type);
113   }
114   *device = dev;
115   PetscFunctionReturn(0);
116 }
117 
118 /*@C
119   PetscDeviceDestroy - Free a PetscDevice
120 
121   Not Collective, Asynchronous
122 
123   Input Parameter:
124 . device - The PetscDevice
125 
126   Level: beginner
127 
128 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceView()
129 @*/
130 PetscErrorCode PetscDeviceDestroy(PetscDevice *device)
131 {
132   PetscErrorCode ierr;
133 
134   PetscFunctionBegin;
135   if (!*device) PetscFunctionReturn(0);
136   PetscValidDevice(*device,1);
137   ierr = PetscDeviceDereference_Internal(*device);CHKERRQ(ierr);
138   if ((*device)->refcnt) {
139     *device = PETSC_NULLPTR;
140     PetscFunctionReturn(0);
141   }
142   ierr = PetscFree((*device)->data);CHKERRQ(ierr);
143   ierr = PetscFree(*device);CHKERRQ(ierr);
144   PetscFunctionReturn(0);
145 }
146 
147 /*@C
148   PetscDeviceConfigure - Configure a particular PetscDevice
149 
150   Not Collective, Asynchronous
151 
152   Input Parameter:
153 . device - The PetscDevice to configure
154 
155   Notes:
156   The user should not assume that this is a cheap operation
157 
158   Level: beginner
159 
160 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceView(), PetscDeviceDestroy()
161 @*/
162 PetscErrorCode PetscDeviceConfigure(PetscDevice device)
163 {
164   PetscErrorCode ierr;
165 
166   PetscFunctionBegin;
167   PetscValidDevice(device,1);
168   if (PetscDefined(USE_DEBUG)) {
169     /* if no available configuration is available, this cascades all the way down to default
170        and error */
171     switch (device->type) {
172     case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break;
173     case PETSC_DEVICE_HIP:  if (PetscDefined(HAVE_HIP))  break;
174     case PETSC_DEVICE_SYCL: if (PetscDefined(HAVE_SYCL)) break;
175     default:
176       PETSC_DEVICE_DEFAULT_CASE(PETSC_COMM_SELF,device->type);
177       break;
178     }
179   }
180   ierr = (*device->ops->configure)(device);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 /*@C
185   PetscDeviceView - View a PetscDevice
186 
187   Collective on viewer, Asynchronous
188 
189   Input Parameter:
190 + device - The PetscDevice to view
191 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD)
192 
193   Level: beginner
194 
195 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy()
196 @*/
197 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer)
198 {
199   PetscErrorCode ierr;
200 
201   PetscFunctionBegin;
202   PetscValidDevice(device,1);
203   if (!viewer) {ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr);}
204   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
205   ierr = (*device->ops->view)(device,viewer);CHKERRQ(ierr);
206   PetscFunctionReturn(0);
207 }
208 
209 static std::array<bool,PETSC_DEVICE_MAX>        initializedDevice = {};
210 static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices    = {};
211 static_assert(initializedDevice.size() == defaultDevices.size(),"");
212 
213 /*@C
214   PetscDeviceInitialize - Initialize PetscDevice
215 
216   Not Collective, Possibly Synchronous
217 
218   Input Parameter:
219 . type - The PetscDeviceType to initialize
220 
221   Notes:
222   Eagerly initializes the corresponding PetscDeviceType if needed.
223 
224   Level: beginner
225 
226 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialized(), PetscDeviceCreate(), PetscDeviceDestroy()
227 @*/
228 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type)
229 {
230   PetscErrorCode ierr;
231 
232   PetscFunctionBegin;
233   PetscValidDeviceType(type,1);
234   ierr = PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE);CHKERRQ(ierr);
235   PetscFunctionReturn(0);
236 }
237 
238 /*@C
239   PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular
240   PetscDeviceType
241 
242   Not Collective, Asynchronous
243 
244   Input Parameter:
245 . type - The PetscDeviceType to check
246 
247   Output Parameter:
248 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise
249 
250   Notes:
251   If one has not configured PETSc for a particular PetscDeviceType then this routine will
252   return PETSC_FALSE for that PetscDeviceType.
253 
254   Level: beginner
255 
256 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), PetscDeviceCreate(), PetscDeviceDestroy()
257 @*/
258 PetscBool PetscDeviceInitialized(PetscDeviceType type)
259 {
260   return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]);
261 }
262 
263 /* Actual intialization function; any functions claiming to initialize PetscDevice or
264  * PetscDeviceContext will have to run through this one */
265 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId)
266 {
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   PetscValidDeviceType(type,1);
271   if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0);
272   if (PetscUnlikelyDebug(defaultDevices[type])) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]);
273   ierr = PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type]);CHKERRQ(ierr);
274   ierr = PetscDeviceConfigure(defaultDevices[type]);CHKERRQ(ierr);
275   /* the default devices are all automatically "referenced" at least once, otherwise the
276    * reference counting is off for them. We could alternatively increase the reference count
277    * when they are retrieved but that is a lot more brittle; what's to stop someone from doing
278    * the following?
279 
280    for (int i = 0; i < 10000; ++i) auto device = PetscDeviceDefault_Internal();
281    */
282   initializedDevice[type] = true;
283   PetscFunctionReturn(0);
284 }
285 
286 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType)
287 {
288   PetscErrorCode ierr;
289 
290   PetscFunctionBegin;
291   if (!PetscDeviceConfiguredFor_Internal(type)) {
292     ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type]);CHKERRQ(ierr);
293     defaultDevices[type] = PETSC_NULLPTR;
294     PetscFunctionReturn(0);
295   }
296   ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type]);CHKERRQ(ierr);
297   /* ugly switch needed to pick the right global variable... could maybe do this as a union? */
298   switch (type) {
299     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType);
300     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType);
301     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,initialize,comm,&defaultDeviceId,defaultInitType);
302   default:
303     PETSC_DEVICE_DEFAULT_CASE(comm,type);
304     break;
305   }
306   /* defaultInitType and defaultDeviceId now represent what the individual TYPES have decided
307    * to initialize as */
308   if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) {
309     ierr = PetscInfo1(PETSC_NULLPTR,"Greedily initializing %s PetscDevice\n",PetscDeviceTypes[type]);CHKERRQ(ierr);
310     ierr = PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId);CHKERRQ(ierr);
311     if (defaultView) {
312       PetscViewer vwr;
313 
314       ierr = PetscViewerASCIIGetStdout(comm,&vwr);CHKERRQ(ierr);
315       ierr = PetscDeviceView(defaultDevices[type],vwr);CHKERRQ(ierr);
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 /* called from PetscFinalize() do not call yourself! */
322 static PetscErrorCode PetscDeviceFinalize_Private(void)
323 {
324   PetscErrorCode ierr;
325 
326   PetscFunctionBegin;
327   if (PetscDefined(USE_DEBUG)) {
328     PETSC_CONSTEXPR_17 auto PetscDeviceCheckAllDestroyedAfterFinalize = [](){
329       PetscFunctionBegin;
330       for (const auto &device : defaultDevices) {
331         if (PetscUnlikely(device)) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_COR,"Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()",PetscDeviceTypes[device->type],device->refcnt);
332       }
333       PetscFunctionReturn(0);
334     };
335     /* you might be thinking, why on earth are you registered yet another finalizer in a
336      * function already called during PetscRegisterFinalizeAll()? If this seems stupid it's
337      * because it is.
338      *
339      * The crux of the problem is that the initializer (and therefore the ~finalizer~) of
340      * PetscDeviceContext is guaranteed to run after this finalizer. So if the global context
341      * had a default PetscDevice attached it will hold a reference this routine won't destroy
342      * it. So we need to check that all devices have been destroyed after the global context is
343      * destroyed. In summary:
344      *
345      * 1. This finalizer runs and destroys all devices, except it may not because the global
346      *    context may still hold a reference!
347      * 2. The global context finalizer runs and in turn actually destroys the referenced
348      *    device.
349      * 3. Our newly added finalizer runs and checks that all is well.
350      */
351     ierr = PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize);CHKERRQ(ierr);
352   }
353   for (auto &&device : defaultDevices) {ierr = PetscDeviceDestroy(&device);CHKERRQ(ierr);}
354   CHKERRCXX(initializedDevice.fill(false));
355   PetscFunctionReturn(0);
356 }
357 
358 /* begins the init proceeedings for the entire PetscDevice stack. there are 3 stages of
359  * initialization types:
360  1. defaultInitType - how does PetscDevice as a whole expect to initialize?
361  2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize?
362     e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but
363     have all CUDA devices still initialize.
364 
365  All told the following happens:
366  0. defaultInitType -> LAZY
367  1. Check for log_view/log_summary, if yes defaultInitType -> EAGER
368  2. PetscDevice initializes each sub type with deviceDefaultInitType.
369  2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition
370      to checking for specific device init. if view or specific device init
371      subTypeDefaultInitType -> EAGER. disabled once again overrides all.
372  */
373 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm)
374 {
375   PetscBool           flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE;
376   PetscInt            defaultDevice   = PETSC_DECIDE;
377   PetscDeviceType     deviceContextInitDevice = PETSC_DEVICE_DEFAULT;
378   PetscDeviceInitType defaultInitType;
379   PetscErrorCode      ierr;
380 
381   PetscFunctionBegin;
382   if (PetscDefined(USE_DEBUG)) {
383     int result;
384 
385     ierr = MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result);CHKERRMPI(ierr);
386     /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the
387      * global space */
388     if (PetscUnlikely(result != MPI_IDENT)) {
389       char name[MPI_MAX_OBJECT_NAME] = {};
390       int  len; /* unused */
391 
392       ierr = MPI_Comm_get_name(comm,name,&len);CHKERRMPI(ierr);
393       SETERRQ1(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name);
394     }
395   }
396   comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */
397   ierr = PetscRegisterFinalize(PetscDeviceFinalize_Private);CHKERRQ(ierr);
398   ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg);CHKERRQ(ierr);
399   if (!flg) {
400     ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg);CHKERRQ(ierr);
401   }
402   {
403     PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY;
404 
405     ierr = PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys");CHKERRQ(ierr);
406     ierr = PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR);CHKERRQ(ierr);
407     ierr = PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max());CHKERRQ(ierr);
408     ierr = PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg);CHKERRQ(ierr);
409     ierr = PetscOptionsEnd();CHKERRQ(ierr);
410     if (initIdx == PETSC_DEVICE_INIT_NONE) {
411       /* disabled all device initialization if devices are globally disabled */
412       if (PetscUnlikely(defaultDevice != PETSC_DECIDE)) SETERRQ(comm,PETSC_ERR_USER_INPUT,"You have disabled devices but also specified a particular device to use, these options are mutually exlusive");
413       defaultView = PETSC_FALSE;
414     } else {
415       defaultView = static_cast<decltype(defaultView)>(defaultView && flg);
416       if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER;
417     }
418     defaultInitType = static_cast<decltype(defaultInitType)>(initIdx);
419   }
420   static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),"");
421   for (int i = 1; i < PETSC_DEVICE_MAX; ++i) {
422     const auto deviceType = static_cast<PetscDeviceType>(i);
423     auto initType         = defaultInitType;
424 
425     ierr = PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType);CHKERRQ(ierr);
426     if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) {
427       initializeDeviceContextEagerly = PETSC_TRUE;
428       deviceContextInitDevice        = deviceType;
429     }
430   }
431   if (initializeDeviceContextEagerly) {
432     PetscDeviceContext dctx;
433 
434     /* somewhat inefficient here as the device context is potentially fully set up twice (once
435      * when retrieved then the second time if setfromoptions makes changes) */
436     ierr = PetscInfo1(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice]);CHKERRQ(ierr);
437     ierr = PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice);CHKERRQ(ierr);
438     ierr = PetscDeviceContextGetCurrentContext(&dctx);CHKERRQ(ierr);
439     ierr = PetscDeviceContextSetFromOptions(comm,"root_",dctx);CHKERRQ(ierr);
440     ierr = PetscDeviceContextSetUp(dctx);CHKERRQ(ierr);
441   }
442   PetscFunctionReturn(0);
443 }
444 
445 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */
446 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device)
447 {
448   PetscErrorCode ierr;
449 
450   PetscFunctionBegin;
451   PetscValidPointer(device,2);
452   ierr = PetscDeviceInitialize(type);CHKERRQ(ierr);
453   *device = defaultDevices[type];
454   PetscFunctionReturn(0);
455 }
456