1 #include "cupmdevice.hpp" /* I "petscdevice.h" */ 2 #include <petsc/private/petscadvancedmacros.h> 3 4 using namespace Petsc::Device; 5 6 /* 7 note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to 8 be picked up by the switch-case macros below 9 */ 10 #if PetscDefined(HAVE_CUDA) 11 static CUPM::Device<CUPM::DeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA); 12 #endif 13 #if PetscDefined(HAVE_HIP) 14 static CUPM::Device<CUPM::DeviceType::HIP> HIPDevice(PetscDeviceContextCreate_HIP); 15 #endif 16 #if PetscDefined(HAVE_SYCL) 17 #include "sycldevice.hpp" 18 static SYCL::Device SYCLDevice(PetscDeviceContextCreate_SYCL); 19 #endif 20 21 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INVALID) == 0,""); 22 static_assert(Petsc::util::integral_value(PETSC_DEVICE_CUDA) == 1,""); 23 static_assert(Petsc::util::integral_value(PETSC_DEVICE_HIP) == 2,""); 24 static_assert(Petsc::util::integral_value(PETSC_DEVICE_SYCL) == 3,""); 25 static_assert(Petsc::util::integral_value(PETSC_DEVICE_MAX) == 4,""); 26 const char *const PetscDeviceTypes[] = { 27 "invalid", 28 "cuda", 29 "hip", 30 "sycl", 31 "max", 32 "PetscDeviceType", 33 "PETSC_DEVICE_", 34 PETSC_NULLPTR 35 }; 36 37 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_NONE) == 0,""); 38 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_LAZY) == 1,""); 39 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_EAGER) == 2,""); 40 const char *const PetscDeviceInitTypes[] = { 41 "none", 42 "lazy", 43 "eager", 44 "PetscDeviceInitType", 45 "PETSC_DEVICE_INIT_", 46 PETSC_NULLPTR 47 }; 48 static_assert( 49 sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6, 50 "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!" 51 ); 52 53 #define PETSC_DEVICE_CASE(IMPLS,func,...) \ 54 case PetscConcat_(PETSC_DEVICE_,IMPLS): { \ 55 PetscCall(PetscConcat_(IMPLS,Device).func(__VA_ARGS__)); \ 56 } break 57 58 /* 59 Suppose you have: 60 61 CUDADevice.myFunction(arg1,arg2) 62 63 that you would like to conditionally define and call in a switch-case: 64 65 switch(PetscDeviceType) { 66 #if PetscDefined(HAVE_CUDA) 67 case PETSC_DEVICE_CUDA: { 68 PetscCall(CUDADevice.myFunction(arg1,arg2)); 69 } break; 70 #endif 71 } 72 73 then calling this macro: 74 75 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2) 76 77 will expand to the following case statement: 78 79 case PETSC_DEVICE_CUDA: { 80 PetscCall(CUDADevice.myFunction(arg1,arg2)); 81 } break 82 83 if PetscDefined(HAVE_CUDA) evaluates to 1, and expand to nothing otherwise 84 */ 85 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS,func,...) \ 86 PetscIfPetscDefined(PetscConcat_(HAVE_,IMPLS),PETSC_DEVICE_CASE,PetscExpandToNothing)(IMPLS,func,__VA_ARGS__) 87 88 /*@C 89 PetscDeviceCreate - Get a new handle for a particular device type 90 91 Not Collective, Possibly Synchronous 92 93 Input Parameters: 94 + type - The type of PetscDevice 95 - devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically) 96 97 Output Parameter: 98 . device - The PetscDevice 99 100 Notes: 101 This routine may initialize PetscDevice. If this is the case, this will most likely cause 102 some sort of device synchronization. 103 104 devid is what you might pass to cudaSetDevice() for example. 105 106 Level: beginner 107 108 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`, 109 `PetscDeviceInitialized()`, `PetscDeviceConfigure()`, `PetscDeviceView()`, `PetscDeviceDestroy()` 110 @*/ 111 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device) 112 { 113 static PetscInt PetscDeviceCounter = 0; 114 PetscDevice dev; 115 116 PetscFunctionBegin; 117 PetscValidDeviceType(type,1); 118 PetscValidPointer(device,3); 119 PetscCall(PetscDeviceInitializePackage()); 120 PetscCall(PetscNew(&dev)); 121 dev->id = PetscDeviceCounter++; 122 dev->type = type; 123 dev->refcnt = 1; 124 /* 125 if you are adding a device, you also need to add it's initialization in 126 PetscDeviceInitializeTypeFromOptions_Private() below 127 */ 128 switch (type) { 129 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid); 130 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid); 131 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,getDevice,dev,devid); 132 default: 133 /* in case the above macros expand to nothing this silences any unused variable warnings */ 134 (void)(devid); 135 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]); 136 } 137 *device = dev; 138 PetscFunctionReturn(0); 139 } 140 141 /*@C 142 PetscDeviceDestroy - Free a PetscDevice 143 144 Not Collective, Asynchronous 145 146 Input Parameter: 147 . device - The PetscDevice 148 149 Level: beginner 150 151 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceView()` 152 @*/ 153 PetscErrorCode PetscDeviceDestroy(PetscDevice *device) 154 { 155 PetscFunctionBegin; 156 if (!*device) PetscFunctionReturn(0); 157 PetscValidDevice(*device,1); 158 PetscCall(PetscDeviceDereference_Internal(*device)); 159 if ((*device)->refcnt) { 160 *device = PETSC_NULLPTR; 161 PetscFunctionReturn(0); 162 } 163 PetscCall(PetscFree((*device)->data)); 164 PetscCall(PetscFree(*device)); 165 PetscFunctionReturn(0); 166 } 167 168 /*@C 169 PetscDeviceConfigure - Configure a particular PetscDevice 170 171 Not Collective, Asynchronous 172 173 Input Parameter: 174 . device - The PetscDevice to configure 175 176 Notes: 177 The user should not assume that this is a cheap operation 178 179 Level: beginner 180 181 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceView()`, `PetscDeviceDestroy()` 182 @*/ 183 PetscErrorCode PetscDeviceConfigure(PetscDevice device) 184 { 185 PetscFunctionBegin; 186 PetscValidDevice(device,1); 187 if (PetscDefined(USE_DEBUG)) { 188 /* 189 if no available configuration is available, this cascades all the way down to default 190 and error 191 */ 192 switch (device->type) { 193 case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break; 194 case PETSC_DEVICE_HIP: if (PetscDefined(HAVE_HIP)) break; 195 case PETSC_DEVICE_SYCL: if (PetscDefined(HAVE_SYCL)) break; 196 default: 197 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[device->type]); 198 } 199 } 200 PetscCall((*device->ops->configure)(device)); 201 PetscFunctionReturn(0); 202 } 203 204 /*@C 205 PetscDeviceView - View a PetscDevice 206 207 Collective on viewer, Asynchronous 208 209 Input Parameters: 210 + device - The PetscDevice to view 211 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD) 212 213 Level: beginner 214 215 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()` 216 @*/ 217 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer) 218 { 219 PetscFunctionBegin; 220 PetscValidDevice(device,1); 221 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer)); 222 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 223 PetscCall((*device->ops->view)(device,viewer)); 224 PetscFunctionReturn(0); 225 } 226 227 /*@C 228 PetscDeviceGetDeviceId - Get the device id 229 230 Not collective 231 232 Input Parameter: 233 . device - The PetscDevice 234 235 Output Parameter: 236 . id - The device id 237 238 Level: beginner 239 240 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()` 241 @*/ 242 PetscErrorCode PetscDeviceGetDeviceId(PetscDevice device, PetscInt *id) 243 { 244 PetscFunctionBegin; 245 PetscValidDevice(device,1); 246 PetscValidIntPointer(id,2); 247 *id = device->deviceId; 248 PetscFunctionReturn(0); 249 } 250 251 static std::array<bool,PETSC_DEVICE_MAX> initializedDevice = {}; 252 static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices = {}; 253 static_assert(initializedDevice.size() == defaultDevices.size(),""); 254 255 /*@C 256 PetscDeviceInitialize - Initialize PetscDevice 257 258 Not Collective, Possibly Synchronous 259 260 Input Parameter: 261 . type - The PetscDeviceType to initialize 262 263 Notes: 264 Eagerly initializes the corresponding PetscDeviceType if needed. 265 266 Level: beginner 267 268 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialized()`, `PetscDeviceCreate()`, `PetscDeviceDestroy()` 269 @*/ 270 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type) 271 { 272 PetscFunctionBegin; 273 PetscValidDeviceType(type,1); 274 PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE)); 275 PetscFunctionReturn(0); 276 } 277 278 /*@C 279 PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular 280 PetscDeviceType 281 282 Not Collective, Asynchronous 283 284 Input Parameter: 285 . type - The PetscDeviceType to check 286 287 Output Parameter: 288 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise 289 290 Notes: 291 If one has not configured PETSc for a particular PetscDeviceType then this routine will 292 return PETSC_FALSE for that PetscDeviceType. 293 294 Level: beginner 295 296 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`, `PetscDeviceCreate()`, `PetscDeviceDestroy()` 297 @*/ 298 PetscBool PetscDeviceInitialized(PetscDeviceType type) 299 { 300 return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]); 301 } 302 303 /* 304 Actual intialization function; any functions claiming to initialize PetscDevice or 305 PetscDeviceContext will have to run through this one 306 */ 307 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId) 308 { 309 PetscFunctionBegin; 310 PetscValidDeviceType(type,1); 311 if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0); 312 PetscAssert(!defaultDevices[type],PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]); 313 PetscCall(PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type])); 314 PetscCall(PetscDeviceConfigure(defaultDevices[type])); 315 initializedDevice[type] = true; 316 PetscFunctionReturn(0); 317 } 318 319 #if PetscDefined(USE_LOG) 320 PETSC_INTERN PetscErrorCode PetscLogInitialize(void); 321 #else 322 #define PetscLogInitialize() 0 323 #endif 324 325 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType) 326 { 327 PetscFunctionBegin; 328 if (!PetscDeviceConfiguredFor_Internal(type)) { 329 PetscCall(PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type])); 330 defaultDevices[type] = PETSC_NULLPTR; 331 PetscFunctionReturn(0); 332 } 333 PetscCall(PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type])); 334 /* ugly switch needed to pick the right global variable... could maybe do this as a union? */ 335 switch (type) { 336 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType); 337 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType); 338 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,initialize,comm,&defaultDeviceId,defaultInitType); 339 default: 340 SETERRQ(comm,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]); 341 } 342 /* 343 defaultInitType and defaultDeviceId now represent what the individual TYPES have decided to 344 initialize as 345 */ 346 if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) { 347 PetscCall(PetscInfo(PETSC_NULLPTR,"Eagerly initializing %s PetscDevice\n",PetscDeviceTypes[type])); 348 PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId)); 349 if (defaultView) { 350 PetscViewer vwr; 351 352 PetscCall(PetscLogInitialize()); 353 PetscCall(PetscViewerASCIIGetStdout(comm,&vwr)); 354 PetscCall(PetscDeviceView(defaultDevices[type],vwr)); 355 } 356 } 357 PetscFunctionReturn(0); 358 } 359 360 /* called from PetscFinalize() do not call yourself! */ 361 static PetscErrorCode PetscDeviceFinalize_Private(void) 362 { 363 PetscFunctionBegin; 364 if (PetscDefined(USE_DEBUG)) { 365 const auto PetscDeviceCheckAllDestroyedAfterFinalize = []{ 366 PetscFunctionBegin; 367 for (auto&& device : defaultDevices) PetscCheck(!device,PETSC_COMM_WORLD,PETSC_ERR_COR,"Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()",PetscDeviceTypes[device->type],device->refcnt); 368 PetscFunctionReturn(0); 369 }; 370 /* 371 you might be thinking, why on earth are you registered yet another finalizer in a 372 function already called during PetscRegisterFinalizeAll()? If this seems stupid it's 373 because it is. 374 375 The crux of the problem is that the initializer (and therefore the ~finalizer~) of 376 PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context had 377 a default PetscDevice attached, that PetscDevice will have a reference count >0 and hence 378 won't be destroyed yet. So we need to repeat the check that all devices have been 379 destroyed again ~after~ the global context is destroyed. In summary: 380 381 1. This finalizer runs and destroys all devices, except it may not because the global 382 context may still hold a reference! 383 2. The global context finalizer runs and does the final reference count decrement 384 required, which actually destroys the held device. 385 3. Our newly added finalizer runs and checks that all is well. 386 */ 387 PetscCall(PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize)); 388 } 389 for (auto &&device : defaultDevices) PetscCall(PetscDeviceDestroy(&device)); 390 PetscCallCXX(initializedDevice.fill(false)); 391 PetscFunctionReturn(0); 392 } 393 394 /* 395 Begins the init proceeedings for the entire PetscDevice stack. there are 3 stages of 396 initialization types: 397 398 1. defaultInitType - how does PetscDevice as a whole expect to initialize? 399 2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize? 400 e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but 401 have all CUDA devices still initialize. 402 403 All told the following happens: 404 405 0. defaultInitType -> LAZY 406 1. Check for log_view/log_summary, if yes defaultInitType -> EAGER 407 2. PetscDevice initializes each sub type with deviceDefaultInitType. 408 2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition 409 to checking for specific device init. if view or specific device init 410 subTypeDefaultInitType -> EAGER. disabled once again overrides all. 411 */ 412 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm) 413 { 414 PetscBool flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE; 415 PetscInt defaultDevice = PETSC_DECIDE; 416 PetscDeviceType deviceContextInitDevice = PETSC_DEVICE_DEFAULT; 417 PetscDeviceInitType defaultInitType; 418 419 PetscFunctionBegin; 420 if (PetscDefined(USE_DEBUG)) { 421 int result; 422 423 PetscCallMPI(MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result)); 424 /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the 425 * global space */ 426 if (PetscUnlikely(result != MPI_IDENT)) { 427 char name[MPI_MAX_OBJECT_NAME] = {}; 428 int len; /* unused */ 429 430 PetscCallMPI(MPI_Comm_get_name(comm,name,&len)); 431 SETERRQ(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name); 432 } 433 } 434 comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */ 435 PetscCall(PetscRegisterFinalize(PetscDeviceFinalize_Private)); 436 PetscCall(PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg)); 437 if (!flg) PetscCall(PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg)); 438 #if defined(PETSC_HAVE_DEVICE) 439 PetscBool gtime; 440 PetscCall(PetscOptionsHasName(NULL,NULL,"-log_view_gpu_time",>ime)); 441 if (gtime) PetscCall(PetscLogGpuTime()); 442 #endif 443 { 444 PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY; 445 446 PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys"); 447 PetscCall(PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR)); 448 PetscCall(PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate()",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max())); 449 PetscCall(PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg)); 450 PetscOptionsEnd(); 451 if (initIdx == PETSC_DEVICE_INIT_NONE) { 452 /* disabled all device initialization if devices are globally disabled */ 453 PetscCheck(defaultDevice == PETSC_DECIDE,comm,PETSC_ERR_USER_INPUT,"You have disabled devices but also specified a particular device to use, these options are mutually exlusive"); 454 defaultView = PETSC_FALSE; 455 } else { 456 defaultView = static_cast<decltype(defaultView)>(defaultView && flg); 457 if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER; 458 } 459 defaultInitType = static_cast<decltype(defaultInitType)>(initIdx); 460 } 461 static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),""); 462 for (int i = 1; i < PETSC_DEVICE_MAX; ++i) { 463 const auto deviceType = static_cast<PetscDeviceType>(i); 464 auto initType = defaultInitType; 465 466 PetscCall(PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType)); 467 if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) { 468 initializeDeviceContextEagerly = PETSC_TRUE; 469 deviceContextInitDevice = deviceType; 470 } 471 } 472 if (initializeDeviceContextEagerly) { 473 PetscDeviceContext dctx; 474 475 /* 476 somewhat inefficient here as the device context is potentially fully set up twice (once 477 when retrieved then the second time if setfromoptions makes changes) 478 */ 479 PetscCall(PetscInfo(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice])); 480 PetscCall(PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice)); 481 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 482 PetscCall(PetscDeviceContextSetFromOptions(comm,"root_",dctx)); 483 PetscCall(PetscDeviceContextSetUp(dctx)); 484 } 485 PetscFunctionReturn(0); 486 } 487 488 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */ 489 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device) 490 { 491 PetscFunctionBegin; 492 PetscValidPointer(device,2); 493 PetscCall(PetscDeviceInitialize(type)); 494 *device = defaultDevices[type]; 495 PetscFunctionReturn(0); 496 } 497