1 #include "cupmdevice.hpp" /* I "petscdevice.h" */ 2 #include <petsc/private/petscadvancedmacros.h> 3 4 using namespace Petsc::Device; 5 6 /* 7 note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to 8 be picked up by the switch-case macros below 9 */ 10 #if PetscDefined(HAVE_CUDA) 11 static CUPM::Device<CUPM::DeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA); 12 #endif 13 #if PetscDefined(HAVE_HIP) 14 static CUPM::Device<CUPM::DeviceType::HIP> HIPDevice(PetscDeviceContextCreate_HIP); 15 #endif 16 #if PetscDefined(HAVE_SYCL) 17 #include "sycldevice.hpp" 18 static SYCL::Device SYCLDevice(PetscDeviceContextCreate_SYCL); 19 #endif 20 21 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INVALID) == 0, ""); 22 static_assert(Petsc::util::integral_value(PETSC_DEVICE_CUDA) == 1, ""); 23 static_assert(Petsc::util::integral_value(PETSC_DEVICE_HIP) == 2, ""); 24 static_assert(Petsc::util::integral_value(PETSC_DEVICE_SYCL) == 3, ""); 25 static_assert(Petsc::util::integral_value(PETSC_DEVICE_MAX) == 4, ""); 26 const char *const PetscDeviceTypes[] = {"invalid", "cuda", "hip", "sycl", "max", "PetscDeviceType", "PETSC_DEVICE_", PETSC_NULLPTR}; 27 28 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_NONE) == 0, ""); 29 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_LAZY) == 1, ""); 30 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_EAGER) == 2, ""); 31 const char *const PetscDeviceInitTypes[] = {"none", "lazy", "eager", "PetscDeviceInitType", "PETSC_DEVICE_INIT_", PETSC_NULLPTR}; 32 static_assert(sizeof(PetscDeviceInitTypes) / sizeof(*PetscDeviceInitTypes) == 6, "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!"); 33 34 #define PETSC_DEVICE_CASE(IMPLS, func, ...) 
\ 35 case PetscConcat_(PETSC_DEVICE_, IMPLS): { \ 36 PetscCall(PetscConcat_(IMPLS, Device).func(__VA_ARGS__)); \ 37 } break 38 39 /* 40 Suppose you have: 41 42 CUDADevice.myFunction(arg1,arg2) 43 44 that you would like to conditionally define and call in a switch-case: 45 46 switch(PetscDeviceType) { 47 #if PetscDefined(HAVE_CUDA) 48 case PETSC_DEVICE_CUDA: { 49 PetscCall(CUDADevice.myFunction(arg1,arg2)); 50 } break; 51 #endif 52 } 53 54 then calling this macro: 55 56 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2) 57 58 will expand to the following case statement: 59 60 case PETSC_DEVICE_CUDA: { 61 PetscCall(CUDADevice.myFunction(arg1,arg2)); 62 } break 63 64 if PetscDefined(HAVE_CUDA) evaluates to 1, and expand to nothing otherwise 65 */ 66 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS, func, ...) PetscIfPetscDefined(PetscConcat_(HAVE_, IMPLS), PETSC_DEVICE_CASE, PetscExpandToNothing)(IMPLS, func, __VA_ARGS__) 67 68 /*@C 69 PetscDeviceCreate - Get a new handle for a particular device type 70 71 Not Collective, Possibly Synchronous 72 73 Input Parameters: 74 + type - The type of PetscDevice 75 - devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically) 76 77 Output Parameter: 78 . device - The PetscDevice 79 80 Notes: 81 This routine may initialize PetscDevice. If this is the case, this will most likely cause 82 some sort of device synchronization. 83 84 devid is what you might pass to cudaSetDevice() for example. 
85 86 Level: beginner 87 88 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`, 89 `PetscDeviceInitialized()`, `PetscDeviceConfigure()`, `PetscDeviceView()`, `PetscDeviceDestroy()` 90 @*/ 91 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device) { 92 static PetscInt PetscDeviceCounter = 0; 93 PetscDevice dev; 94 95 PetscFunctionBegin; 96 PetscValidDeviceType(type, 1); 97 PetscValidPointer(device, 3); 98 PetscCall(PetscDeviceInitializePackage()); 99 PetscCall(PetscNew(&dev)); 100 dev->id = PetscDeviceCounter++; 101 dev->type = type; 102 dev->refcnt = 1; 103 /* 104 if you are adding a device, you also need to add it's initialization in 105 PetscDeviceInitializeTypeFromOptions_Private() below 106 */ 107 switch (type) { 108 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA, getDevice, dev, devid); 109 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP, getDevice, dev, devid); 110 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL, getDevice, dev, devid); 111 default: 112 /* in case the above macros expand to nothing this silences any unused variable warnings */ 113 (void)(devid); 114 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[type]); 115 } 116 *device = dev; 117 PetscFunctionReturn(0); 118 } 119 120 /*@C 121 PetscDeviceDestroy - Free a PetscDevice 122 123 Not Collective, Asynchronous 124 125 Input Parameter: 126 . 
device - The PetscDevice 127 128 Level: beginner 129 130 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceView()` 131 @*/ 132 PetscErrorCode PetscDeviceDestroy(PetscDevice *device) { 133 PetscFunctionBegin; 134 if (!*device) PetscFunctionReturn(0); 135 PetscValidDevice(*device, 1); 136 PetscCall(PetscDeviceDereference_Internal(*device)); 137 if ((*device)->refcnt) { 138 *device = PETSC_NULLPTR; 139 PetscFunctionReturn(0); 140 } 141 PetscCall(PetscFree((*device)->data)); 142 PetscCall(PetscFree(*device)); 143 PetscFunctionReturn(0); 144 } 145 146 /*@C 147 PetscDeviceConfigure - Configure a particular PetscDevice 148 149 Not Collective, Asynchronous 150 151 Input Parameter: 152 . device - The PetscDevice to configure 153 154 Notes: 155 The user should not assume that this is a cheap operation 156 157 Level: beginner 158 159 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceView()`, `PetscDeviceDestroy()` 160 @*/ 161 PetscErrorCode PetscDeviceConfigure(PetscDevice device) { 162 PetscFunctionBegin; 163 PetscValidDevice(device, 1); 164 if (PetscDefined(USE_DEBUG)) { 165 /* 166 if no available configuration is available, this cascades all the way down to default 167 and error 168 */ 169 switch (device->type) { 170 case PETSC_DEVICE_CUDA: 171 if (PetscDefined(HAVE_CUDA)) break; 172 case PETSC_DEVICE_HIP: 173 if (PetscDefined(HAVE_HIP)) break; 174 case PETSC_DEVICE_SYCL: 175 if (PetscDefined(HAVE_SYCL)) break; 176 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[device->type]); 177 } 178 } 179 PetscUseTypeMethod(device, configure); 180 PetscFunctionReturn(0); 181 } 182 183 /*@C 184 PetscDeviceView - View a PetscDevice 185 186 Collective on viewer, Asynchronous 187 188 Input Parameters: 189 + device - The PetscDevice to view 190 - viewer - The PetscViewer to view the device with (NULL for 
PETSC_VIEWER_STDOUT_WORLD) 191 192 Level: beginner 193 194 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()` 195 @*/ 196 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer) { 197 PetscFunctionBegin; 198 PetscValidDevice(device, 1); 199 if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD, &viewer)); 200 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 201 PetscUseTypeMethod(device, view, viewer); 202 PetscFunctionReturn(0); 203 } 204 205 /*@C 206 PetscDeviceGetDeviceId - Get the device id 207 208 Not collective 209 210 Input Parameter: 211 . device - The PetscDevice 212 213 Output Parameter: 214 . id - The device id 215 216 Level: beginner 217 218 .seealso: `PetscDevice`, `PetscDeviceCreate()`, `PetscDeviceConfigure()`, `PetscDeviceDestroy()` 219 @*/ 220 PetscErrorCode PetscDeviceGetDeviceId(PetscDevice device, PetscInt *id) { 221 PetscFunctionBegin; 222 PetscValidDevice(device, 1); 223 PetscValidIntPointer(id, 2); 224 *id = device->deviceId; 225 PetscFunctionReturn(0); 226 } 227 228 static std::array<bool, PETSC_DEVICE_MAX> initializedDevice = {}; 229 static std::array<PetscDevice, PETSC_DEVICE_MAX> defaultDevices = {}; 230 static_assert(initializedDevice.size() == defaultDevices.size(), ""); 231 232 /*@C 233 PetscDeviceInitialize - Initialize PetscDevice 234 235 Not Collective, Possibly Synchronous 236 237 Input Parameter: 238 . type - The PetscDeviceType to initialize 239 240 Notes: 241 Eagerly initializes the corresponding PetscDeviceType if needed. 
242 243 Level: beginner 244 245 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialized()`, `PetscDeviceCreate()`, `PetscDeviceDestroy()` 246 @*/ 247 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type) { 248 PetscFunctionBegin; 249 PetscValidDeviceType(type, 1); 250 PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type, PETSC_DECIDE)); 251 PetscFunctionReturn(0); 252 } 253 254 /*@C 255 PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular 256 PetscDeviceType 257 258 Not Collective, Asynchronous 259 260 Input Parameter: 261 . type - The PetscDeviceType to check 262 263 Output Parameter: 264 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise 265 266 Notes: 267 If one has not configured PETSc for a particular PetscDeviceType then this routine will 268 return PETSC_FALSE for that PetscDeviceType. 269 270 Level: beginner 271 272 .seealso: `PetscDevice`, `PetscDeviceInitType`, `PetscDeviceInitialize()`, `PetscDeviceCreate()`, `PetscDeviceDestroy()` 273 @*/ 274 PetscBool PetscDeviceInitialized(PetscDeviceType type) { 275 return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]); 276 } 277 278 /* 279 Actual intialization function; any functions claiming to initialize PetscDevice or 280 PetscDeviceContext will have to run through this one 281 */ 282 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId) { 283 PetscFunctionBegin; 284 PetscValidDeviceType(type, 1); 285 if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0); 286 PetscAssert(!defaultDevices[type], PETSC_COMM_SELF, PETSC_ERR_MEM, "Trying to overwrite existing default device of type %s", PetscDeviceTypes[type]); 287 PetscCall(PetscDeviceCreate(type, defaultDeviceId, &defaultDevices[type])); 288 PetscCall(PetscDeviceConfigure(defaultDevices[type])); 289 initializedDevice[type] = true; 290 PetscFunctionReturn(0); 291 } 292 
293 #if PetscDefined(USE_LOG) 294 PETSC_INTERN PetscErrorCode PetscLogInitialize(void); 295 #else 296 #define PetscLogInitialize() 0 297 #endif 298 299 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType) { 300 PetscFunctionBegin; 301 if (!PetscDeviceConfiguredFor_Internal(type)) { 302 PetscCall(PetscInfo(PETSC_NULLPTR, "PetscDeviceType %s not supported\n", PetscDeviceTypes[type])); 303 defaultDevices[type] = PETSC_NULLPTR; 304 PetscFunctionReturn(0); 305 } 306 PetscCall(PetscInfo(PETSC_NULLPTR, "PetscDeviceType %s supported, initializing\n", PetscDeviceTypes[type])); 307 /* ugly switch needed to pick the right global variable... could maybe do this as a union? */ 308 switch (type) { 309 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA, initialize, comm, &defaultDeviceId, defaultInitType); 310 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP, initialize, comm, &defaultDeviceId, defaultInitType); 311 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL, initialize, comm, &defaultDeviceId, defaultInitType); 312 default: SETERRQ(comm, PETSC_ERR_PLIB, "PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch", PetscDeviceTypes[type]); 313 } 314 /* 315 defaultInitType and defaultDeviceId now represent what the individual TYPES have decided to 316 initialize as 317 */ 318 if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) { 319 PetscCall(PetscLogInitialize()); 320 PetscCall(PetscInfo(PETSC_NULLPTR, "Eagerly initializing %s PetscDevice\n", PetscDeviceTypes[type])); 321 PetscCall(PetscDeviceInitializeDefaultDevice_Internal(type, defaultDeviceId)); 322 if (defaultView) { 323 PetscViewer vwr; 324 325 PetscCall(PetscViewerASCIIGetStdout(comm, &vwr)); 326 PetscCall(PetscDeviceView(defaultDevices[type], vwr)); 327 } 328 } 329 PetscFunctionReturn(0); 330 } 331 332 /* called from PetscFinalize() do not call yourself! 
*/
static PetscErrorCode PetscDeviceFinalize_Private(void) {
  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    /* errors if any default device slot is still occupied when it runs */
    const auto checkAllDestroyed = []() -> PetscErrorCode {
      PetscFunctionBegin;
      for (auto &&dev : defaultDevices) {
        PetscCheck(!dev, PETSC_COMM_WORLD, PETSC_ERR_COR, "Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()", PetscDeviceTypes[dev->type], dev->refcnt);
      }
      PetscFunctionReturn(0);
    };
    /*
      You might be thinking, why on earth are you registering yet another finalizer in a
      function already called during PetscRegisterFinalizeAll()? If this seems stupid it's
      because it is.

      The crux of the problem is that the initializer (and therefore the ~finalizer~) of
      PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context
      had a default PetscDevice attached, that PetscDevice will have a reference count >0 and
      hence won't be destroyed yet. So we need to repeat the check that all devices have been
      destroyed again ~after~ the global context is destroyed. In summary:

      1. This finalizer runs and destroys all devices, except it may not because the global
         context may still hold a reference!
      2. The global context finalizer runs and does the final reference count decrement
         required, which actually destroys the held device.
      3. Our newly added finalizer runs and checks that all is well.
    */
    PetscCall(PetscRegisterFinalize(checkAllDestroyed));
  }
  /* drop this file's reference on every default device, then forget that anything was initialized */
  for (auto &&dev : defaultDevices) { PetscCall(PetscDeviceDestroy(&dev)); }
  PetscCallCXX(initializedDevice.fill(false));
  PetscFunctionReturn(0);
}

/*
  Begins the init proceedings for the entire PetscDevice stack. there are 2 stages of
  initialization types:

  1. defaultInitType - how does PetscDevice as a whole expect to initialize?
  2.
subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize? 372 e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but 373 have all CUDA devices still initialize. 374 375 All told the following happens: 376 377 0. defaultInitType -> LAZY 378 1. Check for log_view/log_summary, if yes defaultInitType -> EAGER 379 2. PetscDevice initializes each sub type with deviceDefaultInitType. 380 2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition 381 to checking for specific device init. if view or specific device init 382 subTypeDefaultInitType -> EAGER. disabled once again overrides all. 383 */ 384 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm) { 385 auto defaultView = PETSC_FALSE; 386 auto initializeDeviceContextEagerly = PETSC_FALSE; 387 auto defaultDevice = PetscInt{PETSC_DECIDE}; 388 auto deviceContextInitDevice = PETSC_DEVICE_DEFAULT; 389 PetscDeviceInitType defaultInitType; 390 391 PetscFunctionBegin; 392 if (PetscDefined(USE_DEBUG)) { 393 int result; 394 395 PetscCallMPI(MPI_Comm_compare(comm, PETSC_COMM_WORLD, &result)); 396 /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the 397 * global space */ 398 if (PetscUnlikely(result != MPI_IDENT)) { 399 char name[MPI_MAX_OBJECT_NAME] = {}; 400 int len; /* unused */ 401 402 PetscCallMPI(MPI_Comm_get_name(comm, name, &len)); 403 SETERRQ(comm, PETSC_ERR_MPI, "Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD", name); 404 } 405 } 406 comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */ 407 PetscCall(PetscRegisterFinalize(PetscDeviceFinalize_Private)); 408 409 { 410 PetscInt initIdx = PETSC_DEVICE_INIT_LAZY; 411 PetscBool flg; 412 413 PetscCall(PetscOptionsHasName(PETSC_NULLPTR, PETSC_NULLPTR, "-log_view_gpu_time", &flg)); 414 if (flg) PetscCall(PetscLogGpuTime()); 415 416 /* 
----------------------------------------------------------------------------------- */ 417 /* Global PetscDevice Options */ 418 /* ----------------------------------------------------------------------------------- */ 419 PetscOptionsBegin(comm, PETSC_NULLPTR, "PetscDevice Options", "Sys"); 420 PetscCall(PetscOptionsEList("-device_enable", "How (or whether) to initialize PetscDevices", "PetscDeviceInitializeFromOptions_Internal()", PetscDeviceInitTypes, 3, PetscDeviceInitTypes[initIdx], &initIdx, PETSC_NULLPTR)); 421 PetscCall(PetscOptionsRangeInt("-device_select", "Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device", "PetscDeviceCreate()", defaultDevice, &defaultDevice, PETSC_NULLPTR, PETSC_DECIDE, std::numeric_limits<int>::max())); 422 PetscCall(PetscOptionsBool("-device_view", "Display device information and assignments (forces eager initialization)", PETSC_NULLPTR, defaultView, &defaultView, &flg)); 423 PetscOptionsEnd(); 424 425 if (initIdx == PETSC_DEVICE_INIT_NONE) { 426 /* disabled all device initialization if devices are globally disabled */ 427 PetscCheck(defaultDevice == PETSC_DECIDE, comm, PETSC_ERR_USER_INPUT, "You have disabled devices but also specified a particular device to use, these options are mutually exlusive"); 428 defaultView = PETSC_FALSE; 429 } else { 430 defaultView = static_cast<decltype(defaultView)>(defaultView && flg); 431 if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER; 432 } 433 defaultInitType = static_cast<decltype(defaultInitType)>(initIdx); 434 } 435 static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()), ""); 436 for (int i = 1; i < PETSC_DEVICE_MAX; ++i) { 437 const auto deviceType = static_cast<PetscDeviceType>(i); 438 auto initType = defaultInitType; 439 440 PetscCall(PetscDeviceInitializeTypeFromOptions_Private(comm, deviceType, defaultDevice, defaultView, &initType)); 441 if 
(PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) { 442 initializeDeviceContextEagerly = PETSC_TRUE; 443 deviceContextInitDevice = deviceType; 444 PetscCall(PetscInfo(PETSC_NULLPTR, "PetscDevice %s set as default device type due to eager initialization\n", PetscDeviceTypes[deviceType])); 445 } 446 } 447 if (initializeDeviceContextEagerly) { 448 PetscDeviceContext dctx; 449 450 /* 451 somewhat inefficient here as the device context is potentially fully set up twice (once 452 when retrieved then the second time if setfromoptions makes changes) 453 */ 454 PetscCall(PetscInfo(PETSC_NULLPTR, "Eagerly initializing PetscDeviceContext with %s device\n", PetscDeviceTypes[deviceContextInitDevice])); 455 PetscCall(PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice)); 456 PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 457 PetscCall(PetscDeviceContextSetFromOptions(comm, "root_", dctx)); 458 PetscCall(PetscDeviceContextSetUp(dctx)); 459 } 460 PetscFunctionReturn(0); 461 } 462 463 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */ 464 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device) { 465 PetscFunctionBegin; 466 PetscValidPointer(device, 2); 467 PetscCall(PetscDeviceInitialize(type)); 468 *device = defaultDevices[type]; 469 PetscFunctionReturn(0); 470 } 471