1 #include "cupmdevice.hpp" /* I "petscdevice.h" */ 2 #include <petsc/private/petscadvancedmacros.h> 3 4 using namespace Petsc::Device; 5 6 /* 7 note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to 8 be picked up by the switch-case macros below 9 */ 10 #if PetscDefined(HAVE_CUDA) 11 static CUPM::Device<CUPM::DeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA); 12 #endif 13 #if PetscDefined(HAVE_HIP) 14 static CUPM::Device<CUPM::DeviceType::HIP> HIPDevice(PetscDeviceContextCreate_HIP); 15 #endif 16 #if PetscDefined(HAVE_SYCL) 17 #include "sycldevice.hpp" 18 static SYCL::Device SYCLDevice(PetscDeviceContextCreate_SYCL); 19 #endif 20 21 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INVALID) == 0,""); 22 static_assert(Petsc::util::integral_value(PETSC_DEVICE_CUDA) == 1,""); 23 static_assert(Petsc::util::integral_value(PETSC_DEVICE_HIP) == 2,""); 24 static_assert(Petsc::util::integral_value(PETSC_DEVICE_SYCL) == 3,""); 25 static_assert(Petsc::util::integral_value(PETSC_DEVICE_MAX) == 4,""); 26 const char *const PetscDeviceTypes[] = { 27 "invalid", 28 "cuda", 29 "hip", 30 "sycl", 31 "max", 32 "PetscDeviceType", 33 "PETSC_DEVICE_", 34 PETSC_NULLPTR 35 }; 36 37 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_NONE) == 0,""); 38 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_LAZY) == 1,""); 39 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_EAGER) == 2,""); 40 const char *const PetscDeviceInitTypes[] = { 41 "none", 42 "lazy", 43 "eager", 44 "PetscDeviceInitType", 45 "PETSC_DEVICE_INIT_", 46 PETSC_NULLPTR 47 }; 48 static_assert( 49 sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6, 50 "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!" 51 ); 52 53 #define PETSC_DEVICE_CASE(IMPLS,func,...) \ 54 case PetscConcat_(PETSC_DEVICE_,IMPLS): { \ 55 auto ierr_ = PetscConcat_(IMPLS,Device).func(__VA_ARGS__);CHKERRQ(ierr_); \ 56 } break 57 58 /* 59 Suppose you have: 60 61 CUDADevice.myFunction(arg1,arg2) 62 63 that you would like to conditionally define and call in a switch-case: 64 65 switch(PetscDeviceType) { 66 #if PetscDefined(HAVE_CUDA) 67 case PETSC_DEVICE_CUDA: { 68 auto ierr = CUDADevice.myFunction(arg1,arg2);CHKERRQ(ierr); 69 } break; 70 #endif 71 } 72 73 then calling this macro: 74 75 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2) 76 77 will expand to the following case statement: 78 79 case PETSC_DEVICE_CUDA: { 80 auto ierr = CUDADevice.myFunction(arg1,arg2);CHKERRQ(ierr); 81 } break 82 83 if PetscDefined(HAVE_CUDA) evaluates to 1, and expand to nothing otherwise 84 */ 85 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS,func,...) \ 86 PetscIfPetscDefined(PetscConcat_(HAVE_,IMPLS),PETSC_DEVICE_CASE,PetscExpandToNothing)(IMPLS,func,__VA_ARGS__) 87 88 /*@C 89 PetscDeviceCreate - Get a new handle for a particular device type 90 91 Not Collective, Possibly Synchronous 92 93 Input Parameter: 94 . type - The type of PetscDevice 95 . devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically) 96 97 Output Parameter: 98 . device - The PetscDevice 99 100 Notes: 101 This routine may initialize PetscDevice. If this is the case, this will most likely cause 102 some sort of device synchronization. 103 104 devid is what you might pass to cudaSetDevice() for example. 105 106 Level: beginner 107 108 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), 109 PetscDeviceInitialized(), PetscDeviceConfigure(), PetscDeviceView(), PetscDeviceDestroy() 110 @*/ 111 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device) 112 { 113 static PetscInt PetscDeviceCounter = 0; 114 PetscDevice dev; 115 PetscErrorCode ierr; 116 117 PetscFunctionBegin; 118 PetscValidDeviceType(type,1); 119 PetscValidPointer(device,3); 120 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 121 ierr = PetscNew(&dev);CHKERRQ(ierr); 122 dev->id = PetscDeviceCounter++; 123 dev->type = type; 124 dev->refcnt = 1; 125 /* 126 if you are adding a device, you also need to add it's initialization in 127 PetscDeviceInitializeTypeFromOptions_Private() below 128 */ 129 switch (type) { 130 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid); 131 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid); 132 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,getDevice,dev,devid); 133 default: 134 /* in case the above macros expand to nothing this silences any unused variable warnings */ 135 (void)(devid); 136 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]); 137 } 138 *device = dev; 139 PetscFunctionReturn(0); 140 } 141 142 /*@C 143 PetscDeviceDestroy - Free a PetscDevice 144 145 Not Collective, Asynchronous 146 147 Input Parameter: 148 . device - The PetscDevice 149 150 Level: beginner 151 152 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceView() 153 @*/ 154 PetscErrorCode PetscDeviceDestroy(PetscDevice *device) 155 { 156 PetscErrorCode ierr; 157 158 PetscFunctionBegin; 159 if (!*device) PetscFunctionReturn(0); 160 PetscValidDevice(*device,1); 161 ierr = PetscDeviceDereference_Internal(*device);CHKERRQ(ierr); 162 if ((*device)->refcnt) { 163 *device = PETSC_NULLPTR; 164 PetscFunctionReturn(0); 165 } 166 ierr = PetscFree((*device)->data);CHKERRQ(ierr); 167 ierr = PetscFree(*device);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 /*@C 172 PetscDeviceConfigure - Configure a particular PetscDevice 173 174 Not Collective, Asynchronous 175 176 Input Parameter: 177 . device - The PetscDevice to configure 178 179 Notes: 180 The user should not assume that this is a cheap operation 181 182 Level: beginner 183 184 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceView(), PetscDeviceDestroy() 185 @*/ 186 PetscErrorCode PetscDeviceConfigure(PetscDevice device) 187 { 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 PetscValidDevice(device,1); 192 if (PetscDefined(USE_DEBUG)) { 193 /* 194 if no available configuration is available, this cascades all the way down to default 195 and error 196 */ 197 switch (device->type) { 198 case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break; 199 case PETSC_DEVICE_HIP: if (PetscDefined(HAVE_HIP)) break; 200 case PETSC_DEVICE_SYCL: if (PetscDefined(HAVE_SYCL)) break; 201 default: 202 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[device->type]); 203 } 204 } 205 ierr = (*device->ops->configure)(device);CHKERRQ(ierr); 206 PetscFunctionReturn(0); 207 } 208 209 /*@C 210 PetscDeviceView - View a PetscDevice 211 212 Collective on viewer, Asynchronous 213 214 Input Parameter: 215 + device - The PetscDevice to view 216 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD) 217 218 Level: beginner 219 220 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy() 221 @*/ 222 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer) 223 { 224 PetscErrorCode ierr; 225 226 PetscFunctionBegin; 227 PetscValidDevice(device,1); 228 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr);} 229 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 230 ierr = (*device->ops->view)(device,viewer);CHKERRQ(ierr); 231 PetscFunctionReturn(0); 232 } 233 234 static std::array<bool,PETSC_DEVICE_MAX> initializedDevice = {}; 235 static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices = {}; 236 static_assert(initializedDevice.size() == defaultDevices.size(),""); 237 238 /*@C 239 PetscDeviceInitialize - Initialize PetscDevice 240 241 Not Collective, Possibly Synchronous 242 243 Input Parameter: 244 . type - The PetscDeviceType to initialize 245 246 Notes: 247 Eagerly initializes the corresponding PetscDeviceType if needed. 248 249 Level: beginner 250 251 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialized(), PetscDeviceCreate(), PetscDeviceDestroy() 252 @*/ 253 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type) 254 { 255 PetscErrorCode ierr; 256 257 PetscFunctionBegin; 258 PetscValidDeviceType(type,1); 259 ierr = PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 263 /*@C 264 PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular 265 PetscDeviceType 266 267 Not Collective, Asynchronous 268 269 Input Parameter: 270 . type - The PetscDeviceType to check 271 272 Output Parameter: 273 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise 274 275 Notes: 276 If one has not configured PETSc for a particular PetscDeviceType then this routine will 277 return PETSC_FALSE for that PetscDeviceType. 278 279 Level: beginner 280 281 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), PetscDeviceCreate(), PetscDeviceDestroy() 282 @*/ 283 PetscBool PetscDeviceInitialized(PetscDeviceType type) 284 { 285 return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]); 286 } 287 288 /* 289 Actual intialization function; any functions claiming to initialize PetscDevice or 290 PetscDeviceContext will have to run through this one 291 */ 292 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId) 293 { 294 PetscErrorCode ierr; 295 296 PetscFunctionBegin; 297 PetscValidDeviceType(type,1); 298 if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0); 299 PetscAssertFalse(defaultDevices[type],PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]); 300 ierr = PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type]);CHKERRQ(ierr); 301 ierr = PetscDeviceConfigure(defaultDevices[type]);CHKERRQ(ierr); 302 initializedDevice[type] = true; 303 PetscFunctionReturn(0); 304 } 305 306 #if PetscDefined(USE_LOG) 307 PETSC_INTERN PetscErrorCode PetscLogInitialize(void); 308 #else 309 #define PetscLogInitialize() 0 310 #endif 311 312 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType) 313 { 314 PetscErrorCode ierr; 315 316 PetscFunctionBegin; 317 if (!PetscDeviceConfiguredFor_Internal(type)) { 318 ierr = PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 319 defaultDevices[type] = PETSC_NULLPTR; 320 PetscFunctionReturn(0); 321 } 322 ierr = PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 323 /* ugly switch needed to pick the right global variable... could maybe do this as a union? */ 324 switch (type) { 325 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType); 326 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType); 327 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,initialize,comm,&defaultDeviceId,defaultInitType); 328 default: 329 SETERRQ(comm,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]); 330 } 331 /* 332 defaultInitType and defaultDeviceId now represent what the individual TYPES have decided to 333 initialize as 334 */ 335 if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) { 336 ierr = PetscInfo(PETSC_NULLPTR,"Eagerly initializing %s PetscDevice\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 337 ierr = PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId);CHKERRQ(ierr); 338 if (defaultView) { 339 PetscViewer vwr; 340 341 ierr = PetscLogInitialize();CHKERRQ(ierr); 342 ierr = PetscViewerASCIIGetStdout(comm,&vwr);CHKERRQ(ierr); 343 ierr = PetscDeviceView(defaultDevices[type],vwr);CHKERRQ(ierr); 344 } 345 } 346 PetscFunctionReturn(0); 347 } 348 349 /* called from PetscFinalize() do not call yourself! */ 350 static PetscErrorCode PetscDeviceFinalize_Private(void) 351 { 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 if (PetscDefined(USE_DEBUG)) { 356 const auto PetscDeviceCheckAllDestroyedAfterFinalize = [](){ 357 PetscFunctionBegin; 358 for (auto&& device : defaultDevices) { 359 PetscCheckFalse(device,PETSC_COMM_WORLD,PETSC_ERR_COR,"Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()",PetscDeviceTypes[device->type],device->refcnt); 360 } 361 PetscFunctionReturn(0); 362 }; 363 /* you might be thinking, why on earth are you registered yet another finalizer in a 364 * function already called during PetscRegisterFinalizeAll()? If this seems stupid it's 365 * because it is. 366 * 367 * The crux of the problem is that the initializer (and therefore the ~finalizer~) of 368 * PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context 369 * had a default PetscDevice attached, that PetscDevice will have a reference count >0 and 370 * hence won't be destroyed yet. So we need to repeat the check that all devices have been 371 * destroyed again ~after~ the global context is destroyed. In summary: 372 * 373 * 1. This finalizer runs and destroys all devices, except it may not because the global 374 * context may still hold a reference! 375 * 2. The global context finalizer runs and does the final reference count decrement 376 * required, which actually destroys the held device. 377 * 3. Our newly added finalizer runs and checks that all is well. 378 */ 379 ierr = PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize);CHKERRQ(ierr); 380 } 381 for (auto &&device : defaultDevices) {ierr = PetscDeviceDestroy(&device);CHKERRQ(ierr);} 382 CHKERRCXX(initializedDevice.fill(false)); 383 PetscFunctionReturn(0); 384 } 385 386 /* 387 Begins the init proceeedings for the entire PetscDevice stack. there are 3 stages of 388 initialization types: 389 390 1. defaultInitType - how does PetscDevice as a whole expect to initialize? 391 2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize? 392 e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but 393 have all CUDA devices still initialize. 394 395 All told the following happens: 396 397 0. defaultInitType -> LAZY 398 1. Check for log_view/log_summary, if yes defaultInitType -> EAGER 399 2. PetscDevice initializes each sub type with deviceDefaultInitType. 400 2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition 401 to checking for specific device init. if view or specific device init 402 subTypeDefaultInitType -> EAGER. disabled once again overrides all. 403 */ 404 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm) 405 { 406 PetscBool flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE; 407 PetscInt defaultDevice = PETSC_DECIDE; 408 PetscDeviceType deviceContextInitDevice = PETSC_DEVICE_DEFAULT; 409 PetscDeviceInitType defaultInitType; 410 PetscErrorCode ierr; 411 412 PetscFunctionBegin; 413 if (PetscDefined(USE_DEBUG)) { 414 int result; 415 416 ierr = MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result);CHKERRMPI(ierr); 417 /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the 418 * global space */ 419 if (PetscUnlikely(result != MPI_IDENT)) { 420 char name[MPI_MAX_OBJECT_NAME] = {}; 421 int len; /* unused */ 422 423 ierr = MPI_Comm_get_name(comm,name,&len);CHKERRMPI(ierr); 424 SETERRQ(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name); 425 } 426 } 427 comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */ 428 ierr = PetscRegisterFinalize(PetscDeviceFinalize_Private);CHKERRQ(ierr); 429 ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg);CHKERRQ(ierr); 430 if (!flg) { 431 ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg);CHKERRQ(ierr); 432 } 433 { 434 PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY; 435 436 ierr = PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys");CHKERRQ(ierr); 437 ierr = PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR);CHKERRQ(ierr); 438 ierr = PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate()",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max());CHKERRQ(ierr); 439 ierr = PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg);CHKERRQ(ierr); 440 ierr = PetscOptionsEnd();CHKERRQ(ierr); 441 if (initIdx == PETSC_DEVICE_INIT_NONE) { 442 /* disabled all device initialization if devices are globally disabled */ 443 PetscCheckFalse(defaultDevice != PETSC_DECIDE,comm,PETSC_ERR_USER_INPUT,"You have disabled devices but also specified a particular device to use, these options are mutually exlusive"); 444 defaultView = PETSC_FALSE; 445 } else { 446 defaultView = static_cast<decltype(defaultView)>(defaultView && flg); 447 if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER; 448 } 449 defaultInitType = static_cast<decltype(defaultInitType)>(initIdx); 450 } 451 static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),""); 452 for (int i = 1; i < PETSC_DEVICE_MAX; ++i) { 453 const auto deviceType = static_cast<PetscDeviceType>(i); 454 auto initType = defaultInitType; 455 456 ierr = PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType);CHKERRQ(ierr); 457 if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) { 458 initializeDeviceContextEagerly = PETSC_TRUE; 459 deviceContextInitDevice = deviceType; 460 } 461 } 462 if (initializeDeviceContextEagerly) { 463 PetscDeviceContext dctx; 464 465 /* 466 somewhat inefficient here as the device context is potentially fully set up twice (once 467 when retrieved then the second time if setfromoptions makes changes) 468 */ 469 ierr = PetscInfo(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice]);CHKERRQ(ierr); 470 ierr = PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice);CHKERRQ(ierr); 471 ierr = PetscDeviceContextGetCurrentContext(&dctx);CHKERRQ(ierr); 472 ierr = PetscDeviceContextSetFromOptions(comm,"root_",dctx);CHKERRQ(ierr); 473 ierr = PetscDeviceContextSetUp(dctx);CHKERRQ(ierr); 474 } 475 PetscFunctionReturn(0); 476 } 477 478 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */ 479 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device) 480 { 481 PetscErrorCode ierr; 482 483 PetscFunctionBegin; 484 PetscValidPointer(device,2); 485 ierr = PetscDeviceInitialize(type);CHKERRQ(ierr); 486 *device = defaultDevices[type]; 487 PetscFunctionReturn(0); 488 } 489