1 #include "cupmdevice.hpp" /* I "petscdevice.h" */ 2 #include <petsc/private/petscadvancedmacros.h> 3 4 using namespace Petsc::Device; 5 6 /* note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to 7 * be picked up by the switch-case macros below. */ 8 #if PetscDefined(HAVE_CUDA) 9 static CUPM::Device<CUPM::DeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA); 10 #endif 11 #if PetscDefined(HAVE_HIP) 12 static CUPM::Device<CUPM::DeviceType::HIP> HIPDevice(PetscDeviceContextCreate_HIP); 13 #endif 14 #if PetscDefined(HAVE_SYCL) 15 #include "sycldevice.hpp" 16 static SYCL::Device SYCLDevice(PetscDeviceContextCreate_SYCL); 17 #endif 18 19 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INVALID) == 0,""); 20 static_assert(Petsc::util::integral_value(PETSC_DEVICE_CUDA) == 1,""); 21 static_assert(Petsc::util::integral_value(PETSC_DEVICE_HIP) == 2,""); 22 static_assert(Petsc::util::integral_value(PETSC_DEVICE_SYCL) == 3,""); 23 static_assert(Petsc::util::integral_value(PETSC_DEVICE_MAX) == 4,""); 24 const char *const PetscDeviceTypes[] = { 25 "invalid", 26 "cuda", 27 "hip", 28 "sycl", 29 "max", 30 "PetscDeviceType", 31 "PETSC_DEVICE_", 32 PETSC_NULLPTR 33 }; 34 35 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_NONE) == 0,""); 36 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_LAZY) == 1,""); 37 static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_EAGER) == 2,""); 38 const char *const PetscDeviceInitTypes[] = { 39 "none", 40 "lazy", 41 "eager", 42 "PetscDeviceInitType", 43 "PETSC_DEVICE_INIT_", 44 PETSC_NULLPTR 45 }; 46 static_assert( 47 sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6, 48 "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!" 49 ); 50 51 #define PETSC_DEVICE_CASE(IMPLS,func,...) \ 52 case PetscConcat_(PETSC_DEVICE_,IMPLS): { \ 53 auto ierr_ = PetscConcat_(IMPLS,Device).func(__VA_ARGS__);CHKERRQ(ierr_); \ 54 } break 55 56 /* Suppose you have: 57 * 58 * CUDADevice.myFunction(arg1,arg2) 59 * 60 * that you would like to conditionally define and call in a switch-case: 61 * 62 * switch(PetscDeviceType) { 63 * #if PetscDefined(HAVE_CUDA) 64 * case PETSC_DEVICE_CUDA: { 65 * auto ierr = CUDADevice.myFunction(arg1,arg2);CHKERRQ(ierr); 66 * } break; 67 * #endif 68 * } 69 * 70 * then calling this macro: 71 * 72 * PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2) 73 * 74 * will expand to the following case statement: 75 * 76 * case PETSC_DEVICE_CUDA: { 77 * auto ierr = CUDADevice.myFunction(arg1,arg2);CHKERRQ(ierr); 78 * } break 79 * 80 * if PetscDefined(HAVE_CUDA) evaluates to 1, and expand to nothing otherwise 81 */ 82 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS,func,...) \ 83 PetscIfPetscDefined(PetscConcat_(HAVE_,IMPLS),PETSC_DEVICE_CASE,PetscExpandToNothing)(IMPLS,func,__VA_ARGS__) 84 85 /*@C 86 PetscDeviceCreate - Get a new handle for a particular device type 87 88 Not Collective, Possibly Synchronous 89 90 Input Parameter: 91 . type - The type of PetscDevice 92 . devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically) 93 94 Output Parameter: 95 . device - The PetscDevice 96 97 Notes: 98 This routine may initialize PetscDevice. If this is the case, this will most likely cause 99 some sort of device synchronization. 100 101 devid is what you might pass to cudaSetDevice() for example. 102 103 Level: beginner 104 105 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), 106 PetscDeviceInitialized(), PetscDeviceConfigure(), PetscDeviceView(), PetscDeviceDestroy() 107 @*/ 108 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device) 109 { 110 static PetscInt PetscDeviceCounter = 0; 111 PetscDevice dev; 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 PetscValidDeviceType(type,1); 116 PetscValidPointer(device,3); 117 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 118 ierr = PetscNew(&dev);CHKERRQ(ierr); 119 dev->id = PetscDeviceCounter++; 120 dev->type = type; 121 dev->refcnt = 1; 122 /* if you are adding a device, you also need to add it's initialization in 123 * PetscDeviceInitializeTypeFromOptions_Private() below */ 124 switch (type) { 125 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid); 126 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid); 127 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,getDevice,dev,devid); 128 default: 129 /* in case the above macros expand to nothing this silences any unused variable warnings */ 130 (void)(devid); 131 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]); 132 } 133 *device = dev; 134 PetscFunctionReturn(0); 135 } 136 137 /*@C 138 PetscDeviceDestroy - Free a PetscDevice 139 140 Not Collective, Asynchronous 141 142 Input Parameter: 143 . device - The PetscDevice 144 145 Level: beginner 146 147 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceView() 148 @*/ 149 PetscErrorCode PetscDeviceDestroy(PetscDevice *device) 150 { 151 PetscErrorCode ierr; 152 153 PetscFunctionBegin; 154 if (!*device) PetscFunctionReturn(0); 155 PetscValidDevice(*device,1); 156 ierr = PetscDeviceDereference_Internal(*device);CHKERRQ(ierr); 157 if ((*device)->refcnt) { 158 *device = PETSC_NULLPTR; 159 PetscFunctionReturn(0); 160 } 161 ierr = PetscFree((*device)->data);CHKERRQ(ierr); 162 ierr = PetscFree(*device);CHKERRQ(ierr); 163 PetscFunctionReturn(0); 164 } 165 166 /*@C 167 PetscDeviceConfigure - Configure a particular PetscDevice 168 169 Not Collective, Asynchronous 170 171 Input Parameter: 172 . device - The PetscDevice to configure 173 174 Notes: 175 The user should not assume that this is a cheap operation 176 177 Level: beginner 178 179 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceView(), PetscDeviceDestroy() 180 @*/ 181 PetscErrorCode PetscDeviceConfigure(PetscDevice device) 182 { 183 PetscErrorCode ierr; 184 185 PetscFunctionBegin; 186 PetscValidDevice(device,1); 187 if (PetscDefined(USE_DEBUG)) { 188 /* if no available configuration is available, this cascades all the way down to default 189 * and error */ 190 switch (device->type) { 191 case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break; 192 case PETSC_DEVICE_HIP: if (PetscDefined(HAVE_HIP)) break; 193 case PETSC_DEVICE_SYCL: if (PetscDefined(HAVE_SYCL)) break; 194 default: 195 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[device->type]); 196 } 197 } 198 ierr = (*device->ops->configure)(device);CHKERRQ(ierr); 199 PetscFunctionReturn(0); 200 } 201 202 /*@C 203 PetscDeviceView - View a PetscDevice 204 205 Collective on viewer, Asynchronous 206 207 Input Parameter: 208 + device - The PetscDevice to view 209 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD) 210 211 Level: beginner 212 213 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy() 214 @*/ 215 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer) 216 { 217 PetscErrorCode ierr; 218 219 PetscFunctionBegin; 220 PetscValidDevice(device,1); 221 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr);} 222 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 223 ierr = (*device->ops->view)(device,viewer);CHKERRQ(ierr); 224 PetscFunctionReturn(0); 225 } 226 227 static std::array<bool,PETSC_DEVICE_MAX> initializedDevice = {}; 228 static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices = {}; 229 static_assert(initializedDevice.size() == defaultDevices.size(),""); 230 231 /*@C 232 PetscDeviceInitialize - Initialize PetscDevice 233 234 Not Collective, Possibly Synchronous 235 236 Input Parameter: 237 . type - The PetscDeviceType to initialize 238 239 Notes: 240 Eagerly initializes the corresponding PetscDeviceType if needed. 241 242 Level: beginner 243 244 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialized(), PetscDeviceCreate(), PetscDeviceDestroy() 245 @*/ 246 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type) 247 { 248 PetscErrorCode ierr; 249 250 PetscFunctionBegin; 251 PetscValidDeviceType(type,1); 252 ierr = PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE);CHKERRQ(ierr); 253 PetscFunctionReturn(0); 254 } 255 256 /*@C 257 PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular 258 PetscDeviceType 259 260 Not Collective, Asynchronous 261 262 Input Parameter: 263 . type - The PetscDeviceType to check 264 265 Output Parameter: 266 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise 267 268 Notes: 269 If one has not configured PETSc for a particular PetscDeviceType then this routine will 270 return PETSC_FALSE for that PetscDeviceType. 271 272 Level: beginner 273 274 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), PetscDeviceCreate(), PetscDeviceDestroy() 275 @*/ 276 PetscBool PetscDeviceInitialized(PetscDeviceType type) 277 { 278 return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]); 279 } 280 281 /* Actual intialization function; any functions claiming to initialize PetscDevice or 282 * PetscDeviceContext will have to run through this one */ 283 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId) 284 { 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 PetscValidDeviceType(type,1); 289 if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0); 290 if (PetscUnlikelyDebug(defaultDevices[type])) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]); 291 ierr = PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type]);CHKERRQ(ierr); 292 ierr = PetscDeviceConfigure(defaultDevices[type]);CHKERRQ(ierr); 293 initializedDevice[type] = true; 294 PetscFunctionReturn(0); 295 } 296 297 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType) 298 { 299 PetscErrorCode ierr; 300 301 PetscFunctionBegin; 302 if (!PetscDeviceConfiguredFor_Internal(type)) { 303 ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 304 defaultDevices[type] = PETSC_NULLPTR; 305 PetscFunctionReturn(0); 306 } 307 ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 308 /* ugly switch needed to pick the right global variable... could maybe do this as a union? */ 309 switch (type) { 310 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType); 311 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType); 312 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,initialize,comm,&defaultDeviceId,defaultInitType); 313 default: 314 SETERRQ1(comm,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]); 315 } 316 /* defaultInitType and defaultDeviceId now represent what the individual TYPES have decided 317 * to initialize as */ 318 if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) { 319 ierr = PetscInfo1(PETSC_NULLPTR,"Eagerly initializing %s PetscDevice\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 320 ierr = PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId);CHKERRQ(ierr); 321 if (defaultView) { 322 PetscViewer vwr; 323 324 ierr = PetscViewerASCIIGetStdout(comm,&vwr);CHKERRQ(ierr); 325 ierr = PetscDeviceView(defaultDevices[type],vwr);CHKERRQ(ierr); 326 } 327 } 328 PetscFunctionReturn(0); 329 } 330 331 /* called from PetscFinalize() do not call yourself! */ 332 static PetscErrorCode PetscDeviceFinalize_Private(void) 333 { 334 PetscErrorCode ierr; 335 336 PetscFunctionBegin; 337 if (PetscDefined(USE_DEBUG)) { 338 const auto PetscDeviceCheckAllDestroyedAfterFinalize = [](){ 339 PetscFunctionBegin; 340 for (auto&& device : defaultDevices) { 341 if (PetscUnlikely(device)) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_COR,"Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()",PetscDeviceTypes[device->type],device->refcnt); 342 } 343 PetscFunctionReturn(0); 344 }; 345 /* you might be thinking, why on earth are you registered yet another finalizer in a 346 * function already called during PetscRegisterFinalizeAll()? If this seems stupid it's 347 * because it is. 348 * 349 * The crux of the problem is that the initializer (and therefore the ~finalizer~) of 350 * PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context 351 * had a default PetscDevice attached, that PetscDevice will have a reference count >0 and 352 * hence won't be destroyed yet. So we need to repeat the check that all devices have been 353 * destroyed again ~after~ the global context is destroyed. In summary: 354 * 355 * 1. This finalizer runs and destroys all devices, except it may not because the global 356 * context may still hold a reference! 357 * 2. The global context finalizer runs and does the final reference count decrement 358 * required, which actually destroys the held device. 359 * 3. Our newly added finalizer runs and checks that all is well. 360 */ 361 ierr = PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize);CHKERRQ(ierr); 362 } 363 for (auto &&device : defaultDevices) {ierr = PetscDeviceDestroy(&device);CHKERRQ(ierr);} 364 CHKERRCXX(initializedDevice.fill(false)); 365 PetscFunctionReturn(0); 366 } 367 368 /* begins the init proceeedings for the entire PetscDevice stack. there are 3 stages of 369 * initialization types: 370 1. defaultInitType - how does PetscDevice as a whole expect to initialize? 371 2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize? 372 e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but 373 have all CUDA devices still initialize. 374 375 All told the following happens: 376 0. defaultInitType -> LAZY 377 1. Check for log_view/log_summary, if yes defaultInitType -> EAGER 378 2. PetscDevice initializes each sub type with deviceDefaultInitType. 379 2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition 380 to checking for specific device init. if view or specific device init 381 subTypeDefaultInitType -> EAGER. disabled once again overrides all. 382 */ 383 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm) 384 { 385 PetscBool flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE; 386 PetscInt defaultDevice = PETSC_DECIDE; 387 PetscDeviceType deviceContextInitDevice = PETSC_DEVICE_DEFAULT; 388 PetscDeviceInitType defaultInitType; 389 PetscErrorCode ierr; 390 391 PetscFunctionBegin; 392 if (PetscDefined(USE_DEBUG)) { 393 int result; 394 395 ierr = MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result);CHKERRMPI(ierr); 396 /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the 397 * global space */ 398 if (PetscUnlikely(result != MPI_IDENT)) { 399 char name[MPI_MAX_OBJECT_NAME] = {}; 400 int len; /* unused */ 401 402 ierr = MPI_Comm_get_name(comm,name,&len);CHKERRMPI(ierr); 403 SETERRQ1(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name); 404 } 405 } 406 comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */ 407 ierr = PetscRegisterFinalize(PetscDeviceFinalize_Private);CHKERRQ(ierr); 408 ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg);CHKERRQ(ierr); 409 if (!flg) { 410 ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg);CHKERRQ(ierr); 411 } 412 { 413 PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY; 414 415 ierr = PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys");CHKERRQ(ierr); 416 ierr = PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR);CHKERRQ(ierr); 417 ierr = PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate()",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max());CHKERRQ(ierr); 418 ierr = PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg);CHKERRQ(ierr); 419 ierr = PetscOptionsEnd();CHKERRQ(ierr); 420 if (initIdx == PETSC_DEVICE_INIT_NONE) { 421 /* disabled all device initialization if devices are globally disabled */ 422 if (PetscUnlikely(defaultDevice != PETSC_DECIDE)) SETERRQ(comm,PETSC_ERR_USER_INPUT,"You have disabled devices but also specified a particular device to use, these options are mutually exlusive"); 423 defaultView = PETSC_FALSE; 424 } else { 425 defaultView = static_cast<decltype(defaultView)>(defaultView && flg); 426 if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER; 427 } 428 defaultInitType = static_cast<decltype(defaultInitType)>(initIdx); 429 } 430 static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),""); 431 for (int i = 1; i < PETSC_DEVICE_MAX; ++i) { 432 const auto deviceType = static_cast<PetscDeviceType>(i); 433 auto initType = defaultInitType; 434 435 ierr = PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType);CHKERRQ(ierr); 436 if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) { 437 initializeDeviceContextEagerly = PETSC_TRUE; 438 deviceContextInitDevice = deviceType; 439 } 440 } 441 if (initializeDeviceContextEagerly) { 442 PetscDeviceContext dctx; 443 444 /* somewhat inefficient here as the device context is potentially fully set up twice (once 445 * when retrieved then the second time if setfromoptions makes changes) */ 446 ierr = PetscInfo1(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice]);CHKERRQ(ierr); 447 ierr = PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice);CHKERRQ(ierr); 448 ierr = PetscDeviceContextGetCurrentContext(&dctx);CHKERRQ(ierr); 449 ierr = PetscDeviceContextSetFromOptions(comm,"root_",dctx);CHKERRQ(ierr); 450 ierr = PetscDeviceContextSetUp(dctx);CHKERRQ(ierr); 451 } 452 PetscFunctionReturn(0); 453 } 454 455 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */ 456 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device) 457 { 458 PetscErrorCode ierr; 459 460 PetscFunctionBegin; 461 PetscValidPointer(device,2); 462 ierr = PetscDeviceInitialize(type);CHKERRQ(ierr); 463 *device = defaultDevices[type]; 464 PetscFunctionReturn(0); 465 } 466