1 #include "cupmdevice.hpp" /* I "petscdevice.h" */ 2 3 using namespace Petsc; 4 5 /* note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to 6 * be picked up by the switch-case macros below. */ 7 #if PetscDefined(HAVE_CUDA) 8 static CUPMDevice<CUPMDeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA); 9 #endif 10 #if PetscDefined(HAVE_HIP) 11 static CUPMDevice<CUPMDeviceType::HIP> HIPDevice(PetscDeviceContextCreate_HIP); 12 #endif 13 #if PetscDefined(HAVE_SYCL) 14 #include "sycldevice.hpp" 15 static SyclDevice SYCLDevice(PetscDeviceContextCreate_SYCL); 16 #endif 17 18 const char *const PetscDeviceTypes[] = { 19 "invalid", 20 "cuda", 21 "hip", 22 "sycl", 23 "max", 24 "PetscDeviceType", 25 "PETSC_DEVICE_", 26 PETSC_NULLPTR 27 }; 28 29 const char *const PetscDeviceInitTypes[] = { 30 "none", 31 "lazy", 32 "eager", 33 "PetscDeviceInitType", 34 "PETSC_DEVICE_INIT_", 35 PETSC_NULLPTR 36 }; 37 static_assert( 38 sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6, 39 "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!" 40 ); 41 42 #define PETSC_DEVICE_DEFAULT_CASE(comm,type) \ 43 SETERRQ1((comm),PETSC_ERR_PLIB, \ 44 "PETSc was seemingly configured for PetscDeviceType %s but " \ 45 "we've fallen through all cases in a switch", \ 46 PetscDeviceTypes[type]) 47 48 #define CAT_(a,...) a ## __VA_ARGS__ 49 #define CAT(a,...) CAT_(a,__VA_ARGS__) 50 51 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED__0(IMPLS,func,...) 52 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED__1(IMPLS,func,...) \ 53 case CAT(PETSC_DEVICE_,IMPLS): \ 54 { \ 55 auto ierr = CAT(IMPLS,Device).func(__VA_ARGS__);CHKERRQ(ierr); \ 56 break; \ 57 } 58 59 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED_(IMPLS,...) \ 60 CAT(PETSC_DEVICE_CASE_IF_PETSC_DEFINED__,PetscDefined(CAT(HAVE_,IMPLS)))(IMPLS,__VA_ARGS__) 61 62 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS,...) \ 63 PETSC_DEVICE_CASE_IF_PETSC_DEFINED_(IMPLS,__VA_ARGS__) 64 65 #define PETSC_DEVICE_UNUSED_IF_NO_DEVICE(var) (void)(var) 66 67 /*@C 68 PetscDeviceCreate - Get a new handle for a particular device type 69 70 Not Collective, Possibly Synchronous 71 72 Input Parameter: 73 . type - The type of PetscDevice 74 . devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically) 75 76 Output Parameter: 77 . device - The PetscDevice 78 79 Notes: 80 This routine may initialize PetscDevice. If this is the case, this will most likely cause 81 some sort of device synchronization. 82 83 devid is what you might pass to cudaSetDevice() for example. 84 85 Level: beginner 86 87 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), 88 PetscDeviceInitialized(), PetscDeviceConfigure(), PetscDeviceView(), PetscDeviceDestroy() 89 @*/ 90 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device) 91 { 92 static PetscInt PetscDeviceCounter = 0; 93 PetscDevice dev; 94 PetscErrorCode ierr; 95 96 PetscFunctionBegin; 97 PetscValidDeviceType(type,1); 98 PetscValidPointer(device,3); 99 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 100 ierr = PetscNew(&dev);CHKERRQ(ierr); 101 dev->id = PetscDeviceCounter++; 102 dev->type = type; 103 dev->refcnt = 1; 104 /* if you are adding a device, you also need to add it's initialization in 105 PetscDeviceInitializeTypeFromOptions_Private() below */ 106 switch (type) { 107 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid); 108 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid); 109 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,getDevice,dev,devid); 110 default: 111 PETSC_DEVICE_UNUSED_IF_NO_DEVICE(devid); 112 PETSC_DEVICE_DEFAULT_CASE(PETSC_COMM_SELF,type); 113 } 114 *device = dev; 115 PetscFunctionReturn(0); 116 } 117 118 /*@C 119 PetscDeviceDestroy - Free a PetscDevice 120 121 Not Collective, Asynchronous 122 123 Input Parameter: 124 . device - The PetscDevice 125 126 Level: beginner 127 128 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceView() 129 @*/ 130 PetscErrorCode PetscDeviceDestroy(PetscDevice *device) 131 { 132 PetscErrorCode ierr; 133 134 PetscFunctionBegin; 135 if (!*device) PetscFunctionReturn(0); 136 PetscValidDevice(*device,1); 137 ierr = PetscDeviceDereference_Internal(*device);CHKERRQ(ierr); 138 if ((*device)->refcnt) { 139 *device = PETSC_NULLPTR; 140 PetscFunctionReturn(0); 141 } 142 ierr = PetscFree((*device)->data);CHKERRQ(ierr); 143 ierr = PetscFree(*device);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 147 /*@C 148 PetscDeviceConfigure - Configure a particular PetscDevice 149 150 Not Collective, Asynchronous 151 152 Input Parameter: 153 . device - The PetscDevice to configure 154 155 Notes: 156 The user should not assume that this is a cheap operation 157 158 Level: beginner 159 160 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceView(), PetscDeviceDestroy() 161 @*/ 162 PetscErrorCode PetscDeviceConfigure(PetscDevice device) 163 { 164 PetscErrorCode ierr; 165 166 PetscFunctionBegin; 167 PetscValidDevice(device,1); 168 if (PetscDefined(USE_DEBUG)) { 169 /* if no available configuration is available, this cascades all the way down to default 170 and error */ 171 switch (device->type) { 172 case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break; 173 case PETSC_DEVICE_HIP: if (PetscDefined(HAVE_HIP)) break; 174 case PETSC_DEVICE_SYCL: if (PetscDefined(HAVE_SYCL)) break; 175 default: 176 PETSC_DEVICE_DEFAULT_CASE(PETSC_COMM_SELF,device->type); 177 break; 178 } 179 } 180 ierr = (*device->ops->configure)(device);CHKERRQ(ierr); 181 PetscFunctionReturn(0); 182 } 183 184 /*@C 185 PetscDeviceView - View a PetscDevice 186 187 Collective on viewer, Asynchronous 188 189 Input Parameter: 190 + device - The PetscDevice to view 191 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD) 192 193 Level: beginner 194 195 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy() 196 @*/ 197 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer) 198 { 199 PetscErrorCode ierr; 200 201 PetscFunctionBegin; 202 PetscValidDevice(device,1); 203 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr);} 204 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 205 ierr = (*device->ops->view)(device,viewer);CHKERRQ(ierr); 206 PetscFunctionReturn(0); 207 } 208 209 static std::array<bool,PETSC_DEVICE_MAX> initializedDevice = {}; 210 static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices = {}; 211 static_assert(initializedDevice.size() == defaultDevices.size(),""); 212 213 /*@C 214 PetscDeviceInitialize - Initialize PetscDevice 215 216 Not Collective, Possibly Synchronous 217 218 Input Parameter: 219 . type - The PetscDeviceType to initialize 220 221 Notes: 222 Eagerly initializes the corresponding PetscDeviceType if needed. 223 224 Level: beginner 225 226 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialized(), PetscDeviceCreate(), PetscDeviceDestroy() 227 @*/ 228 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type) 229 { 230 PetscErrorCode ierr; 231 232 PetscFunctionBegin; 233 PetscValidDeviceType(type,1); 234 ierr = PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE);CHKERRQ(ierr); 235 PetscFunctionReturn(0); 236 } 237 238 /*@C 239 PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular 240 PetscDeviceType 241 242 Not Collective, Asynchronous 243 244 Input Parameter: 245 . type - The PetscDeviceType to check 246 247 Output Parameter: 248 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise 249 250 Notes: 251 If one has not configured PETSc for a particular PetscDeviceType then this routine will 252 return PETSC_FALSE for that PetscDeviceType. 253 254 Level: beginner 255 256 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), PetscDeviceCreate(), PetscDeviceDestroy() 257 @*/ 258 PetscBool PetscDeviceInitialized(PetscDeviceType type) 259 { 260 return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]); 261 } 262 263 /* Actual intialization function; any functions claiming to initialize PetscDevice or 264 * PetscDeviceContext will have to run through this one */ 265 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId) 266 { 267 PetscErrorCode ierr; 268 269 PetscFunctionBegin; 270 PetscValidDeviceType(type,1); 271 if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0); 272 if (PetscUnlikelyDebug(defaultDevices[type])) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]); 273 ierr = PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type]);CHKERRQ(ierr); 274 ierr = PetscDeviceConfigure(defaultDevices[type]);CHKERRQ(ierr); 275 /* the default devices are all automatically "referenced" at least once, otherwise the 276 * reference counting is off for them. We could alternatively increase the reference count 277 * when they are retrieved but that is a lot more brittle; what's to stop someone from doing 278 * the following? 279 280 for (int i = 0; i < 10000; ++i) auto device = PetscDeviceDefault_Internal(); 281 */ 282 initializedDevice[type] = true; 283 PetscFunctionReturn(0); 284 } 285 286 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType) 287 { 288 PetscErrorCode ierr; 289 290 PetscFunctionBegin; 291 if (!PetscDeviceConfiguredFor_Internal(type)) { 292 ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 293 defaultDevices[type] = PETSC_NULLPTR; 294 PetscFunctionReturn(0); 295 } 296 ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 297 /* ugly switch needed to pick the right global variable... could maybe do this as a union? */ 298 switch (type) { 299 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType); 300 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType); 301 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,initialize,comm,&defaultDeviceId,defaultInitType); 302 default: 303 PETSC_DEVICE_DEFAULT_CASE(comm,type); 304 break; 305 } 306 /* defaultInitType and defaultDeviceId now represent what the individual TYPES have decided 307 * to initialize as */ 308 if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) { 309 ierr = PetscInfo1(PETSC_NULLPTR,"Greedily initializing %s PetscDevice\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 310 ierr = PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId);CHKERRQ(ierr); 311 if (defaultView) { 312 PetscViewer vwr; 313 314 ierr = PetscViewerASCIIGetStdout(comm,&vwr);CHKERRQ(ierr); 315 ierr = PetscDeviceView(defaultDevices[type],vwr);CHKERRQ(ierr); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 /* called from PetscFinalize() do not call yourself! */ 322 static PetscErrorCode PetscDeviceFinalize_Private(void) 323 { 324 PetscErrorCode ierr; 325 326 PetscFunctionBegin; 327 if (PetscDefined(USE_DEBUG)) { 328 PETSC_CONSTEXPR_17 auto PetscDeviceCheckAllDestroyedAfterFinalize = [](){ 329 PetscFunctionBegin; 330 for (const auto &device : defaultDevices) { 331 if (PetscUnlikely(device)) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_COR,"Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()",PetscDeviceTypes[device->type],device->refcnt); 332 } 333 PetscFunctionReturn(0); 334 }; 335 /* you might be thinking, why on earth are you registered yet another finalizer in a 336 * function already called during PetscRegisterFinalizeAll()? If this seems stupid it's 337 * because it is. 338 * 339 * The crux of the problem is that the initializer (and therefore the ~finalizer~) of 340 * PetscDeviceContext is guaranteed to run after this finalizer. So if the global context 341 * had a default PetscDevice attached it will hold a reference this routine won't destroy 342 * it. So we need to check that all devices have been destroyed after the global context is 343 * destroyed. In summary: 344 * 345 * 1. This finalizer runs and destroys all devices, except it may not because the global 346 * context may still hold a reference! 347 * 2. The global context finalizer runs and in turn actually destroys the referenced 348 * device. 349 * 3. Our newly added finalizer runs and checks that all is well. 350 */ 351 ierr = PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize);CHKERRQ(ierr); 352 } 353 for (auto &&device : defaultDevices) {ierr = PetscDeviceDestroy(&device);CHKERRQ(ierr);} 354 CHKERRCXX(initializedDevice.fill(false)); 355 PetscFunctionReturn(0); 356 } 357 358 /* begins the init proceeedings for the entire PetscDevice stack. there are 3 stages of 359 * initialization types: 360 1. defaultInitType - how does PetscDevice as a whole expect to initialize? 361 2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize? 362 e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but 363 have all CUDA devices still initialize. 364 365 All told the following happens: 366 0. defaultInitType -> LAZY 367 1. Check for log_view/log_summary, if yes defaultInitType -> EAGER 368 2. PetscDevice initializes each sub type with deviceDefaultInitType. 369 2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition 370 to checking for specific device init. if view or specific device init 371 subTypeDefaultInitType -> EAGER. disabled once again overrides all. 372 */ 373 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm) 374 { 375 PetscBool flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE; 376 PetscInt defaultDevice = PETSC_DECIDE; 377 PetscDeviceType deviceContextInitDevice = PETSC_DEVICE_DEFAULT; 378 PetscDeviceInitType defaultInitType; 379 PetscErrorCode ierr; 380 381 PetscFunctionBegin; 382 if (PetscDefined(USE_DEBUG)) { 383 int result; 384 385 ierr = MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result);CHKERRMPI(ierr); 386 /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the 387 * global space */ 388 if (PetscUnlikely(result != MPI_IDENT)) { 389 char name[MPI_MAX_OBJECT_NAME] = {}; 390 int len; /* unused */ 391 392 ierr = MPI_Comm_get_name(comm,name,&len);CHKERRMPI(ierr); 393 SETERRQ1(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name); 394 } 395 } 396 comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */ 397 ierr = PetscRegisterFinalize(PetscDeviceFinalize_Private);CHKERRQ(ierr); 398 ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg);CHKERRQ(ierr); 399 if (!flg) { 400 ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg);CHKERRQ(ierr); 401 } 402 { 403 PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY; 404 405 ierr = PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys");CHKERRQ(ierr); 406 ierr = PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR);CHKERRQ(ierr); 407 ierr = PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max());CHKERRQ(ierr); 408 ierr = PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg);CHKERRQ(ierr); 409 ierr = PetscOptionsEnd();CHKERRQ(ierr); 410 if (initIdx == PETSC_DEVICE_INIT_NONE) { 411 /* disabled all device initialization if devices are globally disabled */ 412 if (PetscUnlikely(defaultDevice != PETSC_DECIDE)) SETERRQ(comm,PETSC_ERR_USER_INPUT,"You have disabled devices but also specified a particular device to use, these options are mutually exlusive"); 413 defaultView = PETSC_FALSE; 414 } else { 415 defaultView = static_cast<decltype(defaultView)>(defaultView && flg); 416 if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER; 417 } 418 defaultInitType = static_cast<decltype(defaultInitType)>(initIdx); 419 } 420 static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),""); 421 for (int i = 1; i < PETSC_DEVICE_MAX; ++i) { 422 const auto deviceType = static_cast<PetscDeviceType>(i); 423 auto initType = defaultInitType; 424 425 ierr = PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType);CHKERRQ(ierr); 426 if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) { 427 initializeDeviceContextEagerly = PETSC_TRUE; 428 deviceContextInitDevice = deviceType; 429 } 430 } 431 if (initializeDeviceContextEagerly) { 432 PetscDeviceContext dctx; 433 434 /* somewhat inefficient here as the device context is potentially fully set up twice (once 435 * when retrieved then the second time if setfromoptions makes changes) */ 436 ierr = PetscInfo1(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice]);CHKERRQ(ierr); 437 ierr = PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice);CHKERRQ(ierr); 438 ierr = PetscDeviceContextGetCurrentContext(&dctx);CHKERRQ(ierr); 439 ierr = PetscDeviceContextSetFromOptions(comm,"root_",dctx);CHKERRQ(ierr); 440 ierr = PetscDeviceContextSetUp(dctx);CHKERRQ(ierr); 441 } 442 PetscFunctionReturn(0); 443 } 444 445 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */ 446 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device) 447 { 448 PetscErrorCode ierr; 449 450 PetscFunctionBegin; 451 PetscValidPointer(device,2); 452 ierr = PetscDeviceInitialize(type);CHKERRQ(ierr); 453 *device = defaultDevices[type]; 454 PetscFunctionReturn(0); 455 } 456