1 #include "cupmdevice.hpp" /* I "petscdevice.h" */ 2 3 using namespace Petsc; 4 5 /* note to anyone adding more classes, the name must be ALL_CAPS_SHORT_NAME + Device exactly to 6 * be picked up by the switch-case macros below. */ 7 #if PetscDefined(HAVE_CUDA) 8 static CUPMDevice<CUPMDeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA); 9 #endif 10 #if PetscDefined(HAVE_HIP) 11 static CUPMDevice<CUPMDeviceType::HIP> HIPDevice(PetscDeviceContextCreate_HIP); 12 #endif 13 14 const char *const PetscDeviceTypes[] = { 15 "invalid", 16 "cuda", 17 "hip", 18 "max", 19 "PetscDeviceType", 20 "PETSC_DEVICE_", 21 PETSC_NULLPTR 22 }; 23 24 const char *const PetscDeviceInitTypes[] = { 25 "none", 26 "lazy", 27 "eager", 28 "PetscDeviceInitType", 29 "PETSC_DEVICE_INIT_", 30 PETSC_NULLPTR 31 }; 32 static_assert( 33 sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6, 34 "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!" 35 ); 36 37 #define PETSC_DEVICE_DEFAULT_CASE(comm,type) \ 38 SETERRQ1((comm),PETSC_ERR_PLIB, \ 39 "PETSc was seemingly configured for PetscDeviceType %s but " \ 40 "we've fallen through all cases in a switch", \ 41 PetscDeviceTypes[type]) 42 43 #define CAT_(a,...) a ## __VA_ARGS__ 44 #define CAT(a,...) CAT_(a,__VA_ARGS__) 45 46 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED__0(IMPLS,func,...) 47 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED__1(IMPLS,func,...) \ 48 case CAT(PETSC_DEVICE_,IMPLS): \ 49 { \ 50 auto ierr = CAT(IMPLS,Device).func(__VA_ARGS__);CHKERRQ(ierr); \ 51 break; \ 52 } 53 54 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED_(IMPLS,...) \ 55 CAT(PETSC_DEVICE_CASE_IF_PETSC_DEFINED__,PetscDefined(CAT(HAVE_,IMPLS)))(IMPLS,__VA_ARGS__) 56 57 #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS,...) \ 58 PETSC_DEVICE_CASE_IF_PETSC_DEFINED_(IMPLS,__VA_ARGS__) 59 60 #define PETSC_DEVICE_UNUSED_IF_NO_DEVICE(var) (void)(var) 61 62 /*@C 63 PetscDeviceCreate - Get a new handle for a particular device type 64 65 Not Collective, Possibly Synchronous 66 67 Input Parameter: 68 . type - The type of PetscDevice 69 . devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically) 70 71 Output Parameter: 72 . device - The PetscDevice 73 74 Notes: 75 This routine may initialize PetscDevice. If this is the case, this will most likely cause 76 some sort of device synchronization. 77 78 devid is what you might pass to cudaSetDevice() for example. 79 80 Level: beginner 81 82 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), 83 PetscDeviceInitialized(), PetscDeviceConfigure(), PetscDeviceView(), PetscDeviceDestroy() 84 @*/ 85 PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device) 86 { 87 static PetscInt PetscDeviceCounter = 0; 88 PetscDevice dev; 89 PetscErrorCode ierr; 90 91 PetscFunctionBegin; 92 PetscValidDeviceType(type,1); 93 PetscValidPointer(device,3); 94 ierr = PetscDeviceInitializePackage();CHKERRQ(ierr); 95 ierr = PetscNew(&dev);CHKERRQ(ierr); 96 dev->id = PetscDeviceCounter++; 97 dev->type = type; 98 dev->refcnt = 1; 99 /* if you are adding a device, you also need to add it's initialization in 100 PetscDeviceInitializeTypeFromOptions_Private() below */ 101 switch (type) { 102 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid); 103 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid); 104 default: 105 PETSC_DEVICE_UNUSED_IF_NO_DEVICE(devid); 106 PETSC_DEVICE_DEFAULT_CASE(PETSC_COMM_SELF,type); 107 } 108 *device = dev; 109 PetscFunctionReturn(0); 110 } 111 112 /*@C 113 PetscDeviceDestroy - Free a PetscDevice 114 115 Not Collective, Asynchronous 116 117 Input Parameter: 118 . device - The PetscDevice 119 120 Level: beginner 121 122 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceView() 123 @*/ 124 PetscErrorCode PetscDeviceDestroy(PetscDevice *device) 125 { 126 PetscErrorCode ierr; 127 128 PetscFunctionBegin; 129 if (!*device) PetscFunctionReturn(0); 130 PetscValidDevice(*device,1); 131 ierr = PetscDeviceDereference_Internal(*device);CHKERRQ(ierr); 132 if ((*device)->refcnt) { 133 *device = PETSC_NULLPTR; 134 PetscFunctionReturn(0); 135 } 136 ierr = PetscFree((*device)->data);CHKERRQ(ierr); 137 ierr = PetscFree(*device);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 141 /*@C 142 PetscDeviceConfigure - Configure a particular PetscDevice 143 144 Not Collective, Asynchronous 145 146 Input Parameter: 147 . device - The PetscDevice to configure 148 149 Notes: 150 The user should not assume that this is a cheap operation 151 152 Level: beginner 153 154 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceView(), PetscDeviceDestroy() 155 @*/ 156 PetscErrorCode PetscDeviceConfigure(PetscDevice device) 157 { 158 PetscErrorCode ierr; 159 160 PetscFunctionBegin; 161 PetscValidDevice(device,1); 162 if (PetscDefined(USE_DEBUG)) { 163 /* if no available configuration is available, this cascades all the way down to default 164 and error */ 165 switch (device->type) { 166 case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break; 167 case PETSC_DEVICE_HIP: if (PetscDefined(HAVE_HIP)) break; 168 default: 169 PETSC_DEVICE_DEFAULT_CASE(PETSC_COMM_SELF,device->type); 170 break; 171 } 172 } 173 ierr = (*device->ops->configure)(device);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 /*@C 178 PetscDeviceView - View a PetscDevice 179 180 Collective on viewer, Asynchronous 181 182 Input Parameter: 183 + device - The PetscDevice to view 184 - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD) 185 186 Level: beginner 187 188 .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy() 189 @*/ 190 PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer) 191 { 192 PetscErrorCode ierr; 193 194 PetscFunctionBegin; 195 PetscValidDevice(device,1); 196 if (!viewer) {ierr = PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer);CHKERRQ(ierr);} 197 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 198 ierr = (*device->ops->view)(device,viewer);CHKERRQ(ierr); 199 PetscFunctionReturn(0); 200 } 201 202 static std::array<bool,PETSC_DEVICE_MAX> initializedDevice = {}; 203 static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices = {}; 204 static_assert(initializedDevice.size() == defaultDevices.size(),""); 205 206 /*@C 207 PetscDeviceInitialize - Initialize PetscDevice 208 209 Not Collective, Possibly Synchronous 210 211 Input Parameter: 212 . type - The PetscDeviceType to initialize 213 214 Notes: 215 Eagerly initializes the corresponding PetscDeviceType if needed. 216 217 Level: beginner 218 219 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialized(), PetscDeviceCreate(), PetscDeviceDestroy() 220 @*/ 221 PetscErrorCode PetscDeviceInitialize(PetscDeviceType type) 222 { 223 PetscErrorCode ierr; 224 225 PetscFunctionBegin; 226 PetscValidDeviceType(type,1); 227 ierr = PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /*@C 232 PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular 233 PetscDeviceType 234 235 Not Collective, Asynchronous 236 237 Input Parameter: 238 . type - The PetscDeviceType to check 239 240 Output Parameter: 241 . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise 242 243 Notes: 244 If one has not configured PETSc for a particular PetscDeviceType then this routine will 245 return PETSC_FALSE for that PetscDeviceType. 246 247 Level: beginner 248 249 .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), PetscDeviceCreate(), PetscDeviceDestroy() 250 @*/ 251 PetscBool PetscDeviceInitialized(PetscDeviceType type) 252 { 253 return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]); 254 } 255 256 /* Actual intialization function; any functions claiming to initialize PetscDevice or 257 * PetscDeviceContext will have to run through this one */ 258 PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId) 259 { 260 PetscErrorCode ierr; 261 262 PetscFunctionBegin; 263 PetscValidDeviceType(type,1); 264 if (PetscLikely(PetscDeviceInitialized(type))) PetscFunctionReturn(0); 265 if (PetscUnlikelyDebug(defaultDevices[type])) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]); 266 ierr = PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type]);CHKERRQ(ierr); 267 ierr = PetscDeviceConfigure(defaultDevices[type]);CHKERRQ(ierr); 268 /* the default devices are all automatically "referenced" at least once, otherwise the 269 * reference counting is off for them. We could alternatively increase the reference count 270 * when they are retrieved but that is a lot more brittle; what's to stop someone from doing 271 * the following? 272 273 for (int i = 0; i < 10000; ++i) auto device = PetscDeviceDefault_Internal(); 274 */ 275 initializedDevice[type] = true; 276 PetscFunctionReturn(0); 277 } 278 279 static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType) 280 { 281 PetscErrorCode ierr; 282 283 PetscFunctionBegin; 284 if (!PetscDeviceConfiguredFor_Internal(type)) { 285 ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 286 defaultDevices[type] = PETSC_NULLPTR; 287 PetscFunctionReturn(0); 288 } 289 ierr = PetscInfo1(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 290 /* ugly switch needed to pick the right global variable... could maybe do this as a union? */ 291 switch (type) { 292 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType); 293 PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType); 294 default: 295 PETSC_DEVICE_DEFAULT_CASE(comm,type); 296 break; 297 } 298 /* defaultInitType and defaultDeviceId now represent what the individual TYPES have decided 299 * to initialize as */ 300 if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) { 301 ierr = PetscInfo1(PETSC_NULLPTR,"Greedily initializing %s PetscDevice\n",PetscDeviceTypes[type]);CHKERRQ(ierr); 302 ierr = PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId);CHKERRQ(ierr); 303 if (defaultView) { 304 PetscViewer vwr; 305 306 ierr = PetscViewerASCIIGetStdout(comm,&vwr);CHKERRQ(ierr); 307 ierr = PetscDeviceView(defaultDevices[type],vwr);CHKERRQ(ierr); 308 } 309 } 310 PetscFunctionReturn(0); 311 } 312 313 /* called from PetscFinalize() do not call yourself! */ 314 static PetscErrorCode PetscDeviceFinalize_Private(void) 315 { 316 PetscErrorCode ierr; 317 318 PetscFunctionBegin; 319 if (PetscDefined(USE_DEBUG)) { 320 PETSC_CONSTEXPR_17 auto PetscDeviceCheckAllDestroyedAfterFinalize = [](){ 321 PetscFunctionBegin; 322 for (const auto &device : defaultDevices) { 323 if (PetscUnlikely(device)) SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_COR,"Device of type '%s' had reference count %" PetscInt_FMT " and was not fully destroyed during PetscFinalize()",PetscDeviceTypes[device->type],device->refcnt); 324 } 325 PetscFunctionReturn(0); 326 }; 327 /* you might be thinking, why on earth are you registered yet another finalizer in a 328 * function already called during PetscRegisterFinalizeAll()? If this seems stupid it's 329 * because it is. 330 * 331 * The crux of the problem is that the initializer (and therefore the ~finalizer~) of 332 * PetscDeviceContext is guaranteed to run after this finalizer. So if the global context 333 * had a default PetscDevice attached it will hold a reference this routine won't destroy 334 * it. So we need to check that all devices have been destroyed after the global context is 335 * destroyed. In summary: 336 * 337 * 1. This finalizer runs and destroys all devices, except it may not because the global 338 * context may still hold a reference! 339 * 2. The global context finalizer runs and in turn actually destroys the referenced 340 * device. 341 * 3. Our newly added finalizer runs and checks that all is well. 342 */ 343 ierr = PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize);CHKERRQ(ierr); 344 } 345 for (auto &&device : defaultDevices) {ierr = PetscDeviceDestroy(&device);CHKERRQ(ierr);} 346 CHKERRCXX(initializedDevice.fill(false)); 347 PetscFunctionReturn(0); 348 } 349 350 /* begins the init proceeedings for the entire PetscDevice stack. there are 3 stages of 351 * initialization types: 352 1. defaultInitType - how does PetscDevice as a whole expect to initialize? 353 2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize? 354 e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but 355 have all CUDA devices still initialize. 356 357 All told the following happens: 358 0. defaultInitType -> LAZY 359 1. Check for log_view/log_summary, if yes defaultInitType -> EAGER 360 2. PetscDevice initializes each sub type with deviceDefaultInitType. 361 2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition 362 to checking for specific device init. if view or specific device init 363 subTypeDefaultInitType -> EAGER. disabled once again overrides all. 364 */ 365 PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm) 366 { 367 PetscBool flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE; 368 PetscInt defaultDevice = PETSC_DECIDE; 369 PetscDeviceType deviceContextInitDevice = PETSC_DEVICE_DEFAULT; 370 PetscDeviceInitType defaultInitType; 371 PetscErrorCode ierr; 372 373 PetscFunctionBegin; 374 if (PetscDefined(USE_DEBUG)) { 375 int result; 376 377 ierr = MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result);CHKERRMPI(ierr); 378 /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the 379 * global space */ 380 if (PetscUnlikely(result != MPI_IDENT)) { 381 char name[MPI_MAX_OBJECT_NAME] = {}; 382 int len; /* unused */ 383 384 ierr = MPI_Comm_get_name(comm,name,&len);CHKERRMPI(ierr); 385 SETERRQ1(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name); 386 } 387 } 388 comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */ 389 ierr = PetscRegisterFinalize(PetscDeviceFinalize_Private);CHKERRQ(ierr); 390 ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg);CHKERRQ(ierr); 391 if (!flg) { 392 ierr = PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg);CHKERRQ(ierr); 393 } 394 { 395 PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY; 396 397 ierr = PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys");CHKERRQ(ierr); 398 ierr = PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR);CHKERRQ(ierr); 399 ierr = PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max());CHKERRQ(ierr); 400 ierr = PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg);CHKERRQ(ierr); 401 ierr = PetscOptionsEnd();CHKERRQ(ierr); 402 if (initIdx == PETSC_DEVICE_INIT_NONE) { 403 /* disabled all device initialization if devices are globally disabled */ 404 if (PetscUnlikely(defaultDevice != PETSC_DECIDE)) SETERRQ(comm,PETSC_ERR_USER_INPUT,"You have disabled devices but also specified a particular device to use, these options are mutually exlusive"); 405 defaultView = PETSC_FALSE; 406 } else { 407 defaultView = static_cast<decltype(defaultView)>(defaultView && flg); 408 if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER; 409 } 410 defaultInitType = static_cast<decltype(defaultInitType)>(initIdx); 411 } 412 static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),""); 413 for (int i = 1; i < PETSC_DEVICE_MAX; ++i) { 414 const auto deviceType = static_cast<PetscDeviceType>(i); 415 auto initType = defaultInitType; 416 417 ierr = PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType);CHKERRQ(ierr); 418 if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) { 419 initializeDeviceContextEagerly = PETSC_TRUE; 420 deviceContextInitDevice = deviceType; 421 } 422 } 423 if (initializeDeviceContextEagerly) { 424 PetscDeviceContext dctx; 425 426 /* somewhat inefficient here as the device context is potentially fully set up twice (once 427 * when retrieved then the second time if setfromoptions makes changes) */ 428 ierr = PetscInfo1(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice]);CHKERRQ(ierr); 429 ierr = PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice);CHKERRQ(ierr); 430 ierr = PetscDeviceContextGetCurrentContext(&dctx);CHKERRQ(ierr); 431 ierr = PetscDeviceContextSetFromOptions(comm,"root_",dctx);CHKERRQ(ierr); 432 ierr = PetscDeviceContextSetUp(dctx);CHKERRQ(ierr); 433 } 434 PetscFunctionReturn(0); 435 } 436 437 /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */ 438 PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device) 439 { 440 PetscErrorCode ierr; 441 442 PetscFunctionBegin; 443 PetscValidPointer(device,2); 444 ierr = PetscDeviceInitialize(type);CHKERRQ(ierr); 445 *device = defaultDevices[type]; 446 PetscFunctionReturn(0); 447 } 448