1 #include "../../interface/cupmdevice.hpp" 2 #include <algorithm> 3 #include <csetjmp> // for cuda mpi awareness 4 #include <csignal> // SIGSEGV 5 #include <iterator> 6 #include <type_traits> 7 8 #if PetscDefined(USE_LOG) 9 PETSC_INTERN PetscErrorCode PetscLogInitialize(void); 10 #else 11 #define PetscLogInitialize() 0 12 #endif 13 14 namespace Petsc 15 { 16 17 namespace Device 18 { 19 20 namespace CUPM 21 { 22 23 // internal "impls" class for CUPMDevice. Each instance represents a single cupm device 24 template <DeviceType T> 25 class Device<T>::DeviceInternal 26 { 27 const int id_; 28 bool devInitialized_ = false; 29 cupmDeviceProp_t dprop_; // cudaDeviceProp appears to be an actual struct, i.e. you can't 30 // initialize it with nullptr or NULL (i've tried) 31 32 PETSC_CXX_COMPAT_DECL(bool CUPMAwareMPI_()); 33 34 public: 35 // default constructor 36 explicit constexpr DeviceInternal(int dev) noexcept : id_(dev) { } 37 38 // gather all relevant information for a particular device, a cupmDeviceProp_t is 39 // usually sufficient here 40 PETSC_NODISCARD PetscErrorCode initialize() noexcept; 41 PETSC_NODISCARD PetscErrorCode configure() noexcept; 42 PETSC_NODISCARD PetscErrorCode view(PetscViewer) const noexcept; 43 PETSC_NODISCARD PetscErrorCode finalize() noexcept; 44 45 PETSC_NODISCARD auto id() const -> decltype(id_) { return id_; } 46 PETSC_NODISCARD auto initialized() const -> decltype(devInitialized_) { return devInitialized_; } 47 PETSC_NODISCARD auto prop() const -> const decltype(dprop_)& { return dprop_; } 48 49 // factory 50 PETSC_CXX_COMPAT_DECL(std::unique_ptr<DeviceInternal> makeDevice(int i)) 51 { 52 return std::unique_ptr<DeviceInternal>(new DeviceInternal(i)); 53 } 54 }; 55 56 // the goal here is simply to get the cupm backend to create its context, not to do any type of 57 // modification of it, or create objects (since these may be affected by subsequent 58 // configuration changes) 59 template <DeviceType T> 60 PetscErrorCode Device<T>::DeviceInternal::initialize() noexcept 61 { 62 PetscFunctionBegin; 63 if (devInitialized_) PetscFunctionReturn(0); 64 devInitialized_ = true; 65 // need to do this BEFORE device has been set, although if the user 66 // has already done this then we just ignore it 67 if (cupmSetDeviceFlags(cupmDeviceMapHost) == cupmErrorSetOnActiveProcess) { 68 // reset the error if it was cupmErrorSetOnActiveProcess 69 const auto PETSC_UNUSED unused = cupmGetLastError(); 70 } else {PetscCallCUPM(cupmGetLastError());} 71 // cuda 5.0+ will create a context when cupmSetDevice is called 72 if (cupmSetDevice(id_) != cupmErrorDeviceAlreadyInUse) PetscCallCUPM(cupmGetLastError()); 73 // forces cuda < 5.0 to initialize a context 74 PetscCallCUPM(cupmFree(nullptr)); 75 // where is this variable defined and when is it set? who knows! but it is defined and set 76 // at this point. either way, each device must make this check since I guess MPI might not be 77 // aware of all of them? 78 if (use_gpu_aware_mpi) { 79 // For OpenMPI, we could do a compile time check with 80 // "defined(PETSC_HAVE_OMPI_MAJOR_VERSION) && defined(MPIX_CUDA_AWARE_SUPPORT) && 81 // MPIX_CUDA_AWARE_SUPPORT" to see if it is CUDA-aware. However, recent versions of IBM 82 // Spectrum MPI (e.g., 10.3.1) on Summit meet above conditions, but one has to use jsrun 83 // --smpiargs=-gpu to really enable GPU-aware MPI. So we do the check at runtime with a 84 // code that works only with GPU-aware MPI. 85 if (PetscUnlikely(!CUPMAwareMPI_())) { 86 (*PetscErrorPrintf)("PETSc is configured with GPU support, but your MPI is not GPU-aware. For better performance, please use a GPU-aware MPI.\n"); 87 (*PetscErrorPrintf)("If you do not care, add option -use_gpu_aware_mpi 0. To not see the message again, add the option to your .petscrc, OR add it to the env var PETSC_OPTIONS.\n"); 88 (*PetscErrorPrintf)("If you do care, for IBM Spectrum MPI on OLCF Summit, you may need jsrun --smpiargs=-gpu.\n"); 89 (*PetscErrorPrintf)("For OpenMPI, you need to configure it --with-cuda (https://www.open-mpi.org/faq/?category=buildcuda)\n"); 90 (*PetscErrorPrintf)("For MVAPICH2-GDR, you need to set MV2_USE_CUDA=1 (http://mvapich.cse.ohio-state.edu/userguide/gdr/)\n"); 91 (*PetscErrorPrintf)("For Cray-MPICH, you need to set MPICH_RDMA_ENABLED_CUDA=1 (https://www.olcf.ornl.gov/tutorials/gpudirect-mpich-enabled-cuda/)\n"); 92 PETSCABORT(PETSC_COMM_SELF,PETSC_ERR_LIB); 93 } 94 } 95 PetscFunctionReturn(0); 96 } 97 98 template <DeviceType T> 99 PetscErrorCode Device<T>::DeviceInternal::configure() noexcept 100 { 101 PetscFunctionBegin; 102 PetscAssert(devInitialized_,PETSC_COMM_SELF,PETSC_ERR_COR,"Device %d being configured before it was initialized",id_); 103 // why on EARTH nvidia insists on making otherwise informational states into 104 // fully-fledged error codes is beyond me. Why couldn't a pointer to bool argument have 105 // sufficed?!?!?! 106 if (cupmSetDevice(id_) != cupmErrorDeviceAlreadyInUse) PetscCallCUPM(cupmGetLastError()); 107 // need to update the device properties 108 PetscCallCUPM(cupmGetDeviceProperties(&dprop_,id_)); 109 PetscCall(PetscInfo(nullptr,"Configured device %d\n",id_)); 110 PetscFunctionReturn(0); 111 } 112 113 template <DeviceType T> 114 PetscErrorCode Device<T>::DeviceInternal::view(PetscViewer viewer) const noexcept 115 { 116 PetscBool iascii; 117 118 PetscFunctionBegin; 119 PetscAssert(devInitialized_,PETSC_COMM_SELF,PETSC_ERR_COR,"Device %d being viewed before it was initialized or configured",id_); 120 PetscCall(PetscObjectTypeCompare(PetscObjectCast(viewer),PETSCVIEWERASCII,&iascii)); 121 if (iascii) { 122 MPI_Comm comm; 123 PetscMPIInt rank; 124 PetscViewer sviewer; 125 126 PetscCall(PetscObjectGetComm(PetscObjectCast(viewer),&comm)); 127 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 128 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 129 PetscCall(PetscViewerASCIIPrintf(sviewer,"[%d] device %d: %s\n",rank,id_,dprop_.name)); 130 PetscCall(PetscViewerASCIIPushTab(sviewer)); 131 PetscCall(PetscViewerASCIIPrintf(sviewer,"Compute capability: %d.%d\n",dprop_.major,dprop_.minor)); 132 PetscCall(PetscViewerASCIIPrintf(sviewer,"Multiprocessor Count: %d\n",dprop_.multiProcessorCount)); 133 PetscCall(PetscViewerASCIIPrintf(sviewer,"Maximum Grid Dimensions: %d x %d x %d\n",dprop_.maxGridSize[0],dprop_.maxGridSize[1],dprop_.maxGridSize[2])); 134 PetscCall(PetscViewerASCIIPrintf(sviewer,"Maximum Block Dimensions: %d x %d x %d\n",dprop_.maxThreadsDim[0],dprop_.maxThreadsDim[1],dprop_.maxThreadsDim[2])); 135 PetscCall(PetscViewerASCIIPrintf(sviewer,"Maximum Threads Per Block: %d\n",dprop_.maxThreadsPerBlock)); 136 PetscCall(PetscViewerASCIIPrintf(sviewer,"Warp Size: %d\n",dprop_.warpSize)); 137 PetscCall(PetscViewerASCIIPrintf(sviewer,"Total Global Memory (bytes): %zu\n",dprop_.totalGlobalMem)); 138 PetscCall(PetscViewerASCIIPrintf(sviewer,"Total Constant Memory (bytes): %zu\n",dprop_.totalConstMem)); 139 PetscCall(PetscViewerASCIIPrintf(sviewer,"Shared Memory Per Block (bytes): %zu\n",dprop_.sharedMemPerBlock)); 140 PetscCall(PetscViewerASCIIPrintf(sviewer,"Multiprocessor Clock Rate (KHz): %d\n",dprop_.clockRate)); 141 PetscCall(PetscViewerASCIIPrintf(sviewer,"Memory Clock Rate (KHz): %d\n",dprop_.memoryClockRate)); 142 PetscCall(PetscViewerASCIIPrintf(sviewer,"Memory Bus Width (bits): %d\n",dprop_.memoryBusWidth)); 143 PetscCall(PetscViewerASCIIPrintf(sviewer,"Peak Memory Bandwidth (GB/s): %f\n",2.0*dprop_.memoryClockRate*(dprop_.memoryBusWidth/8)/1.0e6)); 144 PetscCall(PetscViewerASCIIPrintf(sviewer,"Can map host memory: %s\n",dprop_.canMapHostMemory ? "PETSC_TRUE" : "PETSC_FALSE")); 145 PetscCall(PetscViewerASCIIPrintf(sviewer,"Can execute multiple kernels concurrently: %s\n",dprop_.concurrentKernels ? "PETSC_TRUE" : "PETSC_FALSE")); 146 PetscCall(PetscViewerASCIIPopTab(sviewer)); 147 PetscCall(PetscViewerFlush(sviewer)); 148 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 149 PetscCall(PetscViewerFlush(viewer)); 150 } 151 PetscFunctionReturn(0); 152 } 153 154 static std::jmp_buf cupmMPIAwareJumpBuffer; 155 static bool cupmMPIAwareJumpBufferSet; 156 157 // godspeed to anyone that attempts to call this function 158 void SilenceVariableIsNotNeededAndWillNotBeEmittedWarning_ThisFunctionShouldNeverBeCalled() 159 { 160 PETSCABORT(MPI_COMM_NULL,INT_MAX); 161 if (cupmMPIAwareJumpBufferSet) (void)cupmMPIAwareJumpBuffer; 162 } 163 164 #define CHKCUPMAWARE(...) do { \ 165 cupmError_t cerr_ = __VA_ARGS__; \ 166 if (PetscUnlikely(cerr_ != cupmSuccess)) return false; \ 167 } while (0) 168 169 template <DeviceType T> 170 PETSC_CXX_COMPAT_DEFN(bool Device<T>::DeviceInternal::CUPMAwareMPI_()) 171 { 172 constexpr int bufSize = 2; 173 constexpr int hbuf[bufSize] = {1,0}; 174 int *dbuf = nullptr; 175 constexpr auto bytes = bufSize*sizeof(*dbuf); 176 auto awareness = false; 177 const auto cupmSignalHandler = [](int signal, void *ptr) -> PetscErrorCode { 178 if ((signal == SIGSEGV) && cupmMPIAwareJumpBufferSet) std::longjmp(cupmMPIAwareJumpBuffer,1); 179 return PetscSignalHandlerDefault(signal,ptr); 180 }; 181 182 PetscFunctionBegin; 183 CHKCUPMAWARE(cupmMalloc(reinterpret_cast<void**>(&dbuf),bytes)); 184 CHKCUPMAWARE(cupmMemcpy(dbuf,hbuf,bytes,cupmMemcpyHostToDevice)); 185 PetscCallAbort(PETSC_COMM_SELF,PetscPushSignalHandler(cupmSignalHandler,nullptr)); 186 cupmMPIAwareJumpBufferSet = true; 187 if (setjmp(cupmMPIAwareJumpBuffer)) { 188 // if a segv was triggered in the MPI_Allreduce below, it is very likely due to MPI not 189 // being GPU-aware 190 awareness = false; 191 // control flow up until this point: 192 // 1. CUPMDevice<T>::CUPMDeviceInternal::MPICUPMAware__() 193 // 2. MPI_Allreduce 194 // 3. SIGSEGV 195 // 4. PetscSignalHandler_Private 196 // 5. cupmSignalHandler (lambda function) 197 // 6. here 198 // PetscSignalHandler_Private starts with PetscFunctionBegin and is pushed onto the stack 199 // so we must undo this. This would be most naturally done in cupmSignalHandler, however 200 // the C/C++ standard dictates: 201 // 202 // After invoking longjmp(), non-volatile-qualified local objects should not be accessed if 203 // their values could have changed since the invocation of setjmp(). Their value in this 204 // case is considered indeterminate, and accessing them is undefined behavior. 205 // 206 // so for safety (since we don't know what PetscStackPop may try to read/declare) we do it 207 // outside of the longjmp control flow 208 PetscStackPop; 209 } else if (!MPI_Allreduce(dbuf,dbuf+1,1,MPI_INT,MPI_SUM,PETSC_COMM_SELF)) awareness = true; 210 cupmMPIAwareJumpBufferSet = false; 211 PetscCallAbort(PETSC_COMM_SELF,PetscPopSignalHandler()); 212 CHKCUPMAWARE(cupmFree(dbuf)); 213 PetscFunctionReturn(awareness); 214 } 215 216 #undef CHKCUPMAWARE 217 218 template <DeviceType T> 219 PetscErrorCode Device<T>::DeviceInternal::finalize() noexcept 220 { 221 PetscFunctionBegin; 222 devInitialized_ = false; 223 PetscFunctionReturn(0); 224 } 225 226 template <DeviceType T> 227 PetscErrorCode Device<T>::finalize_() noexcept 228 { 229 PetscFunctionBegin; 230 if (!initialized_) PetscFunctionReturn(0); 231 for (auto&& device : devices_) { 232 if (device) { 233 PetscCall(device->finalize()); 234 device.reset(); 235 } 236 } 237 defaultDevice_ = PETSC_CUPM_DEVICE_NONE; // disabled by default 238 initialized_ = false; 239 PetscFunctionReturn(0); 240 } 241 242 // these functions should be named identically to the option they produce where "CUPMTYPE" and 243 // "cupmtype" are the uppercase and lowercase string versions of the cupm backend respectively 244 template <DeviceType T> 245 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char* PetscDevice_CUPMTYPE_Options()) 246 { 247 switch (T) { 248 case DeviceType::CUDA: return "PetscDevice CUDA Options"; 249 case DeviceType::HIP: return "PetscDevice HIP Options"; 250 } 251 PetscUnreachable(); 252 return "PETSC_ERROR_PLIB"; 253 } 254 255 template <DeviceType T> 256 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char* device_enable_cupmtype()) 257 { 258 switch (T) { 259 case DeviceType::CUDA: return "-device_enable_cuda"; 260 case DeviceType::HIP: return "-device_enable_hip"; 261 } 262 PetscUnreachable(); 263 return "PETSC_ERROR_PLIB"; 264 } 265 266 template <DeviceType T> 267 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char* device_select_cupmtype()) 268 { 269 switch (T) { 270 case DeviceType::CUDA: return "-device_select_cuda"; 271 case DeviceType::HIP: return "-device_select_hip"; 272 } 273 PetscUnreachable(); 274 return "PETSC_ERROR_PLIB"; 275 } 276 277 template <DeviceType T> 278 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char* device_view_cupmtype()) 279 { 280 switch (T) { 281 case DeviceType::CUDA: return "-device_view_cuda"; 282 case DeviceType::HIP: return "-device_view_hip"; 283 } 284 PetscUnreachable(); 285 return "PETSC_ERROR_PLIB"; 286 } 287 288 template <DeviceType T> 289 PETSC_CXX_COMPAT_DECL(PETSC_CONSTEXPR_14 const char* CUPM_VISIBLE_DEVICES()) 290 { 291 switch (T) { 292 case DeviceType::CUDA: return "CUDA_VISIBLE_DEVICES"; 293 case DeviceType::HIP: return "HIP_VISIBLE_DEVICES"; 294 } 295 PetscUnreachable(); 296 return "PETSC_ERROR_PLIB"; 297 } 298 299 template <DeviceType T> 300 PetscErrorCode Device<T>::initialize(MPI_Comm comm, PetscInt *defaultDeviceId, PetscDeviceInitType *defaultInitType) noexcept 301 { 302 PetscInt initTypeCUPM = *defaultInitType,id = *defaultDeviceId; 303 PetscBool view = PETSC_FALSE,flg; 304 int ndev; 305 cupmError_t cerr; 306 307 PetscFunctionBegin; 308 if (initialized_) PetscFunctionReturn(0); 309 initialized_ = true; 310 PetscCall(PetscRegisterFinalize(finalize_)); 311 312 { 313 PetscErrorCode ierr; 314 315 // the functions to populate the command line strings are named after the string they return 316 ierr = PetscOptionsBegin(comm,nullptr,PetscDevice_CUPMTYPE_Options<T>(),"Sys");PetscCall(ierr); 317 PetscCall(PetscOptionsEList(device_enable_cupmtype<T>(),"How (or whether) to initialize a device","CUPMDevice<CUPMDeviceType>::initialize()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initTypeCUPM],&initTypeCUPM,nullptr)); 318 PetscCall(PetscOptionsRangeInt(device_select_cupmtype<T>(),"Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate",id,&id,nullptr,PETSC_DECIDE,std::numeric_limits<decltype(defaultDevice_)>::max())); 319 PetscCall(PetscOptionsBool(device_view_cupmtype<T>(),"Display device information and assignments (forces eager initialization)",nullptr,view,&view,&flg)); 320 ierr = PetscOptionsEnd();PetscCall(ierr); 321 } 322 323 cerr = cupmGetDeviceCount(&ndev); 324 switch (cerr) { 325 case cupmErrorNoDevice: { 326 PetscBool found; 327 PetscBool ignoreCupmError = PETSC_FALSE; 328 char buf[16]; 329 330 PetscCall(PetscOptionsGetenv(comm,CUPM_VISIBLE_DEVICES<T>(),buf,sizeof(buf),&found)); 331 if (found) { 332 size_t len; 333 334 PetscCall(PetscStrlen(buf,&len)); 335 if (!len || buf[0] == '-') ignoreCupmError = PETSC_TRUE; 336 } 337 id = PETSC_CUPM_DEVICE_NONE; // there are no devices anyway 338 if (ignoreCupmError) { 339 initTypeCUPM = PETSC_DEVICE_INIT_NONE; 340 auto PETSC_UNUSED ignored = cupmGetLastError(); 341 break; 342 } 343 // if we don't outright ignore the error we then drop and check if the user tried to 344 // eagerly initialize the device 345 } 346 case cupmErrorStubLibrary: 347 if (PetscUnlikely((initTypeCUPM == PETSC_DEVICE_INIT_EAGER) || (view && flg))) { 348 const auto name = cupmGetErrorName(cerr); 349 const auto desc = cupmGetErrorString(cerr); 350 const auto backend = cupmName(); 351 SETERRQ(comm,PETSC_ERR_USER_INPUT,"Cannot eagerly initialize %s, as doing so results in %s error %d (%s) : %s",backend,backend,static_cast<PetscErrorCode>(cerr),name,desc); 352 } 353 initTypeCUPM = PETSC_DEVICE_INIT_NONE; 354 {auto PETSC_UNUSED ignored = cupmGetLastError();} 355 break; 356 default: 357 PetscCallCUPM(cerr); 358 break; 359 } 360 361 if (initTypeCUPM == PETSC_DEVICE_INIT_NONE) { 362 if ((id > 0) || (id == PETSC_DECIDE)) id = PETSC_CUPM_DEVICE_NONE; 363 } else { 364 PetscCall(PetscDeviceCheckDeviceCount_Internal(ndev)); 365 if (id == PETSC_DECIDE) { 366 if (ndev) { 367 PetscMPIInt rank; 368 369 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 370 id = rank % ndev; 371 } else id = 0; 372 } 373 view = static_cast<decltype(view)>(view && flg); 374 if (view) initTypeCUPM = PETSC_DEVICE_INIT_EAGER; 375 } 376 377 static_assert(std::is_same<PetscMPIInt,decltype(defaultDevice_)>::value,""); 378 // id is PetscInt, _defaultDevice is int 379 PetscCall(PetscMPIIntCast(id,&defaultDevice_)); 380 if (initTypeCUPM == PETSC_DEVICE_INIT_EAGER) { 381 devices_[defaultDevice_] = DeviceInternal::makeDevice(defaultDevice_); 382 PetscCall(devices_[defaultDevice_]->initialize()); 383 PetscCall(devices_[defaultDevice_]->configure()); 384 if (view) { 385 PetscViewer vwr; 386 387 PetscCall(PetscLogInitialize()); 388 PetscCall(PetscViewerASCIIGetStdout(comm,&vwr)); 389 PetscCall(devices_[defaultDevice_]->view(vwr)); 390 } 391 } 392 393 // record the results of the initialization 394 *defaultInitType = static_cast<PetscDeviceInitType>(initTypeCUPM); 395 *defaultDeviceId = id; 396 PetscFunctionReturn(0); 397 } 398 399 template <DeviceType T> 400 PetscErrorCode Device<T>::getDevice(PetscDevice device, PetscInt id) const noexcept 401 { 402 const auto cerr = static_cast<cupmError_t>(-defaultDevice_); 403 404 PetscFunctionBegin; 405 PetscCheck(defaultDevice_ != PETSC_CUPM_DEVICE_NONE,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to retrieve a %s PetscDevice when it has been disabled",cupmName()); 406 PetscCheck(defaultDevice_ >= 0,PETSC_COMM_SELF,PETSC_ERR_GPU,"Cannot lazily initialize PetscDevice: %s error %d (%s) : %s",cupmName(),static_cast<PetscErrorCode>(cerr),cupmGetErrorName(cerr),cupmGetErrorString(cerr)); 407 if (id == PETSC_DECIDE) id = defaultDevice_; 408 PetscAssert(static_cast<decltype(devices_.size())>(id) < devices_.size(),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only supports %zu number of devices but trying to get device with id %" PetscInt_FMT,devices_.size(),id); 409 if (devices_[id]) { 410 PetscAssert(id == devices_[id]->id(),PETSC_COMM_SELF,PETSC_ERR_PLIB,"Entry %" PetscInt_FMT " contains device with mismatching id %d",id,devices_[id]->id()); 411 } else devices_[id] = DeviceInternal::makeDevice(id); 412 PetscCall(devices_[id]->initialize()); 413 device->deviceId = devices_[id]->id(); // technically id = _devices[id]->_id here 414 device->ops->createcontext = create_; 415 device->ops->configure = this->configureDevice; 416 device->ops->view = this->viewDevice; 417 PetscFunctionReturn(0); 418 } 419 420 template <DeviceType T> 421 PetscErrorCode Device<T>::configureDevice(PetscDevice device) noexcept 422 { 423 PetscFunctionBegin; 424 PetscCall(devices_[device->deviceId]->configure()); 425 PetscFunctionReturn(0); 426 } 427 428 template <DeviceType T> 429 PetscErrorCode Device<T>::viewDevice(PetscDevice device, PetscViewer viewer) noexcept 430 { 431 PetscFunctionBegin; 432 // now this __shouldn't__ reconfigure the device, but there is a petscinfo call to indicate 433 // it is being reconfigured 434 PetscCall(devices_[device->deviceId]->configure()); 435 PetscCall(devices_[device->deviceId]->view(viewer)); 436 PetscFunctionReturn(0); 437 } 438 439 // explicitly instantiate the classes 440 #if PetscDefined(HAVE_CUDA) 441 template class Device<DeviceType::CUDA>; 442 #endif 443 #if PetscDefined(HAVE_HIP) 444 template class Device<DeviceType::HIP>; 445 #endif 446 447 } // namespace CUPM 448 449 } // namespace Device 450 451 } // namespace Petsc 452