1 #include "../../interface/cupmdevice.hpp" 2 #include <algorithm> 3 #include <csetjmp> // for cuda mpi awareness 4 #include <csignal> // SIGSEGV 5 #include <iterator> 6 #include <type_traits> 7 8 #if PetscDefined(USE_LOG) 9 PETSC_INTERN PetscErrorCode PetscLogInitialize(void); 10 #else 11 #define PetscLogInitialize() 0 12 #endif 13 14 namespace Petsc 15 { 16 17 // internal "impls" class for CUPMDevice. Each instance represents a single cupm device 18 template <CUPMDeviceType T> 19 class CUPMDevice<T>::CUPMDeviceInternal 20 { 21 const int _id; 22 bool _devInitialized = false; 23 cupmDeviceProp_t _dprop; // cudaDeviceProp appears to be an actual struct, i.e. you can't 24 // initialize it with nullptr or NULL (i've tried) 25 26 PETSC_NODISCARD static bool __CUPMAwareMPI() noexcept; 27 28 public: 29 // default constructor 30 explicit constexpr CUPMDeviceInternal(int dev) noexcept : _id(dev) { } 31 32 // gather all relevant information for a particular device, a cupmDeviceProp_t is 33 // usually sufficient here 34 PETSC_NODISCARD PetscErrorCode initialize() noexcept; 35 PETSC_NODISCARD PetscErrorCode configure() noexcept; 36 PETSC_NODISCARD PetscErrorCode view(PetscViewer) const noexcept; 37 PETSC_NODISCARD PetscErrorCode finalize() noexcept; 38 39 PETSC_NODISCARD auto id() const -> decltype(_id) { return _id; } 40 PETSC_NODISCARD auto initialized() const -> decltype(_devInitialized) { return _devInitialized; } 41 PETSC_NODISCARD auto prop() const -> const decltype(_dprop)& { return _dprop; } 42 43 // factory 44 static constexpr std::unique_ptr<CUPMDeviceInternal> makeDevice(int i) noexcept 45 { 46 return std::unique_ptr<CUPMDeviceInternal>(new CUPMDeviceInternal(i)); 47 } 48 }; 49 50 // the goal here is simply to get the cupm backend to create its context, not to do any type of 51 // modification of it, or create objects (since these may be affected by subsequent 52 // configuration changes) 53 template <CUPMDeviceType T> 54 PetscErrorCode CUPMDevice<T>::CUPMDeviceInternal::initialize() noexcept 55 { 56 cupmError_t cerr; 57 58 PetscFunctionBegin; 59 if (_devInitialized) PetscFunctionReturn(0); 60 _devInitialized = true; 61 // need to do this BEFORE device has been set, although if the user 62 // has already done this then we just ignore it 63 if (cupmSetDeviceFlags(cupmDeviceMapHost) == cupmErrorSetOnActiveProcess) { 64 // reset the error if it was cupmErrorSetOnActiveProcess 65 const auto PETSC_UNUSED unused = cupmGetLastError(); 66 } else {CHKERRCUPM(cupmGetLastError());} 67 // cuda 5.0+ will create a context when cupmSetDevice is called 68 if (cupmSetDevice(_id) != cupmErrorDeviceAlreadyInUse) CHKERRCUPM(cupmGetLastError()); 69 // forces cuda < 5.0 to initialize a context 70 cerr = cupmFree(nullptr);CHKERRCUPM(cerr); 71 // where is this variable defined and when is it set? who knows! but it is defined and set 72 // at this point. either way, each device must make this check since I guess MPI might not be 73 // aware of all of them? 74 if (use_gpu_aware_mpi) { 75 // For OpenMPI, we could do a compile time check with 76 // "defined(PETSC_HAVE_OMPI_MAJOR_VERSION) && defined(MPIX_CUDA_AWARE_SUPPORT) && 77 // MPIX_CUDA_AWARE_SUPPORT" to see if it is CUDA-aware. However, recent versions of IBM 78 // Spectrum MPI (e.g., 10.3.1) on Summit meet above conditions, but one has to use jsrun 79 // --smpiargs=-gpu to really enable GPU-aware MPI. So we do the check at runtime with a 80 // code that works only with GPU-aware MPI. 81 if (PetscUnlikely(!__CUPMAwareMPI())) { 82 (*PetscErrorPrintf)("PETSc is configured with GPU support, but your MPI is not GPU-aware. For better performance, please use a GPU-aware MPI.\n"); 83 (*PetscErrorPrintf)("If you do not care, add option -use_gpu_aware_mpi 0. To not see the message again, add the option to your .petscrc, OR add it to the env var PETSC_OPTIONS.\n"); 84 (*PetscErrorPrintf)("If you do care, for IBM Spectrum MPI on OLCF Summit, you may need jsrun --smpiargs=-gpu.\n"); 85 (*PetscErrorPrintf)("For OpenMPI, you need to configure it --with-cuda (https://www.open-mpi.org/faq/?category=buildcuda)\n"); 86 (*PetscErrorPrintf)("For MVAPICH2-GDR, you need to set MV2_USE_CUDA=1 (http://mvapich.cse.ohio-state.edu/userguide/gdr/)\n"); 87 (*PetscErrorPrintf)("For Cray-MPICH, you need to set MPICH_RDMA_ENABLED_CUDA=1 (https://www.olcf.ornl.gov/tutorials/gpudirect-mpich-enabled-cuda/)\n"); 88 PETSCABORT(PETSC_COMM_SELF,PETSC_ERR_LIB); 89 } 90 } 91 PetscFunctionReturn(0); 92 } 93 94 template <CUPMDeviceType T> 95 PetscErrorCode CUPMDevice<T>::CUPMDeviceInternal::configure() noexcept 96 { 97 cupmError_t cerr; 98 PetscErrorCode ierr; 99 100 PetscFunctionBegin; 101 if (PetscUnlikelyDebug(!_devInitialized)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_COR,"Device %d being configured before it was initialized",_id); 102 // why on EARTH nvidia insists on making otherwise informational states into 103 // fully-fledged error codes is beyond me. Why couldn't a pointer to bool argument have 104 // sufficed?!?!?! 105 if (cupmSetDevice(_id) != cupmErrorDeviceAlreadyInUse) CHKERRCUPM(cupmGetLastError()); 106 // need to update the device properties 107 cerr = cupmGetDeviceProperties(&_dprop,_id);CHKERRCUPM(cerr); 108 ierr = PetscInfo1(nullptr,"Configured device %d\n",_id);CHKERRQ(ierr); 109 PetscFunctionReturn(0); 110 } 111 112 template <CUPMDeviceType T> 113 PetscErrorCode CUPMDevice<T>::CUPMDeviceInternal::view(PetscViewer viewer) const noexcept 114 { 115 MPI_Comm comm; 116 PetscMPIInt rank; 117 PetscBool iascii; 118 PetscErrorCode ierr; 119 120 PetscFunctionBegin; 121 if (PetscUnlikelyDebug(!_devInitialized)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_COR,"Device %d being viewed before it was initialized or configured",_id); 122 ierr = PetscObjectTypeCompare(reinterpret_cast<PetscObject>(viewer),PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 123 ierr = PetscObjectGetComm(reinterpret_cast<PetscObject>(viewer),&comm);CHKERRQ(ierr); 124 if (PetscUnlikely(!iascii)) SETERRQ(comm,PETSC_ERR_SUP,"Only PetscViewer of type PETSCVIEWERASCII is supported"); 125 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 126 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 127 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] device %d: %s\n",rank,_id,_dprop.name);CHKERRQ(ierr); 128 // flush the assignment information 129 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 130 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 131 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Compute capability: %d.%d\n",_dprop.major,_dprop.minor);CHKERRQ(ierr); 132 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Multiprocessor Count: %d\n",_dprop.multiProcessorCount);CHKERRQ(ierr); 133 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Maximum Grid Dimensions: %d x %d x %d\n",_dprop.maxGridSize[0],_dprop.maxGridSize[1],_dprop.maxGridSize[2]);CHKERRQ(ierr); 134 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Maximum Block Dimensions: %d x %d x %d\n",_dprop.maxThreadsDim[0],_dprop.maxThreadsDim[1],_dprop.maxThreadsDim[2]);CHKERRQ(ierr); 135 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Maximum Threads Per Block: %d\n",_dprop.maxThreadsPerBlock);CHKERRQ(ierr); 136 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Warp Size: %d\n",_dprop.warpSize);CHKERRQ(ierr); 137 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Total Global Memory (bytes): %zu\n",_dprop.totalGlobalMem);CHKERRQ(ierr); 138 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Total Constant Memory (bytes): %zu\n",_dprop.totalConstMem);CHKERRQ(ierr); 139 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Shared Memory Per Block (bytes): %zu\n",_dprop.sharedMemPerBlock);CHKERRQ(ierr); 140 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Multiprocessor Clock Rate (KHz): %d\n",_dprop.clockRate);CHKERRQ(ierr); 141 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Memory Clock Rate (KHz): %d\n",_dprop.memoryClockRate);CHKERRQ(ierr); 142 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Memory Bus Width (bits): %d\n",_dprop.memoryBusWidth);CHKERRQ(ierr); 143 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Peak Memory Bandwidth (GB/s): %f\n",2.0*_dprop.memoryClockRate*(_dprop.memoryBusWidth/8)/1.0e6);CHKERRQ(ierr); 144 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Can map host memory: %s\n",_dprop.canMapHostMemory ? "PETSC_TRUE" : "PETSC_FALSE");CHKERRQ(ierr); 145 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Can execute multiple kernels concurrently: %s\n",_dprop.concurrentKernels ? "PETSC_TRUE" : "PETSC_FALSE");CHKERRQ(ierr); 146 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 147 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 148 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 static std::jmp_buf cupmMPIAwareJumpBuffer; 153 static bool cupmMPIAwareJumpBufferSet; 154 155 // godspeed to anyone that attempts to call this function 156 void SilenceVariableIsNotNeededAndWillNotBeEmittedWarning_ThisFunctionShouldNeverBeCalled() 157 { 158 PETSCABORT(MPI_COMM_NULL,INT_MAX); 159 if (cupmMPIAwareJumpBufferSet) (void)cupmMPIAwareJumpBuffer; 160 } 161 162 #define CHKCUPMAWARE(expr) if (PetscUnlikely((expr) != cupmSuccess)) return false; 163 164 template <CUPMDeviceType T> 165 bool CUPMDevice<T>::CUPMDeviceInternal::__CUPMAwareMPI() noexcept 166 { 167 constexpr int bufSize = 2; 168 constexpr int hbuf[bufSize] = {1,0}; 169 int *dbuf = nullptr; 170 bool awareness = false; 171 cupmError_t cerr; 172 PetscErrorCode ierr; 173 const auto cupmSignalHandler = [](int signal, void *ptr) -> PetscErrorCode { 174 if ((signal == SIGSEGV) && cupmMPIAwareJumpBufferSet) std::longjmp(cupmMPIAwareJumpBuffer,1); 175 return PetscSignalHandlerDefault(signal,ptr); 176 }; 177 178 PetscFunctionBegin; 179 cerr = cupmMalloc(reinterpret_cast<void**>(&dbuf),sizeof(*dbuf)*bufSize);CHKCUPMAWARE(cerr); 180 cerr = cupmMemcpy(dbuf,hbuf,sizeof(*dbuf)*bufSize,cupmMemcpyHostToDevice);CHKCUPMAWARE(cerr); 181 ierr = PetscPushSignalHandler(cupmSignalHandler,nullptr);CHKERRABORT(PETSC_COMM_SELF,ierr); 182 cupmMPIAwareJumpBufferSet = true; 183 if (setjmp(cupmMPIAwareJumpBuffer)) { 184 // if a segv was triggered in the MPI_Allreduce below, it is very likely due to MPI not 185 // being GPU-aware 186 awareness = false; 187 // control flow up until this point: 188 // 1. CUPMDevice<T>::CUPMDeviceInternal::__MPICUPMAware() 189 // 2. MPI_Allreduce 190 // 3. SIGSEGV 191 // 4. PetscSignalHandler_Private 192 // 5. cupmSignalHandler (lambda function) 193 // 6. here 194 // PetscSignalHandler_Private starts with PetscFunctionBegin and is pushed onto the stack 195 // so we must undo this. This would be most naturally done in cupmSignalHandler, however 196 // the C/C++ standard dictates: 197 // 198 // After invoking longjmp(), non-volatile-qualified local objects should not be accessed if 199 // their values could have changed since the invocation of setjmp(). Their value in this 200 // case is considered indeterminate, and accessing them is undefined behavior. 201 // 202 // so for safety (since we don't know what PetscStackPop may try to read/declare) we do it 203 // outside of the longjmp control flow 204 PetscStackPop; 205 } else if (!MPI_Allreduce(dbuf,dbuf+1,1,MPI_INT,MPI_SUM,PETSC_COMM_SELF)) awareness = true; 206 cupmMPIAwareJumpBufferSet = false; 207 ierr = PetscPopSignalHandler();CHKERRABORT(PETSC_COMM_SELF,ierr); 208 cerr = cupmFree(dbuf);CHKCUPMAWARE(cerr); 209 PetscFunctionReturn(awareness); 210 } 211 212 template <CUPMDeviceType T> 213 PetscErrorCode CUPMDevice<T>::CUPMDeviceInternal::finalize() noexcept 214 { 215 PetscFunctionBegin; 216 _devInitialized = false; 217 PetscFunctionReturn(0); 218 } 219 220 template <CUPMDeviceType T> 221 PetscErrorCode CUPMDevice<T>::__finalize() noexcept 222 { 223 PetscFunctionBegin; 224 if (!_initialized) PetscFunctionReturn(0); 225 for (auto&& device : _devices) { 226 if (device) { 227 const auto ierr = device->finalize();CHKERRQ(ierr); 228 device.reset(); 229 } 230 } 231 _defaultDevice = PETSC_CUPM_DEVICE_NONE; // disabled by default 232 _initialized = false; 233 PetscFunctionReturn(0); 234 } 235 236 template <CUPMDeviceType T> 237 static constexpr const std::array<const char*const,4> cupmOptions() noexcept; 238 239 #define CAT_(x,y) x ## y 240 #define CAT(x,y) CAT_(x,y) 241 242 // PetscDefined(HAVE_TYPE) = 0 -> expands to nothing 243 #define CUPM_DECLARE_OPTIONS_IF_PETSC_DEFINED_0(TYPE,type) 244 // PetscDefined(HAVE_TYPE) = 1 -> expands to the function 245 #define CUPM_DECLARE_OPTIONS_IF_PETSC_DEFINED_1(TYPE,type) \ 246 template <> \ 247 constexpr const std::array<const char*const,4> \ 248 cupmOptions<CUPMDeviceType::TYPE>() noexcept \ 249 { \ 250 return { \ 251 "PetscDevice " PetscStringize(TYPE) " Options", \ 252 "-device_enable_" PetscStringize(type), \ 253 "-device_select_" PetscStringize(type), \ 254 "-device_view_" PetscStringize(type) \ 255 }; \ 256 } 257 258 // expands to either the cupmOptions function or nothing, we have to do this with macros 259 // because for all the lovely compile time features c++ provides, the one thing it can't do is 260 // compile time string concatenation. 261 #define CUPM_DECLARE_OPTIONS_IF_PETSC_DEFINED(TYPE,type) \ 262 CAT(CUPM_DECLARE_OPTIONS_IF_PETSC_DEFINED_,PetscDefined(CAT(HAVE_,TYPE)))(TYPE,type) 263 264 CUPM_DECLARE_OPTIONS_IF_PETSC_DEFINED(CUDA,cuda) 265 CUPM_DECLARE_OPTIONS_IF_PETSC_DEFINED(HIP,hip) 266 267 template <CUPMDeviceType T> 268 PetscErrorCode CUPMDevice<T>::initialize(MPI_Comm comm, PetscInt *defaultDeviceId, PetscDeviceInitType *defaultInitType) noexcept 269 { 270 PetscInt initTypeCUPM = *defaultInitType,id = *defaultDeviceId; 271 PetscBool view = PETSC_FALSE,flg; 272 cupmError_t cerr; 273 int ndev; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 if (_initialized) PetscFunctionReturn(0); 278 _initialized = true; 279 ierr = PetscRegisterFinalize(__finalize);CHKERRQ(ierr); 280 281 { 282 constexpr const auto options = cupmOptions<T>(); 283 284 ierr = PetscOptionsBegin(comm,nullptr,std::get<0>(options),"Sys");CHKERRQ(ierr); 285 ierr = PetscOptionsEList(std::get<1>(options),"How (or whether) to initialize a device","CUPMDevice<CUPMDeviceType>::initialize()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initTypeCUPM],&initTypeCUPM,nullptr);CHKERRQ(ierr); 286 ierr = PetscOptionsRangeInt(std::get<2>(options),"Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate",id,&id,nullptr,PETSC_DECIDE,std::numeric_limits<int>::max());CHKERRQ(ierr); 287 ierr = PetscOptionsBool(std::get<3>(options),"Display device information and assignments (forces eager initialization)",nullptr,view,&view,&flg);CHKERRQ(ierr); 288 ierr = PetscOptionsEnd();CHKERRQ(ierr); 289 } 290 291 // post-process the options and lay the groundwork for initialization if needs be 292 cerr = cupmGetDeviceCount(&ndev); 293 if (PetscUnlikely(cerr == cupmErrorStubLibrary)) { 294 if (PetscUnlikely((initTypeCUPM == PETSC_DEVICE_INIT_EAGER) || (view && flg))) { 295 const auto name = cupmGetErrorName(cerr); 296 const auto desc = cupmGetErrorString(cerr); 297 const auto backend = cupmName(); 298 SETERRQ5(comm,PETSC_ERR_USER_INPUT,"Cannot eagerly initialize %s, as doing so results in %s error %d (%s) : %s",backend,backend,static_cast<PetscErrorCode>(cerr),name,desc); 299 } 300 cerr = cupmGetLastError(); // reset error 301 initTypeCUPM = PETSC_DEVICE_INIT_NONE; 302 } else {CHKERRCUPM(cerr);} 303 304 if (initTypeCUPM == PETSC_DEVICE_INIT_NONE) id = PETSC_CUPM_DEVICE_NONE; 305 else { 306 ierr = PetscDeviceCheckDeviceCount_Internal(ndev);CHKERRQ(ierr); 307 if (id == PETSC_DECIDE) { 308 if (ndev) { 309 PetscMPIInt rank; 310 311 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 312 id = rank % ndev; 313 } else id = 0; 314 } 315 view = static_cast<decltype(view)>(view && flg); 316 if (view) initTypeCUPM = PETSC_DEVICE_INIT_EAGER; 317 } 318 319 // id is PetscInt, _defaultDevice is int 320 static_assert(std::is_same<PetscMPIInt,decltype(_defaultDevice)>::value,""); 321 ierr = PetscMPIIntCast(id,&_defaultDevice);CHKERRQ(ierr); 322 if (initTypeCUPM == PETSC_DEVICE_INIT_EAGER) { 323 _devices[_defaultDevice] = CUPMDeviceInternal::makeDevice(_defaultDevice); 324 ierr = _devices[_defaultDevice]->initialize();CHKERRQ(ierr); 325 ierr = _devices[_defaultDevice]->configure();CHKERRQ(ierr); 326 if (view) { 327 PetscViewer vwr; 328 329 ierr = PetscLogInitialize();CHKERRQ(ierr); 330 ierr = PetscViewerASCIIGetStdout(comm,&vwr);CHKERRQ(ierr); 331 ierr = _devices[_defaultDevice]->view(vwr);CHKERRQ(ierr); 332 } 333 } 334 335 // record the results of the initialization 336 *defaultInitType = static_cast<PetscDeviceInitType>(initTypeCUPM); 337 *defaultDeviceId = id; 338 PetscFunctionReturn(0); 339 } 340 341 template <CUPMDeviceType T> 342 PetscErrorCode CUPMDevice<T>::getDevice(PetscDevice device, PetscInt id) const noexcept 343 { 344 PetscErrorCode ierr; 345 346 PetscFunctionBegin; 347 if (PetscUnlikelyDebug(_defaultDevice == PETSC_CUPM_DEVICE_NONE)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Trying to retrieve a %s PetscDevice when it has been disabled",cupmName()); 348 if (id == PETSC_DECIDE) id = _defaultDevice; 349 if (PetscUnlikelyDebug(static_cast<std::size_t>(id) >= _devices.size())) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only supports %zu number of devices but trying to get device with id %" PetscInt_FMT,_devices.size(),id); 350 if (_devices[id]) { 351 if (PetscUnlikelyDebug(id != _devices[id]->id())) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Entry %" PetscInt_FMT " contains device with mismatching id %d",id,_devices[id]->id()); 352 } else _devices[id] = CUPMDeviceInternal::makeDevice(id); 353 ierr = _devices[id]->initialize();CHKERRQ(ierr); 354 device->deviceId = _devices[id]->id(); // technically id = _devices[id]->_id here 355 device->ops->createcontext = _create; 356 device->ops->configure = this->configureDevice; 357 device->ops->view = this->viewDevice; 358 PetscFunctionReturn(0); 359 } 360 361 template <CUPMDeviceType T> 362 PetscErrorCode CUPMDevice<T>::configureDevice(PetscDevice device) noexcept 363 { 364 PetscErrorCode ierr; 365 366 PetscFunctionBegin; 367 ierr = _devices[device->deviceId]->configure();CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 371 template <CUPMDeviceType T> 372 PetscErrorCode CUPMDevice<T>::viewDevice(PetscDevice device, PetscViewer viewer) noexcept 373 { 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 // now this __shouldn't__ reconfigure the device, but there is a petscinfo call to indicate 378 // it is being reconfigured 379 ierr = _devices[device->deviceId]->configure();CHKERRQ(ierr); 380 ierr = _devices[device->deviceId]->view(viewer);CHKERRQ(ierr); 381 PetscFunctionReturn(0); 382 } 383 384 // explicitly instantiate the classes 385 #if PetscDefined(HAVE_CUDA) 386 template class CUPMDevice<CUPMDeviceType::CUDA>; 387 #endif 388 #if PetscDefined(HAVE_HIP) 389 template class CUPMDevice<CUPMDeviceType::HIP>; 390 #endif 391 392 } // namespace Petsc 393