1 #include <petsc/private/deviceimpl.h> 2 #include <petsc/private/kokkosimpl.hpp> 3 #include <petscpkg_version.h> 4 #include <petsc_kokkos.hpp> 5 6 PetscBool PetscKokkosInitialized = PETSC_FALSE; 7 PetscScalar *PetscScalarPool = nullptr; 8 PetscInt PetscScalarPoolSize = 0; 9 10 Kokkos::DefaultExecutionSpace *PetscKokkosExecutionSpacePtr = nullptr; 11 12 PetscErrorCode PetscKokkosFinalize_Private(void) 13 { 14 PetscFunctionBegin; 15 PetscCallCXX(delete PetscKokkosExecutionSpacePtr); 16 PetscCallCXX(Kokkos::kokkos_free(PetscScalarPool)); 17 PetscScalarPoolSize = 0; 18 if (PetscBeganKokkos) { 19 PetscCallCXX(Kokkos::finalize()); 20 PetscBeganKokkos = PETSC_FALSE; 21 } 22 PetscFunctionReturn(PETSC_SUCCESS); 23 } 24 25 PetscErrorCode PetscKokkosIsInitialized_Private(PetscBool *isInitialized) 26 { 27 PetscFunctionBegin; 28 *isInitialized = Kokkos::is_initialized() ? PETSC_TRUE : PETSC_FALSE; 29 PetscFunctionReturn(PETSC_SUCCESS); 30 } 31 32 /* Initialize Kokkos if not yet */ 33 PetscErrorCode PetscKokkosInitializeCheck(void) 34 { 35 PetscFunctionBegin; 36 if (!Kokkos::is_initialized()) { 37 #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 38 auto args = Kokkos::InitializationSettings(); 39 #else 40 auto args = Kokkos::InitArguments{}; /* use default constructor */ 41 #endif 42 43 #if (defined(KOKKOS_ENABLE_CUDA) && PetscDefined(HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && PetscDefined(HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && PetscDefined(HAVE_SYCL)) 44 /* Kokkos does not support CUDA and HIP at the same time (but we do :)) */ 45 PetscDevice device; 46 PetscInt deviceId; 47 PetscCall(PetscDeviceCreate(PETSC_DEVICE_DEFAULT(), PETSC_DECIDE, &device)); 48 PetscCall(PetscDeviceGetDeviceId(device, &deviceId)); 49 PetscCall(PetscDeviceDestroy(&device)); 50 #if PETSC_PKG_KOKKOS_VERSION_GE(4, 0, 0) 51 // if device_id is not set, and no gpus have been found, kokkos will use CPU 52 if (deviceId >= 0) args.set_device_id(static_cast<int>(deviceId)); 53 #elif PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 54 args.set_device_id(static_cast<int>(deviceId)); 55 #else 56 PetscCall(PetscMPIIntCast(deviceId, &args.device_id)); 57 #endif 58 #endif 59 60 #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 61 args.set_disable_warnings(!PetscDefined(HAVE_KOKKOS_INIT_WARNINGS)); 62 #else 63 args.disable_warnings = !PetscDefined(HAVE_KOKKOS_INIT_WARNINGS); 64 #endif 65 66 /* To use PetscNumOMPThreads, one has to configure petsc --with-openmp. 67 Otherwise, let's keep the default value (-1) of args.num_threads. 68 */ 69 #if defined(KOKKOS_ENABLE_OPENMP) && PetscDefined(HAVE_OPENMP) 70 #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0) 71 args.set_num_threads(PetscNumOMPThreads); 72 #else 73 args.num_threads = PetscNumOMPThreads; 74 #endif 75 #endif 76 PetscCallCXX(Kokkos::initialize(args)); 77 PetscBeganKokkos = PETSC_TRUE; 78 } 79 if (!PetscKokkosExecutionSpacePtr) { // No matter Kokkos is init'ed by petsc or by user, we need to init PetscKokkosExecutionSpacePtr 80 #if defined(PETSC_HAVE_CUDA) 81 extern cudaStream_t PetscDefaultCudaStream; 82 PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultCudaStream)); 83 #elif defined(PETS_HAVE_HIP) 84 extern hipStream_t PetscDefaultHipStream; 85 PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultHipStream)); 86 #else 87 PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace()); 88 #endif 89 } 90 if (!PetscScalarPoolSize) { // A pool for a small count of PetscScalars 91 PetscScalarPoolSize = 1024; 92 PetscCallCXX(PetscScalarPool = static_cast<PetscScalar *>(Kokkos::kokkos_malloc(sizeof(PetscScalar) * PetscScalarPoolSize))); 93 } 94 95 PetscKokkosInitialized = PETSC_TRUE; // PetscKokkosInitializeCheck() was called 96 PetscFunctionReturn(PETSC_SUCCESS); 97 } 98