xref: /petsc/src/sys/objects/kokkos/kinit.kokkos.cxx (revision d9acb416d05abeed0a33bde3a81aeb2ea0364f6a)
1 #include <petsc/private/deviceimpl.h>
2 #include <petsc/private/kokkosimpl.hpp>
3 #include <petscpkg_version.h>
4 #include <petsc_kokkos.hpp>
5 
6 PetscBool    PetscKokkosInitialized = PETSC_FALSE;
7 PetscScalar *PetscScalarPool        = nullptr;
8 PetscInt     PetscScalarPoolSize    = 0;
9 
10 Kokkos::DefaultExecutionSpace *PetscKokkosExecutionSpacePtr = nullptr;
11 
12 PetscErrorCode PetscKokkosFinalize_Private(void)
13 {
14   PetscFunctionBegin;
15   PetscCallCXX(delete PetscKokkosExecutionSpacePtr);
16   PetscCallCXX(Kokkos::kokkos_free(PetscScalarPool));
17   PetscScalarPoolSize = 0;
18   if (PetscBeganKokkos) {
19     PetscCallCXX(Kokkos::finalize());
20     PetscBeganKokkos = PETSC_FALSE;
21   }
22   PetscFunctionReturn(PETSC_SUCCESS);
23 }
24 
25 PetscErrorCode PetscKokkosIsInitialized_Private(PetscBool *isInitialized)
26 {
27   PetscFunctionBegin;
28   *isInitialized = Kokkos::is_initialized() ? PETSC_TRUE : PETSC_FALSE;
29   PetscFunctionReturn(PETSC_SUCCESS);
30 }
31 
32 /* Initialize Kokkos if not yet */
33 PetscErrorCode PetscKokkosInitializeCheck(void)
34 {
35   PetscFunctionBegin;
36   if (!Kokkos::is_initialized()) {
37 #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
38     auto args = Kokkos::InitializationSettings();
39 #else
40     auto args             = Kokkos::InitArguments{}; /* use default constructor */
41 #endif
42 
43 #if (defined(KOKKOS_ENABLE_CUDA) && PetscDefined(HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && PetscDefined(HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && PetscDefined(HAVE_SYCL))
44     /* Kokkos does not support CUDA and HIP at the same time (but we do :)) */
45     PetscDevice device;
46     PetscInt    deviceId;
47     PetscCall(PetscDeviceCreate(PETSC_DEVICE_DEFAULT(), PETSC_DECIDE, &device));
48     PetscCall(PetscDeviceGetDeviceId(device, &deviceId));
49     PetscCall(PetscDeviceDestroy(&device));
50   #if PETSC_PKG_KOKKOS_VERSION_GE(4, 0, 0)
51     // if device_id is not set, and no gpus have been found, kokkos will use CPU
52     if (deviceId >= 0) args.set_device_id(static_cast<int>(deviceId));
53   #elif PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
54     args.set_device_id(static_cast<int>(deviceId));
55   #else
56     PetscCall(PetscMPIIntCast(deviceId, &args.device_id));
57   #endif
58 #endif
59 
60 #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
61     args.set_disable_warnings(!PetscDefined(HAVE_KOKKOS_INIT_WARNINGS));
62 #else
63     args.disable_warnings = !PetscDefined(HAVE_KOKKOS_INIT_WARNINGS);
64 #endif
65 
66     /* To use PetscNumOMPThreads, one has to configure petsc --with-openmp.
67        Otherwise, let's keep the default value (-1) of args.num_threads.
68     */
69 #if defined(KOKKOS_ENABLE_OPENMP) && PetscDefined(HAVE_OPENMP)
70   #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
71     args.set_num_threads(PetscNumOMPThreads);
72   #else
73     args.num_threads = PetscNumOMPThreads;
74   #endif
75 #endif
76     PetscCallCXX(Kokkos::initialize(args));
77     PetscBeganKokkos = PETSC_TRUE;
78   }
79   if (!PetscKokkosExecutionSpacePtr) { // No matter Kokkos is init'ed by petsc or by user, we need to init PetscKokkosExecutionSpacePtr
80 #if defined(PETSC_HAVE_CUDA)
81     extern cudaStream_t PetscDefaultCudaStream;
82     PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultCudaStream));
83 #elif defined(PETS_HAVE_HIP)
84     extern hipStream_t PetscDefaultHipStream;
85     PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultHipStream));
86 #else
87     PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace());
88 #endif
89   }
90   if (!PetscScalarPoolSize) { // A pool for a small count of PetscScalars
91     PetscScalarPoolSize = 1024;
92     PetscCallCXX(PetscScalarPool = static_cast<PetscScalar *>(Kokkos::kokkos_malloc(sizeof(PetscScalar) * PetscScalarPoolSize)));
93   }
94 
95   PetscKokkosInitialized = PETSC_TRUE; // PetscKokkosInitializeCheck() was called
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98