xref: /petsc/src/sys/objects/kokkos/kinit.kokkos.cxx (revision e7804dd3b592d0d334d8c38946304769084f2fe4)
1a4af0ceeSJacob Faibussowitsch #include <petsc/private/deviceimpl.h>
2e907feaaSJunchao Zhang #include <petsc/private/kokkosimpl.hpp>
30e6b6b59SJacob Faibussowitsch #include <petscpkg_version.h>
4524fe776SJunchao Zhang #include <petsc_kokkos.hpp>
5c9903f8fSJunchao Zhang #include <petscdevice_cupm.h>
6c2b86a48SJunchao Zhang 
7f0b74427SPierre Jolivet PetscBool    PetscKokkosInitialized = PETSC_FALSE; // Has Kokkos been initialized (either by PETSc or by users)?
8e907feaaSJunchao Zhang PetscScalar *PetscScalarPool        = nullptr;
9e907feaaSJunchao Zhang PetscInt     PetscScalarPoolSize    = 0;
1045639126SStefano Zampini 
11524fe776SJunchao Zhang Kokkos::DefaultExecutionSpace *PetscKokkosExecutionSpacePtr = nullptr;
12524fe776SJunchao Zhang 
PetscKokkosFinalize_Private(void)13d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosFinalize_Private(void)
14d71ae5a4SJacob Faibussowitsch {
15c2b86a48SJunchao Zhang   PetscFunctionBegin;
16524fe776SJunchao Zhang   PetscCallCXX(delete PetscKokkosExecutionSpacePtr);
17fde95f08SJunchao Zhang   PetscKokkosExecutionSpacePtr = nullptr;
18e907feaaSJunchao Zhang   PetscCallCXX(Kokkos::kokkos_free(PetscScalarPool));
19e907feaaSJunchao Zhang   PetscScalarPoolSize = 0;
20e907feaaSJunchao Zhang   if (PetscBeganKokkos) {
21e907feaaSJunchao Zhang     PetscCallCXX(Kokkos::finalize());
22e907feaaSJunchao Zhang     PetscBeganKokkos = PETSC_FALSE;
23e907feaaSJunchao Zhang   }
243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
25c2b86a48SJunchao Zhang }
26c2b86a48SJunchao Zhang 
PetscKokkosIsInitialized_Private(PetscBool * isInitialized)27d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosIsInitialized_Private(PetscBool *isInitialized)
28d71ae5a4SJacob Faibussowitsch {
29c2b86a48SJunchao Zhang   PetscFunctionBegin;
30c2b86a48SJunchao Zhang   *isInitialized = Kokkos::is_initialized() ? PETSC_TRUE : PETSC_FALSE;
313ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
32c2b86a48SJunchao Zhang }
33375e5adfSJunchao Zhang 
3434766dafSJunchao Zhang /* Initialize Kokkos if not yet */
PetscKokkosInitializeCheck(void)35d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscKokkosInitializeCheck(void)
36d71ae5a4SJacob Faibussowitsch {
37375e5adfSJunchao Zhang   PetscFunctionBegin;
3834766dafSJunchao Zhang   if (!Kokkos::is_initialized()) {
39471471fdSJunchao Zhang #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
40c66e0907SJunchao Zhang     auto args = Kokkos::InitializationSettings();
41c66e0907SJunchao Zhang #else
4262825ce1SJacob Faibussowitsch     auto args = Kokkos::InitArguments{}; /* use default constructor */
43c66e0907SJunchao Zhang #endif
44b84ac304SJunchao Zhang 
45c9903f8fSJunchao Zhang #if (defined(KOKKOS_ENABLE_CUDA) && defined(PETSC_HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && defined(PETSC_HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && defined(PETSC_HAVE_SYCL))
4662825ce1SJacob Faibussowitsch     /* Kokkos does not support CUDA and HIP at the same time (but we do :)) */
47ab4ee011SJunchao Zhang     PetscDevice device;
48ab4ee011SJunchao Zhang     PetscInt    deviceId;
49ab4ee011SJunchao Zhang     PetscCall(PetscDeviceCreate(PETSC_DEVICE_DEFAULT(), PETSC_DECIDE, &device));
50ab4ee011SJunchao Zhang     PetscCall(PetscDeviceGetDeviceId(device, &deviceId));
51ab4ee011SJunchao Zhang     PetscCall(PetscDeviceDestroy(&device));
52ab4ee011SJunchao Zhang   #if PETSC_PKG_KOKKOS_VERSION_GE(4, 0, 0)
53ab4ee011SJunchao Zhang     // if device_id is not set, and no gpus have been found, kokkos will use CPU
54ab4ee011SJunchao Zhang     if (deviceId >= 0) args.set_device_id(static_cast<int>(deviceId));
55ab4ee011SJunchao Zhang   #elif PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
56ab4ee011SJunchao Zhang     args.set_device_id(static_cast<int>(deviceId));
57c66e0907SJunchao Zhang   #else
58ab4ee011SJunchao Zhang     PetscCall(PetscMPIIntCast(deviceId, &args.device_id));
59375e5adfSJunchao Zhang   #endif
60c66e0907SJunchao Zhang #endif
6111f0be55SJunchao Zhang 
62f0b74427SPierre Jolivet     /* To use PetscNumOMPThreads, one has to configure PETSc --with-openmp.
6311f0be55SJunchao Zhang        Otherwise, let's keep the default value (-1) of args.num_threads.
6411f0be55SJunchao Zhang     */
6562825ce1SJacob Faibussowitsch #if defined(KOKKOS_ENABLE_OPENMP) && PetscDefined(HAVE_OPENMP)
66471471fdSJunchao Zhang   #if PETSC_PKG_KOKKOS_VERSION_GE(3, 7, 0)
67c66e0907SJunchao Zhang     args.set_num_threads(PetscNumOMPThreads);
68c66e0907SJunchao Zhang   #else
6911f0be55SJunchao Zhang     args.num_threads = PetscNumOMPThreads;
7011f0be55SJunchao Zhang   #endif
71c66e0907SJunchao Zhang #endif
72ab4ee011SJunchao Zhang     PetscCallCXX(Kokkos::initialize(args));
739c9deb76SJunchao Zhang     PetscBeganKokkos = PETSC_TRUE;
749c9deb76SJunchao Zhang   }
75c9903f8fSJunchao Zhang 
76f0b74427SPierre Jolivet   if (!PetscKokkosExecutionSpacePtr) { // No matter Kokkos is init'ed by PETSc or by user, we need to init PetscKokkosExecutionSpacePtr
77*7a4760caSJunchao Zhang #if (defined(KOKKOS_ENABLE_CUDA) && defined(PETSC_HAVE_CUDA)) || (defined(KOKKOS_ENABLE_HIP) && defined(PETSC_HAVE_HIP)) || (defined(KOKKOS_ENABLE_SYCL) && defined(PETSC_HAVE_SYCL))
78c9903f8fSJunchao Zhang     PetscDeviceContext dctx;
79c9903f8fSJunchao Zhang     PetscDeviceType    dtype;
80c9903f8fSJunchao Zhang 
81a9949e74SJunchao Zhang     PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); // it internally sets PetscDefaultCuda/HipStream
82a9949e74SJunchao Zhang     PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
83c9903f8fSJunchao Zhang 
84524fe776SJunchao Zhang   #if defined(PETSC_HAVE_CUDA)
85c9903f8fSJunchao Zhang     if (dtype == PETSC_DEVICE_CUDA) PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultCudaStream));
86c9903f8fSJunchao Zhang   #elif defined(PETSC_HAVE_HIP)
87c9903f8fSJunchao Zhang     if (dtype == PETSC_DEVICE_HIP) PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(PetscDefaultHipStream));
88*7a4760caSJunchao Zhang   #elif defined(PETSC_HAVE_SYCL)
89*7a4760caSJunchao Zhang     if (dtype == PETSC_DEVICE_SYCL) {
90*7a4760caSJunchao Zhang       void *handle;
91*7a4760caSJunchao Zhang       PetscCall(PetscDeviceContextGetStreamHandle(dctx, &handle)); // Kind of PetscDefaultSyclStream
92*7a4760caSJunchao Zhang       PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace(*(sycl::queue *)handle));
93*7a4760caSJunchao Zhang     }
94c9903f8fSJunchao Zhang   #endif
95524fe776SJunchao Zhang #else
96c9903f8fSJunchao Zhang     // In all other cases, we use Kokkos default
97524fe776SJunchao Zhang     PetscCallCXX(PetscKokkosExecutionSpacePtr = new Kokkos::DefaultExecutionSpace());
98524fe776SJunchao Zhang #endif
9959e55d94SJunchao Zhang   }
100c9903f8fSJunchao Zhang 
101e907feaaSJunchao Zhang   if (!PetscScalarPoolSize) { // A pool for a small count of PetscScalars
102e907feaaSJunchao Zhang     PetscScalarPoolSize = 1024;
103e907feaaSJunchao Zhang     PetscCallCXX(PetscScalarPool = static_cast<PetscScalar *>(Kokkos::kokkos_malloc(sizeof(PetscScalar) * PetscScalarPoolSize)));
104e907feaaSJunchao Zhang   }
105e907feaaSJunchao Zhang 
106e907feaaSJunchao Zhang   PetscKokkosInitialized = PETSC_TRUE; // PetscKokkosInitializeCheck() was called
1073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
108375e5adfSJunchao Zhang }
109