1ce78bad3SBarry Smith #include <petscsys.h> /*I "petscsys.h" I*/
2ce78bad3SBarry Smith #include <petsc/private/petscimpl.h>
3ce78bad3SBarry Smith #include <pthread.h>
4ce78bad3SBarry Smith #include <hwloc.h>
5ce78bad3SBarry Smith #include <omp.h>
6ce78bad3SBarry Smith
/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared. They should have the same effect except MPI-3 is much
   simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
   by 50%. Until the reason is found out, we use mmap() instead.
*/
12ce78bad3SBarry Smith #define USE_MMAP_ALLOCATE_SHARED_MEMORY
13ce78bad3SBarry Smith
14ce78bad3SBarry Smith #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
15ce78bad3SBarry Smith #include <sys/mman.h>
16ce78bad3SBarry Smith #include <sys/types.h>
17ce78bad3SBarry Smith #include <sys/stat.h>
18ce78bad3SBarry Smith #include <fcntl.h>
19ce78bad3SBarry Smith #endif
20ce78bad3SBarry Smith
21ce78bad3SBarry Smith struct _n_PetscOmpCtrl {
22ce78bad3SBarry Smith MPI_Comm omp_comm; /* a shared memory communicator to spawn omp threads */
23ce78bad3SBarry Smith MPI_Comm omp_master_comm; /* a communicator to give to third party libraries */
24ce78bad3SBarry Smith PetscMPIInt omp_comm_size; /* size of omp_comm, a kind of OMP_NUM_THREADS */
25ce78bad3SBarry Smith PetscBool is_omp_master; /* rank 0's in omp_comm */
26ce78bad3SBarry Smith MPI_Win omp_win; /* a shared memory window containing a barrier */
27ce78bad3SBarry Smith pthread_barrier_t *barrier; /* pointer to the barrier */
28ce78bad3SBarry Smith hwloc_topology_t topology;
29ce78bad3SBarry Smith hwloc_cpuset_t cpuset; /* cpu bindings of omp master */
30ce78bad3SBarry Smith hwloc_cpuset_t omp_cpuset; /* union of cpu bindings of ranks in omp_comm */
31ce78bad3SBarry Smith };
32ce78bad3SBarry Smith
33ce78bad3SBarry Smith /* Allocate and initialize a pthread_barrier_t object in memory shared by processes in omp_comm
34ce78bad3SBarry Smith contained by the controller.
35ce78bad3SBarry Smith
36ce78bad3SBarry Smith PETSc OpenMP controller users do not call this function directly. This function exists
37ce78bad3SBarry Smith only because we want to separate shared memory allocation methods from other code.
38ce78bad3SBarry Smith */
static inline PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl)
{
  MPI_Aint              size;
  void                 *baseptr;
  pthread_barrierattr_t attr;

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  int  fd;
  char pathname[PETSC_MAX_PATH_LEN];
#else
  PetscMPIInt disp_unit;
#endif

  PetscFunctionBegin;
#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  size = sizeof(pthread_barrier_t);
  if (ctrl->is_omp_master) {
    /* master creates a unique tmp file sized to hold the barrier and maps it into its address space */
    /* use PETSC_COMM_SELF in PetscGetTmp, since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to slaves */
    PetscCall(PetscGetTmp(PETSC_COMM_SELF, pathname, PETSC_MAX_PATH_LEN));
    PetscCall(PetscStrlcat(pathname, "/petsc-shm-XXXXXX", PETSC_MAX_PATH_LEN));
    /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
    fd = mkstemp(pathname);
    PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not create tmp file %s with mkstemp", pathname);
    PetscCallExternal(ftruncate, fd, size);
    baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
    PetscCallExternal(close, fd); /* the mapping remains valid after the fd is closed */
    PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
    /* this MPI_Barrier is to wait for slaves to open the file before master unlinks it */
    PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
    PetscCallExternal(unlink, pathname);
  } else {
    /* slaves receive the path from the master and map the same file to share the barrier */
    PetscCallMPI(MPI_Bcast(pathname, PETSC_MAX_PATH_LEN, MPI_CHAR, 0, ctrl->omp_comm));
    fd = open(pathname, O_RDWR);
    PetscCheck(fd != -1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Could not open tmp file %s", pathname);
    baseptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    PetscCheck(baseptr != MAP_FAILED, PETSC_COMM_SELF, PETSC_ERR_LIB, "mmap() failed");
    PetscCallExternal(close, fd);
    PetscCallMPI(MPI_Barrier(ctrl->omp_comm)); /* matches the master's barrier above: file is open, master may unlink */
  }
#else
  /* MPI-3 path: only master contributes a non-zero allocation; everyone queries master's base address */
  size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
  PetscCallMPI(MPI_Win_allocate_shared(size, 1, MPI_INFO_NULL, ctrl->omp_comm, &baseptr, &ctrl->omp_win));
  PetscCallMPI(MPI_Win_shared_query(ctrl->omp_win, 0, &size, &disp_unit, &baseptr));
#endif
  ctrl->barrier = (pthread_barrier_t *)baseptr;

  /* omp master initializes the barrier */
  if (ctrl->is_omp_master) {
    PetscCallMPI(MPI_Comm_size(ctrl->omp_comm, &ctrl->omp_comm_size));
    PetscCallExternal(pthread_barrierattr_init, &attr);
    PetscCallExternal(pthread_barrierattr_setpshared, &attr, PTHREAD_PROCESS_SHARED); /* make the barrier also work for processes */
    PetscCallExternal(pthread_barrier_init, ctrl->barrier, &attr, (unsigned int)ctrl->omp_comm_size);
    PetscCallExternal(pthread_barrierattr_destroy, &attr);
  }

  /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */
  PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
99ce78bad3SBarry Smith
100ce78bad3SBarry Smith /* Destroy the pthread barrier in the PETSc OpenMP controller */
static inline PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl)
{
  PetscFunctionBegin;
  /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */
  PetscCallMPI(MPI_Barrier(ctrl->omp_comm));
  if (ctrl->is_omp_master) PetscCallExternal(pthread_barrier_destroy, ctrl->barrier);

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  /* every rank unmaps its own view; the backing tmp file was already unlinked in PetscOmpCtrlCreateBarrier() */
  PetscCallExternal(munmap, ctrl->barrier, sizeof(pthread_barrier_t));
#else
  /* freeing the window releases the shared memory allocated with MPI_Win_allocate_shared */
  PetscCallMPI(MPI_Win_free(&ctrl->omp_win));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
115ce78bad3SBarry Smith
116ce78bad3SBarry Smith /*@C
117ce78bad3SBarry Smith PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP
118ce78bad3SBarry Smith
119ce78bad3SBarry Smith Input Parameters:
120ce78bad3SBarry Smith + petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
121ce78bad3SBarry Smith - nthreads - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value
122ce78bad3SBarry Smith
123ce78bad3SBarry Smith Output Parameter:
124ce78bad3SBarry Smith . pctrl - a PETSc OpenMP controller
125ce78bad3SBarry Smith
126ce78bad3SBarry Smith Level: developer
127ce78bad3SBarry Smith
128ce78bad3SBarry Smith Developer Note:
  Possibly use the variable `PetscNumOMPThreads` to determine the number of threads to use
130ce78bad3SBarry Smith
131ce78bad3SBarry Smith .seealso: `PetscOmpCtrlDestroy()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`,
132ce78bad3SBarry Smith @*/
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm, PetscInt nthreads, PetscOmpCtrl *pctrl)
{
  PetscOmpCtrl   ctrl;
  unsigned long *cpu_ulongs = NULL; /* the rank's cpuset packed into unsigned longs, so it can go through MPI_Reduce */
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank, shm_comm_size, omp_rank, color, nr_cpu_ulongs;
  PetscInt       num_packages, num_cores;

  PetscFunctionBegin;
  PetscCall(PetscNew(&ctrl));

  /*=================================================================================
    Init hwloc
    ==================================================================================*/
  PetscCallExternal(hwloc_topology_init, &ctrl->topology);
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and have faster hwloc_topology_load */
  PetscCallExternal(hwloc_topology_set_all_types_filter, ctrl->topology, HWLOC_TYPE_FILTER_KEEP_NONE);
  PetscCallExternal(hwloc_topology_set_type_filter, ctrl->topology, HWLOC_OBJ_CORE, HWLOC_TYPE_FILTER_KEEP_ALL);
#endif
  PetscCallExternal(hwloc_topology_load, ctrl->topology);

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    others are called slaves. OMP Masters make up a new comm called omp_master_comm,
    which is usually passed to third party libraries.
    ==================================================================================*/

  /* fetch the stored shared memory communicator */
  PetscCall(PetscShmCommGet(petsc_comm, &pshmcomm));
  PetscCall(PetscShmCommGetMpiShmComm(pshmcomm, &shm_comm));

  PetscCallMPI(MPI_Comm_rank(shm_comm, &shm_rank));
  PetscCallMPI(MPI_Comm_size(shm_comm, &shm_comm_size));

  /* PETSc decides nthreads, which is the smaller of shm_comm_size or cores per package(socket) */
  if (nthreads == -1) {
    /* treat non-positive hwloc object counts (type unknown on this platform) as 1 to avoid division by zero */
    num_packages = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_PACKAGE);
    num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology, HWLOC_OBJ_CORE);
    nthreads     = num_cores / num_packages;
    if (nthreads > shm_comm_size) nthreads = shm_comm_size;
  }

  PetscCheck(nthreads >= 1 && nthreads <= shm_comm_size, petsc_comm, PETSC_ERR_ARG_OUTOFRANGE, "number of OpenMP threads %" PetscInt_FMT " can not be < 1 or > the MPI shared memory communicator size %d", nthreads, shm_comm_size);
  if (shm_comm_size % nthreads) PetscCall(PetscPrintf(petsc_comm, "Warning: number of OpenMP threads %" PetscInt_FMT " is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n", nthreads, shm_comm_size));

  /* split shm_comm into a set of omp_comms with each of size nthreads. Ex., if
     shm_comm_size=16, nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
     color 1. They are put in two omp_comms. Note that petsc_ranks may or may not
     be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
     Use 0 as key so that rank ordering wont change in new comm.
   */
  color = shm_rank / nthreads;
  PetscCallMPI(MPI_Comm_split(shm_comm, color, 0 /*key*/, &ctrl->omp_comm));

  /* put rank 0's in omp_comms (i.e., master ranks) into a new comm - omp_master_comm */
  PetscCallMPI(MPI_Comm_rank(ctrl->omp_comm, &omp_rank));
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE; /* master */
    color               = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE; /* slave */
    color               = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  PetscCallMPI(MPI_Comm_split(petsc_comm, color, 0 /*key*/, &ctrl->omp_master_comm));

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    slave ranks in sleep and idle their CPU, so that the master can fork OMP threads
    and run them on the idle CPUs.
    ==================================================================================*/
  PetscCall(PetscOmpCtrlCreateBarrier(ctrl));

  /*=================================================================================
    omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
    is the union of the bindings of all ranks in the omp_comm
    =================================================================================*/

  ctrl->cpuset = hwloc_bitmap_alloc();
  PetscCheck(ctrl->cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
  PetscCallExternal(hwloc_get_cpubind, ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS);

  /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
  /* number of unsigned longs needed to hold the topology-wide cpuset, i.e. ceil((last set bit + 1) / bits-per-ulong) */
  nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology)) + sizeof(unsigned long) * 8) / sizeof(unsigned long) / 8;
  PetscCall(PetscMalloc1(nr_cpu_ulongs, &cpu_ulongs));
  if (nr_cpu_ulongs == 1) {
    cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
  } else {
    for (PetscMPIInt i = 0; i < nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset, (unsigned)i);
  }

  /* bitwise-OR the packed cpusets of all ranks in omp_comm onto the master (rank 0) */
  PetscCallMPI(MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs, nr_cpu_ulongs, MPI_UNSIGNED_LONG, MPI_BOR, 0, ctrl->omp_comm));

  if (ctrl->is_omp_master) {
    /* master unpacks the union back into a hwloc bitmap, used later by PetscOmpCtrlOmpRegionOnMasterBegin() */
    ctrl->omp_cpuset = hwloc_bitmap_alloc();
    PetscCheck(ctrl->omp_cpuset, PETSC_COMM_SELF, PETSC_ERR_LIB, "hwloc_bitmap_alloc() failed");
    if (nr_cpu_ulongs == 1) {
#if HWLOC_API_VERSION >= 0x00020000
      PetscCallExternal(hwloc_bitmap_from_ulong, ctrl->omp_cpuset, cpu_ulongs[0]);
#else
      hwloc_bitmap_from_ulong(ctrl->omp_cpuset, cpu_ulongs[0]);
#endif
    } else {
      for (PetscMPIInt i = 0; i < nr_cpu_ulongs; i++) {
#if HWLOC_API_VERSION >= 0x00020000
        PetscCallExternal(hwloc_bitmap_set_ith_ulong, ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]);
#else
        hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset, (unsigned)i, cpu_ulongs[i]);
#endif
      }
    }
  }
  PetscCall(PetscFree(cpu_ulongs));
  *pctrl = ctrl;
  PetscFunctionReturn(PETSC_SUCCESS);
}
251ce78bad3SBarry Smith
252ce78bad3SBarry Smith /*@C
253ce78bad3SBarry Smith PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller
254ce78bad3SBarry Smith
255ce78bad3SBarry Smith Input Parameter:
256ce78bad3SBarry Smith . pctrl - a PETSc OpenMP controller
257ce78bad3SBarry Smith
258ce78bad3SBarry Smith Level: developer
259ce78bad3SBarry Smith
260ce78bad3SBarry Smith .seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlGetOmpComms()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`,
261ce78bad3SBarry Smith @*/
PetscOmpCtrlDestroy(PetscOmpCtrl * pctrl)262ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)
263ce78bad3SBarry Smith {
264ce78bad3SBarry Smith PetscOmpCtrl ctrl = *pctrl;
265ce78bad3SBarry Smith
266ce78bad3SBarry Smith PetscFunctionBegin;
267ce78bad3SBarry Smith hwloc_bitmap_free(ctrl->cpuset);
268ce78bad3SBarry Smith hwloc_topology_destroy(ctrl->topology);
269ce78bad3SBarry Smith PetscCall(PetscOmpCtrlDestroyBarrier(ctrl));
270ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_free(&ctrl->omp_comm));
271ce78bad3SBarry Smith if (ctrl->is_omp_master) {
272ce78bad3SBarry Smith hwloc_bitmap_free(ctrl->omp_cpuset);
273ce78bad3SBarry Smith PetscCallMPI(MPI_Comm_free(&ctrl->omp_master_comm));
274ce78bad3SBarry Smith }
275ce78bad3SBarry Smith PetscCall(PetscFree(ctrl));
276ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS);
277ce78bad3SBarry Smith }
278ce78bad3SBarry Smith
279ce78bad3SBarry Smith /*@C
280ce78bad3SBarry Smith PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller
281ce78bad3SBarry Smith
282ce78bad3SBarry Smith Input Parameter:
283ce78bad3SBarry Smith . ctrl - a PETSc OMP controller
284ce78bad3SBarry Smith
285ce78bad3SBarry Smith Output Parameters:
286ce78bad3SBarry Smith + omp_comm - a communicator that includes a master rank and slave ranks where master spawns threads
. omp_master_comm - on master ranks, return a communicator that includes the master ranks of each omp_comm;
                    on slave ranks, `MPI_COMM_NULL` will be returned.
289ce78bad3SBarry Smith - is_omp_master - true if the calling process is an OMP master rank.
290ce78bad3SBarry Smith
291ce78bad3SBarry Smith Note:
292ce78bad3SBarry Smith Any output parameter can be `NULL`. The parameter is just ignored.
293ce78bad3SBarry Smith
294ce78bad3SBarry Smith Level: developer
295ce78bad3SBarry Smith
296ce78bad3SBarry Smith .seealso: `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`, `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`,
297ce78bad3SBarry Smith @*/
PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl,MPI_Comm * omp_comm,MPI_Comm * omp_master_comm,PetscBool * is_omp_master)298ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl, MPI_Comm *omp_comm, MPI_Comm *omp_master_comm, PetscBool *is_omp_master)
299ce78bad3SBarry Smith {
300ce78bad3SBarry Smith PetscFunctionBegin;
301ce78bad3SBarry Smith if (omp_comm) *omp_comm = ctrl->omp_comm;
302ce78bad3SBarry Smith if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
303ce78bad3SBarry Smith if (is_omp_master) *is_omp_master = ctrl->is_omp_master;
304ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS);
305ce78bad3SBarry Smith }
306ce78bad3SBarry Smith
307ce78bad3SBarry Smith /*@C
308ce78bad3SBarry Smith PetscOmpCtrlBarrier - Do barrier on MPI ranks in omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPU)
309ce78bad3SBarry Smith
310ce78bad3SBarry Smith Input Parameter:
311ce78bad3SBarry Smith . ctrl - a PETSc OMP controller
312ce78bad3SBarry Smith
313ce78bad3SBarry Smith Notes:
314ce78bad3SBarry Smith This is a pthread barrier on MPI ranks. Using `MPI_Barrier()` instead is conceptually correct. But MPI standard does not
  require that processes blocked by `MPI_Barrier()` free their CPUs to let other processes progress. In practice, to minimize latency,
316ce78bad3SBarry Smith MPI ranks stuck in `MPI_Barrier()` keep polling and do not free CPUs. In contrast, pthread_barrier has this requirement.
317ce78bad3SBarry Smith
318ce78bad3SBarry Smith A code using `PetscOmpCtrlBarrier()` would be like this,
319ce78bad3SBarry Smith .vb
320ce78bad3SBarry Smith if (is_omp_master) {
321ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
322ce78bad3SBarry Smith Call the library using OpenMP
323ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
324ce78bad3SBarry Smith }
325ce78bad3SBarry Smith PetscOmpCtrlBarrier(ctrl);
326ce78bad3SBarry Smith .ve
327ce78bad3SBarry Smith
328ce78bad3SBarry Smith Level: developer
329ce78bad3SBarry Smith
330ce78bad3SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`,
331ce78bad3SBarry Smith @*/
PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)332ce78bad3SBarry Smith PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)
333ce78bad3SBarry Smith {
334ce78bad3SBarry Smith int err;
335ce78bad3SBarry Smith
336ce78bad3SBarry Smith PetscFunctionBegin;
337ce78bad3SBarry Smith err = pthread_barrier_wait(ctrl->barrier);
338ce78bad3SBarry Smith PetscCheck(!err || err == PTHREAD_BARRIER_SERIAL_THREAD, PETSC_COMM_SELF, PETSC_ERR_LIB, "pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d", err);
339ce78bad3SBarry Smith PetscFunctionReturn(PETSC_SUCCESS);
340ce78bad3SBarry Smith }
341ce78bad3SBarry Smith
342ce78bad3SBarry Smith /*@C
343ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks
344ce78bad3SBarry Smith
345ce78bad3SBarry Smith Input Parameter:
346ce78bad3SBarry Smith . ctrl - a PETSc OMP controller
347ce78bad3SBarry Smith
348ce78bad3SBarry Smith Note:
349ce78bad3SBarry Smith Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank.
  This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime
351ce78bad3SBarry Smith
352ce78bad3SBarry Smith Level: developer
353ce78bad3SBarry Smith
354ce78bad3SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterEnd()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`
355ce78bad3SBarry Smith @*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)
{
  PetscFunctionBegin;
  /* widen the master's CPU binding to omp_cpuset, the union of the bindings of all ranks in its
     omp_comm (computed in PetscOmpCtrlCreate()), so spawned OpenMP threads may use the slaves' CPUs */
  PetscCallExternal(hwloc_set_cpubind, ctrl->topology, ctrl->omp_cpuset, HWLOC_CPUBIND_PROCESS);
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(PETSC_SUCCESS);
}
363ce78bad3SBarry Smith
364ce78bad3SBarry Smith /*@C
365ce78bad3SBarry Smith PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks
366ce78bad3SBarry Smith
367ce78bad3SBarry Smith Input Parameter:
368ce78bad3SBarry Smith . ctrl - a PETSc OMP controller
369ce78bad3SBarry Smith
370ce78bad3SBarry Smith Note:
371ce78bad3SBarry Smith Only master ranks can call this function. Call `PetscOmpCtrlGetOmpComms()` to know if this is a master rank.
  This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.
373ce78bad3SBarry Smith
374ce78bad3SBarry Smith Level: developer
375ce78bad3SBarry Smith
376ce78bad3SBarry Smith .seealso: `PetscOmpCtrlOmpRegionOnMasterBegin()`, `PetscOmpCtrlCreate()`, `PetscOmpCtrlDestroy()`, `PetscOmpCtrlBarrier()`
377ce78bad3SBarry Smith @*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)
{
  PetscFunctionBegin;
  /* restore the master's original CPU binding recorded in ctrl->cpuset by PetscOmpCtrlCreate() */
  PetscCallExternal(hwloc_set_cpubind, ctrl->topology, ctrl->cpuset, HWLOC_CPUBIND_PROCESS);
  omp_set_num_threads(1); /* reset the OpenMP nthreads-var to 1 outside the OpenMP region */
  PetscFunctionReturn(PETSC_SUCCESS);
}
385ce78bad3SBarry Smith
386ce78bad3SBarry Smith #undef USE_MMAP_ALLOCATE_SHARED_MEMORY
387