#include <petscsys.h>        /*I  "petscsys.h"  I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;        /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt  shmsize;          /* size of the shared memory communicator */
  MPI_Comm     globcomm,shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete the internal shared memory communicator when a communicator is freed.

   This is called by MPI, not by users. It is called by MPI_Comm_free() when the communicator that has this data as an attribute is freed.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *val,void *extra_state)
{
  PetscErrorCode ierr;
  PetscShmComm   p = (PetscShmComm)val;

  PetscFunctionBegin;
  ierr = PetscInfo1(NULL,"Deleting shared memory subcommunicator in a MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
  ierr = MPI_Comm_free(&p->shmcomm);CHKERRMPI(ierr);
  ierr = PetscFree(p->globranks);CHKERRMPI(ierr);
  ierr = PetscFree(val);CHKERRMPI(ierr);
  PetscFunctionReturn(MPI_SUCCESS);
}

/*@C
  PetscShmCommGet - Given a PETSc communicator, returns a communicator of all MPI ranks that share a common memory

  Collective.

  Input Parameter:
. globcomm - MPI_Comm

  Output Parameter:
. pshmcomm - the PETSc shared memory communicator object

  Level: developer

  Notes:
  This should be called only with a PetscCommDuplicate() communicator

  When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis

@*/
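/*
   A minimal usage sketch of the PetscShmComm interface (illustrative only, not part of the PETSc sources;
   error checking abbreviated, variable names arbitrary). It obtains the shared memory communicator object
   from a PetscCommDuplicate()'d communicator, queries the underlying MPI shared memory communicator, and
   translates ranks between the global and shared memory communicators:

     MPI_Comm       comm,mpishm;
     PetscShmComm   shmcomm;
     PetscMPIInt    tag,grank,lrank;
     PetscErrorCode ierr;

     ierr = PetscCommDuplicate(PETSC_COMM_WORLD,&comm,&tag);CHKERRQ(ierr);
     ierr = PetscShmCommGet(comm,&shmcomm);CHKERRQ(ierr);
     ierr = PetscShmCommGetMpiShmComm(shmcomm,&mpishm);CHKERRQ(ierr);
     ierr = MPI_Comm_rank(comm,&grank);CHKERRQ(ierr);
     ierr = PetscShmCommGlobalToLocal(shmcomm,grank,&lrank);CHKERRQ(ierr);
     ierr = PetscShmCommLocalToGlobal(shmcomm,lrank,&grank);CHKERRQ(ierr);
     ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
*/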
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm,PetscShmComm *pshmcomm)
{
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  PetscErrorCode   ierr;
  MPI_Group        globgroup,shmgroup;
  PetscMPIInt      *shmranks,i,flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  ierr = MPI_Comm_get_attr(globcomm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(globcomm,PETSC_ERR_ARG_CORRUPT,"Bad MPI communicator supplied; must be a PETSc communicator");

  ierr = MPI_Comm_get_attr(globcomm,Petsc_ShmComm_keyval,pshmcomm,&flg);CHKERRQ(ierr);
  if (flg) PetscFunctionReturn(0);

  ierr = PetscNew(pshmcomm);CHKERRQ(ierr);
  (*pshmcomm)->globcomm = globcomm;

  ierr = MPI_Comm_split_type(globcomm,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,&(*pshmcomm)->shmcomm);CHKERRQ(ierr);

  ierr = MPI_Comm_size((*pshmcomm)->shmcomm,&(*pshmcomm)->shmsize);CHKERRQ(ierr);
  ierr = MPI_Comm_group(globcomm,&globgroup);CHKERRQ(ierr);
  ierr = MPI_Comm_group((*pshmcomm)->shmcomm,&shmgroup);CHKERRQ(ierr);
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&shmranks);CHKERRQ(ierr);
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&(*pshmcomm)->globranks);CHKERRQ(ierr);
  for (i=0; i<(*pshmcomm)->shmsize; i++) shmranks[i] = i;
  ierr = MPI_Group_translate_ranks(shmgroup,(*pshmcomm)->shmsize,shmranks,globgroup,(*pshmcomm)->globranks);CHKERRQ(ierr);
  ierr = PetscFree(shmranks);CHKERRQ(ierr);
  ierr = MPI_Group_free(&globgroup);CHKERRQ(ierr);
  ierr = MPI_Group_free(&shmgroup);CHKERRQ(ierr);

  for (i=0; i<(*pshmcomm)->shmsize; i++) {
    ierr = PetscInfo2(NULL,"Shared memory rank %d global rank %d\n",i,(*pshmcomm)->globranks[i]);CHKERRQ(ierr);
  }
  ierr = MPI_Comm_set_attr(globcomm,Petsc_ShmComm_keyval,*pshmcomm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  SETERRQ(globcomm,PETSC_ERR_SUP,"Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}

/*@C
  PetscShmCommGlobalToLocal - Given a global rank, returns the local rank in the shared memory communicator

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- grank    - the global rank

  Output Parameter:
. lrank - the local rank, or MPI_PROC_NULL if it does not exist

  Level: developer

  Developer Notes:
  Assumes pshmcomm->globranks[] is sorted

  It may be better to rewrite this to map multiple global ranks to local ranks in the same function call

@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt *lrank)
{
  PetscMPIInt    low,high,t,i;
  PetscBool      flg = PETSC_FALSE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize-1]) PetscFunctionReturn(0);
  ierr = PetscOptionsGetBool(NULL,NULL,"-noshared",&flg,NULL);CHKERRQ(ierr);
  if (flg) PetscFunctionReturn(0);
  low  = 0;
  high = pshmcomm->shmsize;
  while (high-low > 5) {
    t = (low+high)/2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(0);
    }
  }
  PetscFunctionReturn(0);
}

/*@C
  PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator, returns the global rank

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- lrank    - the local rank in the shared memory communicator

  Output Parameter:
. grank - the global rank in the global communicator where the shared memory communicator is built

  Level: developer

@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt *grank)
{
  PetscFunctionBegin;
  if (lrank < 0 || lrank >= pshmcomm->shmsize) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"No rank %D in the shared memory communicator",lrank);
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(0);
}

/*@C
  PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

  Input Parameter:
. pshmcomm - PetscShmComm object obtained with PetscShmCommGet()

  Output Parameter:
. comm - the MPI communicator

  Level: developer

@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm *comm)
{
  PetscFunctionBegin;
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#include <pthread.h>
#include <hwloc.h>
#include <omp.h>

/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared. They should have the same effect except that MPI-3 is much
   simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
   by 50%. Until the reason is found, we use mmap() instead.
*/
#define USE_MMAP_ALLOCATE_SHARED_MEMORY

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

struct _n_PetscOmpCtrl {
  MPI_Comm          omp_comm;        /* a shared memory communicator to spawn omp threads */
  MPI_Comm          omp_master_comm; /* a communicator to give to third party libraries */
  PetscMPIInt       omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
  PetscBool         is_omp_master;   /* true on rank 0 of omp_comm */
  MPI_Win           omp_win;         /* a shared memory window containing a barrier */
  pthread_barrier_t *barrier;        /* pointer to the barrier */
  hwloc_topology_t  topology;
  hwloc_cpuset_t    cpuset;          /* cpu bindings of the omp master */
  hwloc_cpuset_t    omp_cpuset;      /* union of the cpu bindings of all ranks in omp_comm */
};

/* Allocate and initialize a pthread_barrier_t object in memory shared by the processes in the omp_comm
   contained by the controller.

   PETSc OpenMP controller users do not call this function directly. This function exists
   only because we want to separate the shared memory allocation methods from the other code.
*/
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode        ierr;
  MPI_Aint              size;
  void                  *baseptr;
  pthread_barrierattr_t attr;

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  PetscInt  fd;
  PetscChar pathname[PETSC_MAX_PATH_LEN];
#else
  PetscMPIInt disp_unit;
#endif

  PetscFunctionBegin;
#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  size = sizeof(pthread_barrier_t);
  if (ctrl->is_omp_master) {
    /* use PETSC_COMM_SELF in PetscGetTmp(), since it is a collective call; using omp_comm would otherwise bcast the partially populated pathname to the slaves */
    ierr = PetscGetTmp(PETSC_COMM_SELF,pathname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    ierr = PetscStrlcat(pathname,"/petsc-shm-XXXXXX",PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
    fd   = mkstemp(pathname); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not create tmp file %s with mkstemp\n",pathname);
    ierr = ftruncate(fd,size);CHKERRQ(ierr);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr = close(fd);CHKERRQ(ierr);
    ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRQ(ierr);
    /* this MPI_Barrier is to wait for the slaves to open the file before the master unlinks it */
    ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
    ierr = unlink(pathname);CHKERRQ(ierr);
  } else {
    ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRQ(ierr);
    fd   = open(pathname,O_RDWR); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not open tmp file %s\n",pathname);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr = close(fd);CHKERRQ(ierr);
    ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  }
#else
  size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
  ierr = MPI_Win_allocate_shared(size,1,MPI_INFO_NULL,ctrl->omp_comm,&baseptr,&ctrl->omp_win);CHKERRQ(ierr);
  ierr = MPI_Win_shared_query(ctrl->omp_win,0,&size,&disp_unit,&baseptr);CHKERRQ(ierr);
#endif
  ctrl->barrier = (pthread_barrier_t*)baseptr;

  /* the omp master initializes the barrier */
  if (ctrl->is_omp_master) {
    ierr = MPI_Comm_size(ctrl->omp_comm,&ctrl->omp_comm_size);CHKERRQ(ierr);
    ierr = pthread_barrierattr_init(&attr);CHKERRQ(ierr);
    ierr = pthread_barrierattr_setpshared(&attr,PTHREAD_PROCESS_SHARED);CHKERRQ(ierr); /* make the barrier also work across processes */
    ierr = pthread_barrier_init(ctrl->barrier,&attr,(unsigned int)ctrl->omp_comm_size);CHKERRQ(ierr);
    ierr = pthread_barrierattr_destroy(&attr);CHKERRQ(ierr);
  }

  /* this MPI_Barrier is to make sure the omp barrier is initialized before the slaves use it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy the pthread barrier in the PETSc OpenMP controller */
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* this MPI_Barrier is to make sure the slaves have finished using the omp barrier before the master destroys it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  if (ctrl->is_omp_master) { ierr = pthread_barrier_destroy(ctrl->barrier);CHKERRQ(ierr); }

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  ierr = munmap(ctrl->barrier,sizeof(pthread_barrier_t));CHKERRQ(ierr);
#else
  ierr = MPI_Win_free(&ctrl->omp_win);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP

  Input Parameters:
+ petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
- nthreads   - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value

  Output Parameter:
. pctrl - a PETSc OpenMP controller

  Level: developer

  TODO: Possibly use the variable PetscNumOMPThreads to determine the number of threads to use

.seealso PetscOmpCtrlDestroy()
@*/
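/*
   A minimal end-to-end usage sketch of the OpenMP controller (illustrative only, not part of the PETSc
   sources; error checking abbreviated). The call into the third party library is just a placeholder;
   no particular library is assumed. It expands on the snippet in the PetscOmpCtrlBarrier() documentation:

     PetscOmpCtrl   ctrl;
     MPI_Comm       omp_comm,omp_master_comm;
     PetscBool      is_omp_master;
     PetscErrorCode ierr;

     ierr = PetscOmpCtrlCreate(PETSC_COMM_WORLD,-1,&ctrl);CHKERRQ(ierr);
     ierr = PetscOmpCtrlGetOmpComms(ctrl,&omp_comm,&omp_master_comm,&is_omp_master);CHKERRQ(ierr);
     if (is_omp_master) {
       ierr = PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);CHKERRQ(ierr);
       call the third party library using OpenMP, passing omp_master_comm
       ierr = PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);CHKERRQ(ierr);
     }
     ierr = PetscOmpCtrlBarrier(ctrl);CHKERRQ(ierr);
     ierr = PetscOmpCtrlDestroy(&ctrl);CHKERRQ(ierr);
*/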
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm,PetscInt nthreads,PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl;
  unsigned long  *cpu_ulongs=NULL;
  PetscInt       i,nr_cpu_ulongs;
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank,shm_comm_size,omp_rank,color;
  PetscInt       num_packages,num_cores;

  PetscFunctionBegin;
  ierr = PetscNew(&ctrl);CHKERRQ(ierr);

  /*=================================================================================
    Init hwloc
    =================================================================================*/
  ierr = hwloc_topology_init(&ctrl->topology);CHKERRQ(ierr);
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and have a faster hwloc_topology_load */
  ierr = hwloc_topology_set_all_types_filter(ctrl->topology,HWLOC_TYPE_FILTER_KEEP_NONE);CHKERRQ(ierr);
  ierr = hwloc_topology_set_type_filter(ctrl->topology,HWLOC_OBJ_CORE,HWLOC_TYPE_FILTER_KEEP_ALL);CHKERRQ(ierr);
#endif
  ierr = hwloc_topology_load(ctrl->topology);CHKERRQ(ierr);

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    the others are called slaves. The OMP masters make up a new comm called
    omp_master_comm, which is usually passed to third party libraries.
    =================================================================================*/

  /* fetch the stored shared memory communicator */
  ierr = PetscShmCommGet(petsc_comm,&pshmcomm);CHKERRQ(ierr);
  ierr = PetscShmCommGetMpiShmComm(pshmcomm,&shm_comm);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(shm_comm,&shm_rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(shm_comm,&shm_comm_size);CHKERRQ(ierr);

  /* PETSc decides nthreads, which is the smaller of shm_comm_size and the number of cores per package (socket) */
  if (nthreads == -1) {
    num_packages = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE);
    num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE);
    nthreads     = num_cores/num_packages;
    if (nthreads > shm_comm_size) nthreads = shm_comm_size;
  }

  if (nthreads < 1 || nthreads > shm_comm_size) SETERRQ2(petsc_comm,PETSC_ERR_ARG_OUTOFRANGE,"number of OpenMP threads %D cannot be < 1 or > the MPI shared memory communicator size %d\n",nthreads,shm_comm_size);
  if (shm_comm_size % nthreads) { ierr = PetscPrintf(petsc_comm,"Warning: number of OpenMP threads %D is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n",nthreads,shm_comm_size);CHKERRQ(ierr); }

  /* split shm_comm into a set of omp_comms, each of size nthreads. For example, if
     shm_comm_size=16 and nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
     color 1, and they are put in two omp_comms. Note that petsc_ranks may or may not
     be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
     Use 0 as the key so that the rank ordering does not change in the new comm.
   */
  color = shm_rank / nthreads;
  ierr  = MPI_Comm_split(shm_comm,color,0/*key*/,&ctrl->omp_comm);CHKERRQ(ierr);

  /* put the rank 0's of the omp_comms (i.e., the master ranks) into a new comm - omp_master_comm */
  ierr = MPI_Comm_rank(ctrl->omp_comm,&omp_rank);CHKERRQ(ierr);
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE;  /* master */
    color = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE; /* slave */
    color = MPI_UNDEFINED;             /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  ierr = MPI_Comm_split(petsc_comm,color,0/*key*/,&ctrl->omp_master_comm);CHKERRQ(ierr); /* rank 0 in omp_master_comm is rank 0 in petsc_comm */

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    the slave ranks to sleep and idle their CPUs, so that the master can fork OMP
    threads and run them on the idle CPUs.
    =================================================================================*/
  ierr = PetscOmpCtrlCreateBarrier(ctrl);CHKERRQ(ierr);

  /*=================================================================================
    The omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
    is the union of the bindings of all ranks in the omp_comm
    =================================================================================*/

  ctrl->cpuset = hwloc_bitmap_alloc(); if (!ctrl->cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
  ierr = hwloc_get_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);

  /* the hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
  nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology))+sizeof(unsigned long)*8)/sizeof(unsigned long)/8;
  ierr = PetscMalloc1(nr_cpu_ulongs,&cpu_ulongs);CHKERRQ(ierr);
  if (nr_cpu_ulongs == 1) {
    cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
  } else {
    for (i=0; i<nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset,(unsigned)i);
  }

  ierr = MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs,cpu_ulongs,nr_cpu_ulongs,MPI_UNSIGNED_LONG,MPI_BOR,0,ctrl->omp_comm);CHKERRQ(ierr);

  if (ctrl->is_omp_master) {
    ctrl->omp_cpuset = hwloc_bitmap_alloc(); if (!ctrl->omp_cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
    if (nr_cpu_ulongs == 1) {
#if HWLOC_API_VERSION >= 0x00020000
      ierr = hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);CHKERRQ(ierr);
#else
      hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);
#endif
    } else {
      for (i=0; i<nr_cpu_ulongs; i++) {
#if HWLOC_API_VERSION >= 0x00020000
        ierr = hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);CHKERRQ(ierr);
#else
        hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);
#endif
      }
    }
  }

  ierr   = PetscFree(cpu_ulongs);CHKERRQ(ierr);
  *pctrl = ctrl;
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller

  Input Parameter:
. pctrl - a PETSc OpenMP controller

  Level: developer

.seealso PetscOmpCtrlCreate()
@*/
PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl = *pctrl;

  PetscFunctionBegin;
  hwloc_bitmap_free(ctrl->cpuset);
  hwloc_topology_destroy(ctrl->topology);
  PetscOmpCtrlDestroyBarrier(ctrl);
  ierr = MPI_Comm_free(&ctrl->omp_comm);CHKERRQ(ierr);
  if (ctrl->is_omp_master) {
    hwloc_bitmap_free(ctrl->omp_cpuset);
    ierr = MPI_Comm_free(&ctrl->omp_master_comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(ctrl);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlGetOmpComms - Get the MPI communicators from a PETSc OMP controller

  Input Parameter:
. ctrl - a PETSc OMP controller

  Output Parameters:
+ omp_comm        - a communicator that includes a master rank and slave ranks where the master spawns threads
. omp_master_comm - on master ranks, a communicator that includes the master ranks of each omp_comm; on slave ranks, MPI_COMM_NULL is returned
- is_omp_master   - true if the calling process is an OMP master rank

  Notes:
  Any output parameter can be NULL; such a parameter is simply ignored.

  Level: developer
@*/
PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl,MPI_Comm *omp_comm,MPI_Comm *omp_master_comm,PetscBool *is_omp_master)
{
  PetscFunctionBegin;
  if (omp_comm)        *omp_comm        = ctrl->omp_comm;
  if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
  if (is_omp_master)   *is_omp_master   = ctrl->is_omp_master;
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlBarrier - Perform a barrier over the MPI ranks in the omp_comm contained by the PETSc OMP controller (to let the slave ranks free their CPUs)

  Input Parameter:
. ctrl - a PETSc OMP controller

  Notes:
  This is a pthread barrier over MPI processes. Using MPI_Barrier instead would be conceptually correct, but the MPI standard does not
  require processes blocked in MPI_Barrier to free their CPUs to let other processes progress. In practice, to minimize latency,
  MPI processes stuck in MPI_Barrier keep polling and do not free their CPUs. In contrast, processes waiting in a process-shared pthread barrier do free their CPUs.

  Code using PetscOmpCtrlBarrier() would look like this:

    if (is_omp_master) {
      PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
      Call the library using OpenMP
      PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
    }
    PetscOmpCtrlBarrier(ctrl);

  Level: developer

.seealso PetscOmpCtrlOmpRegionOnMasterBegin(), PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = pthread_barrier_wait(ctrl->barrier);
  if (ierr && ierr != PTHREAD_BARRIER_SERIAL_THREAD) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %D\n",ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks

  Input Parameter:
. ctrl - a PETSc OMP controller

  Notes:
  Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
  This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime.

  Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->omp_cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks

  Input Parameter:
. ctrl - a PETSc OMP controller

  Notes:
  Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
  This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime back to 1.

  Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(1);
  PetscFunctionReturn(0);
}

#undef USE_MMAP_ALLOCATE_SHARED_MEMORY
#endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */