#include <petscsys.h>        /*I  "petscsys.h"  I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;        /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt  shmsize;          /* size of the shared memory communicator */
  MPI_Comm     globcomm,shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete the internal shared memory communicator when a communicator is freed.

   This is called by MPI, not by users. It is called by MPI_Comm_free() when the communicator that has this data as an attribute is freed.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()

*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelComm_Shm(MPI_Comm comm,PetscMPIInt keyval,void *val,void *extra_state)
{
  PetscErrorCode ierr;
  PetscShmComm   p = (PetscShmComm)val;

  PetscFunctionBegin;
  ierr = PetscInfo1(NULL,"Deleting shared memory subcommunicator in an MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
  ierr = MPI_Comm_free(&p->shmcomm);CHKERRMPI(ierr);
  ierr = PetscFree(p->globranks);CHKERRMPI(ierr);
  ierr = PetscFree(val);CHKERRMPI(ierr);
  PetscFunctionReturn(MPI_SUCCESS);
}

/*@C
   PetscShmCommGet - Given a PETSc communicator, returns a communicator of all MPI ranks that share a common memory

   Collective.

   Input Parameter:
.  globcomm - MPI_Comm

   Output Parameter:
.  pshmcomm - the PETSc shared memory communicator object

   Level: developer

   Notes:
   This should be called only with a PETSc communicator obtained via PetscCommDuplicate()

   When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis

@*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm,PetscShmComm *pshmcomm)
{
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  PetscErrorCode   ierr;
  MPI_Group        globgroup,shmgroup;
  PetscMPIInt      *shmranks,i,flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  ierr = MPI_Comm_get_attr(globcomm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(globcomm,PETSC_ERR_ARG_CORRUPT,"Bad MPI communicator supplied; must be a PETSc communicator");

  ierr = MPI_Comm_get_attr(globcomm,Petsc_ShmComm_keyval,pshmcomm,&flg);CHKERRQ(ierr);
  if (flg) PetscFunctionReturn(0);

  ierr = PetscNew(pshmcomm);CHKERRQ(ierr);
  (*pshmcomm)->globcomm = globcomm;

  ierr = MPI_Comm_split_type(globcomm,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,&(*pshmcomm)->shmcomm);CHKERRQ(ierr);

  ierr = MPI_Comm_size((*pshmcomm)->shmcomm,&(*pshmcomm)->shmsize);CHKERRQ(ierr);
  ierr = MPI_Comm_group(globcomm,&globgroup);CHKERRQ(ierr);
  ierr = MPI_Comm_group((*pshmcomm)->shmcomm,&shmgroup);CHKERRQ(ierr);
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&shmranks);CHKERRQ(ierr);
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&(*pshmcomm)->globranks);CHKERRQ(ierr);
  for (i=0; i<(*pshmcomm)->shmsize; i++) shmranks[i] = i;
  ierr = MPI_Group_translate_ranks(shmgroup,(*pshmcomm)->shmsize,shmranks,globgroup,(*pshmcomm)->globranks);CHKERRQ(ierr);
  ierr = PetscFree(shmranks);CHKERRQ(ierr);
  ierr = MPI_Group_free(&globgroup);CHKERRQ(ierr);
  ierr = MPI_Group_free(&shmgroup);CHKERRQ(ierr);

  for (i=0; i<(*pshmcomm)->shmsize; i++) {
    ierr = PetscInfo2(NULL,"Shared memory rank %d global rank %d\n",i,(*pshmcomm)->globranks[i]);CHKERRQ(ierr);
  }
  ierr = MPI_Comm_set_attr(globcomm,Petsc_ShmComm_keyval,*pshmcomm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
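  /* Without MPI-3 support for MPI_Comm_split_type() with MPI_COMM_TYPE_SHARED there is no portable way to detect the ranks that share memory */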
  SETERRQ(globcomm,PETSC_ERR_SUP,"Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}

/*@C
   PetscShmCommGlobalToLocal - Given a global rank, returns the local rank in the shared memory communicator

   Input Parameters:
+  pshmcomm - the shared memory communicator object
-  grank    - the global rank

   Output Parameter:
.  lrank - the local rank, or MPI_PROC_NULL if it does not exist

   Level: developer

   Developer Notes:
   Assumes pshmcomm->globranks[] is sorted

   It may be better to rewrite this to map multiple global ranks to local ranks in the same function call

@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt *lrank)
{
  PetscMPIInt    low,high,t,i;
  PetscBool      flg = PETSC_FALSE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize-1]) PetscFunctionReturn(0);
  ierr = PetscOptionsGetBool(NULL,NULL,"-noshared",&flg,NULL);CHKERRQ(ierr);
  if (flg) PetscFunctionReturn(0);
  low  = 0;
  high = pshmcomm->shmsize;
  while (high-low > 5) {
    t = (low+high)/2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(0);
    }
  }
  PetscFunctionReturn(0);
}

/*@C
   PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator, returns the global rank

   Input Parameters:
+  pshmcomm - the shared memory communicator object
-  lrank    - the local rank in the shared memory communicator

   Output Parameter:
.  grank - the global rank in the global communicator where the shared memory communicator is built

   Level: developer

@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt *grank)
{
  PetscFunctionBegin;
#ifdef PETSC_USE_DEBUG
  if (lrank < 0 || lrank >= pshmcomm->shmsize) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"No rank %d in the shared memory communicator",lrank);
#endif
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(0);
}

/*@C
   PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

   Input Parameter:
.  pshmcomm - PetscShmComm object obtained with PetscShmCommGet()

   Output Parameter:
.  comm - the MPI communicator

   Level: developer

@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm *comm)
{
  PetscFunctionBegin;
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(0);
}
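
/*
   Illustrative sketch (not part of the library): typical use of the PetscShmComm API above,
   assuming a PETSc communicator comm obtained via PetscCommDuplicate(), e.g.

     MPI_Comm     comm,mpishm;
     PetscShmComm shmcomm;
     PetscMPIInt  grank,lrank;

     ierr = PetscCommDuplicate(PETSC_COMM_WORLD,&comm,NULL);CHKERRQ(ierr);
     ierr = MPI_Comm_rank(comm,&grank);CHKERRQ(ierr);
     ierr = PetscShmCommGet(comm,&shmcomm);CHKERRQ(ierr);                  // cached as an attribute of comm; do not free shmcomm
     ierr = PetscShmCommGetMpiShmComm(shmcomm,&mpishm);CHKERRQ(ierr);      // raw MPI shared memory communicator
     ierr = PetscShmCommGlobalToLocal(shmcomm,grank,&lrank);CHKERRQ(ierr); // lrank is this process's rank within mpishm, or MPI_PROC_NULL
     ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);                         // shmcomm is deleted automatically when the communicator is freed
*/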

#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#include <pthread.h>
#include <hwloc.h>
#include <omp.h>

/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared(). They should have the same effect, except that MPI-3 is much
   simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
   by 50%. Until the reason is understood, we use mmap() instead.
 */
#define USE_MMAP_ALLOCATE_SHARED_MEMORY

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

struct _n_PetscOmpCtrl {
  MPI_Comm          omp_comm;        /* a shared memory communicator to spawn omp threads */
  MPI_Comm          omp_master_comm; /* a communicator to give to third party libraries */
  PetscMPIInt       omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
  PetscBool         is_omp_master;   /* PETSC_TRUE on rank 0 of omp_comm */
  MPI_Win           omp_win;         /* a shared memory window containing a barrier */
  pthread_barrier_t *barrier;        /* pointer to the barrier */
  hwloc_topology_t  topology;
  hwloc_cpuset_t    cpuset;          /* cpu bindings of the omp master */
  hwloc_cpuset_t    omp_cpuset;      /* union of the cpu bindings of ranks in omp_comm */
};

/* Allocate and initialize a pthread_barrier_t object in memory shared by the processes in the omp_comm
   contained by the controller.

   PETSc OpenMP controller users do not call this function directly. This function exists
   only because we want to separate shared memory allocation methods from other code.
 */
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode        ierr;
  MPI_Aint              size;
  void                  *baseptr;
  pthread_barrierattr_t attr;

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  int                   fd;
  char                  pathname[PETSC_MAX_PATH_LEN];
#else
  PetscMPIInt           disp_unit;
#endif

  PetscFunctionBegin;
#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  size = sizeof(pthread_barrier_t);
  if (ctrl->is_omp_master) {
    /* use PETSC_COMM_SELF in PetscGetTmp(), since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to the slaves */
    ierr = PetscGetTmp(PETSC_COMM_SELF,pathname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    ierr = PetscStrlcat(pathname,"/petsc-shm-XXXXXX",PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
    fd   = mkstemp(pathname); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not create tmp file %s with mkstemp\n",pathname);
    ierr = ftruncate(fd,size);CHKERRQ(ierr);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr = close(fd);CHKERRQ(ierr);
    ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRQ(ierr);
    /* this MPI_Barrier is to wait for the slaves to open the file before the master unlinks it */
    ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
    ierr = unlink(pathname);CHKERRQ(ierr);
  } else {
    ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRQ(ierr);
    fd   = open(pathname,O_RDWR); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not open tmp file %s\n",pathname);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr = close(fd);CHKERRQ(ierr);
    ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  }
#else
  size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
  ierr = MPI_Win_allocate_shared(size,1,MPI_INFO_NULL,ctrl->omp_comm,&baseptr,&ctrl->omp_win);CHKERRQ(ierr);
  ierr = MPI_Win_shared_query(ctrl->omp_win,0,&size,&disp_unit,&baseptr);CHKERRQ(ierr);
#endif
  ctrl->barrier = (pthread_barrier_t*)baseptr;

  /* the omp master initializes the barrier */
  if (ctrl->is_omp_master) {
    ierr = MPI_Comm_size(ctrl->omp_comm,&ctrl->omp_comm_size);CHKERRQ(ierr);
    ierr = pthread_barrierattr_init(&attr);CHKERRQ(ierr);
    ierr = pthread_barrierattr_setpshared(&attr,PTHREAD_PROCESS_SHARED);CHKERRQ(ierr); /* make the barrier also work for processes */
    ierr = pthread_barrier_init(ctrl->barrier,&attr,(unsigned int)ctrl->omp_comm_size);CHKERRQ(ierr);
    ierr = pthread_barrierattr_destroy(&attr);CHKERRQ(ierr);
  }

  /* this MPI_Barrier is to make sure the omp barrier is initialized before the slaves use it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destroy the pthread barrier in the PETSc OpenMP controller */
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* this MPI_Barrier is to make sure the slaves have finished using the omp barrier before the master destroys it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRQ(ierr);
  if (ctrl->is_omp_master) { ierr = pthread_barrier_destroy(ctrl->barrier);CHKERRQ(ierr); }

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  ierr = munmap(ctrl->barrier,sizeof(pthread_barrier_t));CHKERRQ(ierr);
#else
  ierr = MPI_Win_free(&ctrl->omp_win);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP

   Input Parameters:
+  petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
-  nthreads   - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value

   Output Parameter:
.  pctrl - a PETSc OpenMP controller

   Level: developer

   TODO: Possibly use the variable PetscNumOMPThreads to determine the number of threads to use

.seealso: PetscOmpCtrlDestroy()
@*/
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm,PetscInt nthreads,PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl;
  unsigned long  *cpu_ulongs=NULL;
  PetscInt       i,nr_cpu_ulongs;
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank,shm_comm_size,omp_rank,color;
  PetscInt       num_packages,num_cores;

  PetscFunctionBegin;
  ierr = PetscNew(&ctrl);CHKERRQ(ierr);

  /*=================================================================================
    Init hwloc
    =================================================================================*/
  ierr = hwloc_topology_init(&ctrl->topology);CHKERRQ(ierr);
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and have a faster hwloc_topology_load */
  ierr = hwloc_topology_set_all_types_filter(ctrl->topology,HWLOC_TYPE_FILTER_KEEP_NONE);CHKERRQ(ierr);
  ierr = hwloc_topology_set_type_filter(ctrl->topology,HWLOC_OBJ_CORE,HWLOC_TYPE_FILTER_KEEP_ALL);CHKERRQ(ierr);
#endif
  ierr = hwloc_topology_load(ctrl->topology);CHKERRQ(ierr);

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    the others are called slaves. The OMP masters make up a new comm called
    omp_master_comm, which is usually passed to third party libraries.
    =================================================================================*/

  /* fetch the stored shared memory communicator */
  ierr = PetscShmCommGet(petsc_comm,&pshmcomm);CHKERRQ(ierr);
  ierr = PetscShmCommGetMpiShmComm(pshmcomm,&shm_comm);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(shm_comm,&shm_rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(shm_comm,&shm_comm_size);CHKERRQ(ierr);

  /* PETSc decides nthreads, which is the smaller of shm_comm_size and the number of cores per package (socket) */
  if (nthreads == -1) {
    num_packages = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE);
    num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE);
    nthreads     = num_cores/num_packages;
    if (nthreads > shm_comm_size) nthreads = shm_comm_size;
  }

  if (nthreads < 1 || nthreads > shm_comm_size) SETERRQ2(petsc_comm,PETSC_ERR_ARG_OUTOFRANGE,"number of OpenMP threads %D cannot be < 1 or > the MPI shared memory communicator size %d\n",nthreads,shm_comm_size);
  if (shm_comm_size % nthreads) { ierr = PetscPrintf(petsc_comm,"Warning: number of OpenMP threads %D is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n",nthreads,shm_comm_size);CHKERRQ(ierr); }

  /* split shm_comm into a set of omp_comms, each of size nthreads. E.g., if
     shm_comm_size=16 and nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
     color 1, and they are put into two omp_comms. Note that petsc_ranks may or may not
     be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
     Use 0 as the key so that the rank ordering won't change in the new comm.
   */
  color = shm_rank / nthreads;
  ierr  = MPI_Comm_split(shm_comm,color,0/*key*/,&ctrl->omp_comm);CHKERRQ(ierr);

  /* put the rank 0's of the omp_comms (i.e., the master ranks) into a new comm - omp_master_comm */
  ierr = MPI_Comm_rank(ctrl->omp_comm,&omp_rank);CHKERRQ(ierr);
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE;  /* master */
    color = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE; /* slave */
    color = MPI_UNDEFINED;             /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  ierr = MPI_Comm_split(petsc_comm,color,0/*key*/,&ctrl->omp_master_comm);CHKERRQ(ierr); /* rank 0 in omp_master_comm is rank 0 in petsc_comm */

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    the slave ranks to sleep and idle their CPUs, so that the master can fork OMP
    threads and run them on the idle CPUs.
    =================================================================================*/
  ierr = PetscOmpCtrlCreateBarrier(ctrl);CHKERRQ(ierr);

  /*=================================================================================
    The omp master records its CPU binding (i.e., cpu set) and computes a new binding
    that is the union of the bindings of all ranks in the omp_comm
    =================================================================================*/

  ctrl->cpuset = hwloc_bitmap_alloc(); if (!ctrl->cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
  ierr = hwloc_get_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);

  /* the hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
  nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology))+sizeof(unsigned long)*8)/sizeof(unsigned long)/8;
  ierr = PetscMalloc1(nr_cpu_ulongs,&cpu_ulongs);CHKERRQ(ierr);
  if (nr_cpu_ulongs == 1) {
    cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
  } else {
    for (i=0; i<nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset,(unsigned)i);
  }

  ierr = MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs,cpu_ulongs,nr_cpu_ulongs,MPI_UNSIGNED_LONG,MPI_BOR,0,ctrl->omp_comm);CHKERRQ(ierr);

  if (ctrl->is_omp_master) {
    ctrl->omp_cpuset = hwloc_bitmap_alloc(); if (!ctrl->omp_cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
    if (nr_cpu_ulongs == 1) {
#if HWLOC_API_VERSION >= 0x00020000
      ierr = hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);CHKERRQ(ierr);
#else
      hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);
#endif
    } else {
      for (i=0; i<nr_cpu_ulongs; i++) {
#if HWLOC_API_VERSION >= 0x00020000
        ierr = hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);CHKERRQ(ierr);
#else
        hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);
#endif
      }
    }
  }

  ierr   = PetscFree(cpu_ulongs);CHKERRQ(ierr);
  *pctrl = ctrl;
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller

   Input Parameter:
.  pctrl - a PETSc OpenMP controller

   Level: developer

.seealso: PetscOmpCtrlCreate()
@*/
PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl = *pctrl;

  PetscFunctionBegin;
  hwloc_bitmap_free(ctrl->cpuset);
  hwloc_topology_destroy(ctrl->topology);
  ierr = PetscOmpCtrlDestroyBarrier(ctrl);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&ctrl->omp_comm);CHKERRQ(ierr);
  if (ctrl->is_omp_master) {
    hwloc_bitmap_free(ctrl->omp_cpuset);
    ierr = MPI_Comm_free(&ctrl->omp_master_comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(ctrl);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Output Parameters:
+  omp_comm        - a communicator that includes a master rank and slave ranks where the master spawns threads
.  omp_master_comm - on master ranks, a communicator that includes the master ranks of each omp_comm;
                     on slave ranks, MPI_COMM_NULL is returned
-  is_omp_master   - true if the calling process is an OMP master rank

   Notes:
   Any output parameter can be NULL; such a parameter is simply ignored.

   Level: developer
@*/
PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl,MPI_Comm *omp_comm,MPI_Comm *omp_master_comm,PetscBool *is_omp_master)
{
  PetscFunctionBegin;
  if (omp_comm)        *omp_comm        = ctrl->omp_comm;
  if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
  if (is_omp_master)   *is_omp_master   = ctrl->is_omp_master;
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlBarrier - Perform a barrier on the MPI ranks in the omp_comm contained by the PETSc OMP controller (to let the slave ranks free their CPUs)

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Notes:
   This is a pthread barrier on MPI processes. Using MPI_Barrier() instead would be conceptually correct, but the MPI standard does not
   require processes blocked in MPI_Barrier() to free their CPUs to let other processes progress. In practice, to minimize latency,
   MPI processes stuck in MPI_Barrier() keep polling and do not free their CPUs. In contrast, processes waiting in the pthread barrier do free their CPUs.

   Code using PetscOmpCtrlBarrier() looks like this:

.vb
      if (is_omp_master) {
        PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
        Call the library using OpenMP
        PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
      }
      PetscOmpCtrlBarrier(ctrl);
.ve

   Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin(), PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = pthread_barrier_wait(ctrl->barrier);
  if (ierr && ierr != PTHREAD_BARRIER_SERIAL_THREAD) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d\n",ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Notes:
   Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
   This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime

   Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->omp_cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Notes:
   Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
   This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.

   Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(1);
  PetscFunctionReturn(0);
}

#undef USE_MMAP_ALLOCATE_SHARED_MEMORY
#endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */
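
/*
   Illustrative sketch (not part of the library): the intended workflow for the PetscOmpCtrl API above,
   assuming a PETSc communicator comm obtained via PetscCommDuplicate() and a hypothetical third party
   routine ThirdPartyLibSolve() that is parallelized with OpenMP:

     PetscOmpCtrl ctrl;
     PetscBool    is_master;

     ierr = PetscOmpCtrlCreate(comm,-1,&ctrl);CHKERRQ(ierr);                  // nthreads = -1: let PETSc choose
     ierr = PetscOmpCtrlGetOmpComms(ctrl,NULL,NULL,&is_master);CHKERRQ(ierr);
     if (is_master) {
       ierr = PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);CHKERRQ(ierr);         // bind to the union cpuset, raise the thread count
       ThirdPartyLibSolve();                                                  // OpenMP threads run on the CPUs of the idle slaves
       ierr = PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);CHKERRQ(ierr);           // restore the binding, set the thread count back to 1
     }
     ierr = PetscOmpCtrlBarrier(ctrl);CHKERRQ(ierr);                          // slaves sleep here while the master runs OpenMP
     ierr = PetscOmpCtrlDestroy(&ctrl);CHKERRQ(ierr);
*/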