#include <petscsys.h> /*I  "petscsys.h"  I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;        /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt  shmsize;          /* size of the shared memory communicator */
  MPI_Comm     globcomm,shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete the internal shared memory communicator when a communicator is freed.

   This is called by MPI, not by users. It is invoked by MPI_Comm_free() on the communicator that carries this data as an attribute.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()

*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *val,void *extra_state)
{
  PetscErrorCode ierr;
  PetscShmComm   p = (PetscShmComm)val;

  PetscFunctionBegin;
  ierr = PetscInfo1(NULL,"Deleting shared memory subcommunicator in an MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
  ierr = MPI_Comm_free(&p->shmcomm);CHKERRMPI(ierr);
  ierr = PetscFree(p->globranks);CHKERRMPI(ierr);
  ierr = PetscFree(val);CHKERRMPI(ierr);
  PetscFunctionReturn(MPI_SUCCESS);
}
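/* For reference, a minimal sketch of how a delete callback such as the one above is
   registered with MPI. This is illustrative only and not compiled here; the wrapper name
   RegisterShmCommKeyval is hypothetical, and in PETSc the actual creation of
   Petsc_ShmComm_keyval happens elsewhere during PETSc's startup. */
#if 0
static PetscErrorCode RegisterShmCommKeyval(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* MPI invokes Petsc_ShmComm_Attr_Delete_Fn when a communicator carrying this attribute is freed */
  ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_ShmComm_Attr_Delete_Fn,&Petsc_ShmComm_keyval,NULL);CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}
#endif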
/*@C
  PetscShmCommGet - Given a PETSc communicator, returns a communicator of all ranks that share a common memory

  Collective.

  Input Parameter:
. globcomm - MPI_Comm

  Output Parameter:
. pshmcomm - the PETSc shared memory communicator object

  Level: developer

  Notes:
  This should be called only with a PetscCommDuplicate() communicator

  When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis

@*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm,PetscShmComm *pshmcomm)
{
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  PetscErrorCode   ierr;
  MPI_Group        globgroup,shmgroup;
  PetscMPIInt      *shmranks,i,flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  ierr = MPI_Comm_get_attr(globcomm,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
  if (!flg) SETERRQ(globcomm,PETSC_ERR_ARG_CORRUPT,"Bad MPI communicator supplied; must be a PETSc communicator");

  ierr = MPI_Comm_get_attr(globcomm,Petsc_ShmComm_keyval,pshmcomm,&flg);CHKERRMPI(ierr);
  if (flg) PetscFunctionReturn(0);

  ierr = PetscNew(pshmcomm);CHKERRQ(ierr);
  (*pshmcomm)->globcomm = globcomm;

  ierr = MPI_Comm_split_type(globcomm,MPI_COMM_TYPE_SHARED,0,MPI_INFO_NULL,&(*pshmcomm)->shmcomm);CHKERRMPI(ierr);

  ierr = MPI_Comm_size((*pshmcomm)->shmcomm,&(*pshmcomm)->shmsize);CHKERRMPI(ierr);
  ierr = MPI_Comm_group(globcomm,&globgroup);CHKERRMPI(ierr);
  ierr = MPI_Comm_group((*pshmcomm)->shmcomm,&shmgroup);CHKERRMPI(ierr);
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&shmranks);CHKERRQ(ierr);
  ierr = PetscMalloc1((*pshmcomm)->shmsize,&(*pshmcomm)->globranks);CHKERRQ(ierr);
  for (i=0; i<(*pshmcomm)->shmsize; i++) shmranks[i] = i;
  ierr = MPI_Group_translate_ranks(shmgroup,(*pshmcomm)->shmsize,shmranks,globgroup,(*pshmcomm)->globranks);CHKERRMPI(ierr);
  ierr = PetscFree(shmranks);CHKERRQ(ierr);
  ierr = MPI_Group_free(&globgroup);CHKERRMPI(ierr);
  ierr = MPI_Group_free(&shmgroup);CHKERRMPI(ierr);

  for (i=0; i<(*pshmcomm)->shmsize; i++) {
    ierr = PetscInfo2(NULL,"Shared memory rank %d global rank %d\n",i,(*pshmcomm)->globranks[i]);CHKERRQ(ierr);
  }
  ierr = MPI_Comm_set_attr(globcomm,Petsc_ShmComm_keyval,*pshmcomm);CHKERRMPI(ierr);
  PetscFunctionReturn(0);
#else
  SETERRQ(globcomm,PETSC_ERR_SUP,"Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}

/*@C
  PetscShmCommGlobalToLocal - Given a global rank, returns the local rank in the shared memory communicator

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- grank    - the global rank

  Output Parameter:
. lrank - the local rank, or MPI_PROC_NULL if it does not exist

  Level: developer

  Developer Notes:
  Assumes pshmcomm->globranks[] is sorted

  It may be better to rewrite this to map multiple global ranks to local ranks in the same function call

@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt *lrank)
{
  PetscMPIInt    low,high,t,i;
  PetscBool      flg = PETSC_FALSE;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize-1]) PetscFunctionReturn(0);
  ierr = PetscOptionsGetBool(NULL,NULL,"-noshared",&flg,NULL);CHKERRQ(ierr);
  if (flg) PetscFunctionReturn(0);
  /* a bisection narrows the range, then a short linear scan finishes the search */
  low  = 0;
  high = pshmcomm->shmsize;
  while (high-low > 5) {
    t = (low+high)/2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(0);
    }
  }
  PetscFunctionReturn(0);
}

/*@C
  PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator, returns the global rank

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- lrank    - the local rank in the shared memory communicator

  Output Parameter:
. grank - the global rank in the global communicator where the shared memory communicator is built

  Level: developer

@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt *grank)
{
  PetscFunctionBegin;
  if (lrank < 0 || lrank >= pshmcomm->shmsize) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"No rank %d in the shared memory communicator",lrank);
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(0);
}

/*@C
  PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

  Input Parameter:
. pshmcomm - PetscShmComm object obtained with PetscShmCommGet()

  Output Parameter:
. comm - the MPI communicator

  Level: developer

@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm *comm)
{
  PetscFunctionBegin;
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(0);
}
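/* For reference, a minimal usage sketch of the PetscShmComm API above (illustrative only
   and not compiled here). It assumes PetscInitialize() has been called; since
   PetscShmCommGet() must be given a PetscCommDuplicate() communicator, the sketch wraps
   the user communicator with a PetscCommDuplicate()/PetscCommDestroy() pair. */
#if 0
{
  MPI_Comm       pcomm,shmcomm;
  PetscShmComm   shm;
  PetscMPIInt    grank,lrank;
  PetscErrorCode ierr;

  ierr = PetscCommDuplicate(PETSC_COMM_WORLD,&pcomm,NULL);CHKERRQ(ierr);
  ierr = PetscShmCommGet(pcomm,&shm);CHKERRQ(ierr);
  ierr = PetscShmCommGetMpiShmComm(shm,&shmcomm);CHKERRQ(ierr);     /* raw MPI communicator of this node */
  ierr = MPI_Comm_rank(pcomm,&grank);CHKERRMPI(ierr);
  ierr = PetscShmCommGlobalToLocal(shm,grank,&lrank);CHKERRQ(ierr); /* our rank within shmcomm */
  ierr = PetscCommDestroy(&pcomm);CHKERRQ(ierr);                    /* shm is an attribute of the comm; do not free it directly */
}
#endif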
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#include <pthread.h>
#include <hwloc.h>
#include <omp.h>

/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared(). They should have the same effect, except that MPI-3
   is much simpler to use (see the sketch after the struct below). However, on a Cori Haswell
   node with Cray MPI, MPI-3 worsened a test's performance by 50%. Until the cause is
   understood, we use mmap() instead.
*/
#define USE_MMAP_ALLOCATE_SHARED_MEMORY

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

struct _n_PetscOmpCtrl {
  MPI_Comm          omp_comm;        /* a shared memory communicator to spawn omp threads */
  MPI_Comm          omp_master_comm; /* a communicator to give to third party libraries */
  PetscMPIInt       omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
  PetscBool         is_omp_master;   /* true on rank 0 of omp_comm */
  MPI_Win           omp_win;         /* a shared memory window containing a barrier */
  pthread_barrier_t *barrier;        /* pointer to the barrier */
  hwloc_topology_t  topology;        /* hwloc topology of the node */
  hwloc_cpuset_t    cpuset;          /* cpu bindings of omp master */
  hwloc_cpuset_t    omp_cpuset;      /* union of cpu bindings of ranks in omp_comm */
};
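/* A condensed sketch of the MPI-3 path mentioned above (illustrative only and not compiled
   here; PetscOmpShmAllocateBarrierMPI3 is a hypothetical name, not a PETSc function). Rank 0
   contributes the memory and every rank queries rank 0's segment, so all ranks end up with a
   pointer to the same physical memory. */
#if 0
static PetscErrorCode PetscOmpShmAllocateBarrierMPI3(MPI_Comm shmcomm,pthread_barrier_t **barrier,MPI_Win *win)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,disp_unit;
  MPI_Aint       sz;
  void           *baseptr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(shmcomm,&rank);CHKERRMPI(ierr);
  sz   = rank ? 0 : (MPI_Aint)sizeof(pthread_barrier_t);                       /* only rank 0 contributes memory */
  ierr = MPI_Win_allocate_shared(sz,1,MPI_INFO_NULL,shmcomm,&baseptr,win);CHKERRMPI(ierr);
  ierr = MPI_Win_shared_query(*win,0,&sz,&disp_unit,&baseptr);CHKERRMPI(ierr); /* get a pointer to rank 0's segment */
  *barrier = (pthread_barrier_t*)baseptr;
  PetscFunctionReturn(0);
}
#endif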
/* Allocate and initialize a pthread_barrier_t object in memory shared by the processes in the
   omp_comm contained in the controller.

   PETSc OpenMP controller users do not call this function directly. This function exists
   only because we want to separate shared memory allocation methods from other code.
*/
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode        ierr;
  MPI_Aint              size;
  void                  *baseptr;
  pthread_barrierattr_t attr;

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  PetscInt  fd;
  PetscChar pathname[PETSC_MAX_PATH_LEN];
#else
  PetscMPIInt disp_unit;
#endif

  PetscFunctionBegin;
#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  size = sizeof(pthread_barrier_t);
  if (ctrl->is_omp_master) {
    /* use PETSC_COMM_SELF in PetscGetTmp(), since it is a collective call; using omp_comm would bcast the partially populated pathname to the slaves */
    ierr = PetscGetTmp(PETSC_COMM_SELF,pathname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    ierr = PetscStrlcat(pathname,"/petsc-shm-XXXXXX",PETSC_MAX_PATH_LEN);CHKERRQ(ierr);
    /* mkstemp() replaces XXXXXX with a unique file name and opens the file for us */
    fd   = mkstemp(pathname); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not create tmp file %s with mkstemp\n",pathname);
    ierr = ftruncate(fd,size);CHKERRQ(ierr);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr = close(fd);CHKERRQ(ierr);
    ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRMPI(ierr);
    /* this MPI_Barrier waits for the slaves to open the file before the master unlinks it */
    ierr = MPI_Barrier(ctrl->omp_comm);CHKERRMPI(ierr);
    ierr = unlink(pathname);CHKERRQ(ierr);
  } else {
    ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRMPI(ierr);
    fd   = open(pathname,O_RDWR); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not open tmp file %s\n",pathname);
    baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE,MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
    ierr = close(fd);CHKERRQ(ierr);
    ierr = MPI_Barrier(ctrl->omp_comm);CHKERRMPI(ierr);
  }
#else
  size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
  ierr = MPI_Win_allocate_shared(size,1,MPI_INFO_NULL,ctrl->omp_comm,&baseptr,&ctrl->omp_win);CHKERRMPI(ierr);
  ierr = MPI_Win_shared_query(ctrl->omp_win,0,&size,&disp_unit,&baseptr);CHKERRMPI(ierr);
#endif
  ctrl->barrier = (pthread_barrier_t*)baseptr;

  /* the omp master initializes the barrier */
  if (ctrl->is_omp_master) {
    ierr = MPI_Comm_size(ctrl->omp_comm,&ctrl->omp_comm_size);CHKERRMPI(ierr);
    ierr = pthread_barrierattr_init(&attr);CHKERRQ(ierr);
    ierr = pthread_barrierattr_setpshared(&attr,PTHREAD_PROCESS_SHARED);CHKERRQ(ierr); /* make the barrier also work for processes */
    ierr = pthread_barrier_init(ctrl->barrier,&attr,(unsigned int)ctrl->omp_comm_size);CHKERRQ(ierr);
    ierr = pthread_barrierattr_destroy(&attr);CHKERRQ(ierr);
  }

  /* this MPI_Barrier makes sure the omp barrier is initialized before the slaves use it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRMPI(ierr);
  PetscFunctionReturn(0);
}

/* Destroy the pthread barrier in the PETSc OpenMP controller */
PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* this MPI_Barrier makes sure the slaves have finished using the omp barrier before the master destroys it */
  ierr = MPI_Barrier(ctrl->omp_comm);CHKERRMPI(ierr);
  if (ctrl->is_omp_master) { ierr = pthread_barrier_destroy(ctrl->barrier);CHKERRQ(ierr); }

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
  ierr = munmap(ctrl->barrier,sizeof(pthread_barrier_t));CHKERRQ(ierr);
#else
  ierr = MPI_Win_free(&ctrl->omp_win);CHKERRMPI(ierr);
#endif
  PetscFunctionReturn(0);
}
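/* A standalone illustration (not compiled here) of the key property exploited above: a
   PTHREAD_PROCESS_SHARED barrier placed in memory shared by several processes puts waiters
   to sleep instead of spin-waiting. This sketch assumes a POSIX system where MAP_ANONYMOUS
   and process-shared barriers are available, and uses an anonymous shared mapping plus
   fork() instead of the file-backed mmap needed above for unrelated MPI processes. */
#if 0
#include <sys/mman.h>
#include <pthread.h>
#include <unistd.h>

int main(void)
{
  pthread_barrierattr_t attr;
  pthread_barrier_t     *b;

  /* an anonymous MAP_SHARED mapping made before fork() is visible to both processes */
  b = (pthread_barrier_t*)mmap(NULL,sizeof(*b),PROT_READ | PROT_WRITE,MAP_SHARED | MAP_ANONYMOUS,-1,0);
  pthread_barrierattr_init(&attr);
  pthread_barrierattr_setpshared(&attr,PTHREAD_PROCESS_SHARED); /* barrier works across processes, not just threads */
  pthread_barrier_init(b,&attr,2);
  pthread_barrierattr_destroy(&attr);
  if (fork() == 0) { pthread_barrier_wait(b); _exit(0); } /* child arrives at the barrier and exits */
  pthread_barrier_wait(b); /* parent sleeps (does not spin) until the child arrives */
  return 0;
}
#endif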
/*@C
  PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries that use OpenMP

  Input Parameters:
+ petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
- nthreads   - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value

  Output Parameter:
. pctrl - a PETSc OpenMP controller

  Level: developer

  TODO: Possibly use the variable PetscNumOMPThreads to determine the number of threads to use

.seealso: PetscOmpCtrlDestroy()
@*/
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm,PetscInt nthreads,PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl;
  unsigned long  *cpu_ulongs=NULL;
  PetscInt       i,nr_cpu_ulongs;
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank,shm_comm_size,omp_rank,color;
  PetscInt       num_packages,num_cores;

  PetscFunctionBegin;
  ierr = PetscNew(&ctrl);CHKERRQ(ierr);

  /*=================================================================================
    Init hwloc
    =================================================================================*/
  ierr = hwloc_topology_init(&ctrl->topology);CHKERRQ(ierr);
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and have a faster hwloc_topology_load() */
  ierr = hwloc_topology_set_all_types_filter(ctrl->topology,HWLOC_TYPE_FILTER_KEEP_NONE);CHKERRQ(ierr);
  ierr = hwloc_topology_set_type_filter(ctrl->topology,HWLOC_OBJ_CORE,HWLOC_TYPE_FILTER_KEEP_ALL);CHKERRQ(ierr);
#endif
  ierr = hwloc_topology_load(ctrl->topology);CHKERRQ(ierr);

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    the others are called slaves. The OMP masters make up a new comm called
    omp_master_comm, which is usually passed to third party libraries.
    =================================================================================*/

  /* fetch the stored shared memory communicator */
  ierr = PetscShmCommGet(petsc_comm,&pshmcomm);CHKERRQ(ierr);
  ierr = PetscShmCommGetMpiShmComm(pshmcomm,&shm_comm);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(shm_comm,&shm_rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(shm_comm,&shm_comm_size);CHKERRMPI(ierr);

  /* PETSc decides nthreads, which is the smaller of shm_comm_size and the number of cores per package (socket) */
  if (nthreads == -1) {
    num_packages = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE);
    num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE);
    nthreads     = num_cores/num_packages;
    if (nthreads > shm_comm_size) nthreads = shm_comm_size;
  }

  if (nthreads < 1 || nthreads > shm_comm_size) SETERRQ2(petsc_comm,PETSC_ERR_ARG_OUTOFRANGE,"number of OpenMP threads %D cannot be < 1 or > the MPI shared memory communicator size %d\n",nthreads,shm_comm_size);
  if (shm_comm_size % nthreads) { ierr = PetscPrintf(petsc_comm,"Warning: number of OpenMP threads %D is not a factor of the MPI shared memory communicator size %d, which may cause load imbalance!\n",nthreads,shm_comm_size);CHKERRQ(ierr); }

  /* split shm_comm into a set of omp_comms, each of size nthreads. E.g., if
     shm_comm_size=16 and nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
     color 1, and they are put in two omp_comms. Note that ranks in petsc_comm may or
     may not be consecutive in a shm_comm, but shm_ranks always run from 0 to
     shm_comm_size-1. Use 0 as the key so that the rank ordering won't change in the
     new comm.
   */
  color = shm_rank / nthreads;
  ierr  = MPI_Comm_split(shm_comm,color,0/*key*/,&ctrl->omp_comm);CHKERRMPI(ierr);

  /* put rank 0 of each omp_comm (i.e., the master ranks) into a new comm - omp_master_comm */
  ierr = MPI_Comm_rank(ctrl->omp_comm,&omp_rank);CHKERRMPI(ierr);
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE;  /* master */
    color = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE; /* slave */
    color = MPI_UNDEFINED;             /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  ierr = MPI_Comm_split(petsc_comm,color,0/*key*/,&ctrl->omp_master_comm);CHKERRMPI(ierr);

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
    and run them on the idle CPUs.
    =================================================================================*/
  ierr = PetscOmpCtrlCreateBarrier(ctrl);CHKERRQ(ierr);

  /*=================================================================================
    The omp master records its cpu binding (i.e., cpu set) and computes a new binding
    that is the union of the bindings of all ranks in the omp_comm
    =================================================================================*/

  ctrl->cpuset = hwloc_bitmap_alloc(); if (!ctrl->cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
  ierr = hwloc_get_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);

  /* hwloc's main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
  nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(ctrl->topology))+sizeof(unsigned long)*8)/sizeof(unsigned long)/8;
  ierr = PetscMalloc1(nr_cpu_ulongs,&cpu_ulongs);CHKERRQ(ierr);
  if (nr_cpu_ulongs == 1) {
    cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
  } else {
    for (i=0; i<nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset,(unsigned)i);
  }

  ierr = MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs,cpu_ulongs,nr_cpu_ulongs,MPI_UNSIGNED_LONG,MPI_BOR,0,ctrl->omp_comm);CHKERRMPI(ierr);

  if (ctrl->is_omp_master) {
    ctrl->omp_cpuset = hwloc_bitmap_alloc(); if (!ctrl->omp_cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
    if (nr_cpu_ulongs == 1) {
#if HWLOC_API_VERSION >= 0x00020000
      ierr = hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);CHKERRQ(ierr);
#else
      hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);
#endif
    } else {
      for (i=0; i<nr_cpu_ulongs; i++) {
#if HWLOC_API_VERSION >= 0x00020000
        ierr = hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);CHKERRQ(ierr);
#else
        hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);
#endif
      }
    }
  }

  ierr   = PetscFree(cpu_ulongs);CHKERRQ(ierr);
  *pctrl = ctrl;
  PetscFunctionReturn(0);
}
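/* For reference, a minimal life-cycle sketch of the controller (illustrative only and not
   compiled here; the third party library call is a stand-in for whatever OpenMP-based
   package omp_master_comm is handed to). */
#if 0
{
  PetscOmpCtrl   ctrl;
  MPI_Comm       omp_master_comm;
  PetscBool      is_master;
  PetscErrorCode ierr;

  ierr = PetscOmpCtrlCreate(PETSC_COMM_WORLD,-1,&ctrl);CHKERRQ(ierr); /* -1: let PETSc pick nthreads */
  ierr = PetscOmpCtrlGetOmpComms(ctrl,NULL,&omp_master_comm,&is_master);CHKERRQ(ierr);
  /* ... on master ranks, hand omp_master_comm to the third party library ... */
  ierr = PetscOmpCtrlDestroy(&ctrl);CHKERRQ(ierr);
}
#endif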
/*@C
  PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller

  Input Parameter:
. pctrl - a PETSc OpenMP controller

  Level: developer

.seealso: PetscOmpCtrlCreate()
@*/
PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl = *pctrl;

  PetscFunctionBegin;
  hwloc_bitmap_free(ctrl->cpuset);
  hwloc_topology_destroy(ctrl->topology);
  PetscOmpCtrlDestroyBarrier(ctrl);
  ierr = MPI_Comm_free(&ctrl->omp_comm);CHKERRMPI(ierr);
  if (ctrl->is_omp_master) {
    hwloc_bitmap_free(ctrl->omp_cpuset);
    ierr = MPI_Comm_free(&ctrl->omp_master_comm);CHKERRMPI(ierr);
  }
  ierr = PetscFree(ctrl);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlGetOmpComms - Get the MPI communicators from a PETSc OMP controller

  Input Parameter:
. ctrl - a PETSc OMP controller

  Output Parameters:
+ omp_comm        - a communicator that includes a master rank and slave ranks where the master spawns threads
. omp_master_comm - on master ranks, a communicator that includes the master ranks of each omp_comm; on slave ranks, MPI_COMM_NULL is returned
- is_omp_master   - true if the calling process is an OMP master rank

  Notes:
  Any output parameter can be NULL; such parameters are simply ignored.

  Level: developer
@*/
PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl,MPI_Comm *omp_comm,MPI_Comm *omp_master_comm,PetscBool *is_omp_master)
{
  PetscFunctionBegin;
  if (omp_comm)        *omp_comm        = ctrl->omp_comm;
  if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
  if (is_omp_master)   *is_omp_master   = ctrl->is_omp_master;
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlBarrier - Do a barrier on the MPI ranks in the omp_comm contained in the PETSc OMP controller (to let slave ranks free their CPUs)

  Input Parameter:
. ctrl - a PETSc OMP controller

  Notes:
  This is a pthread barrier on MPI processes. Using MPI_Barrier() instead would be conceptually
  correct, but the MPI standard does not require processes blocked in MPI_Barrier() to free
  their CPUs to let other processes progress. In practice, to minimize latency, MPI processes
  stuck in MPI_Barrier() keep polling and do not free their CPUs. In contrast, a process-shared
  pthread barrier does put waiters to sleep.

  A code using PetscOmpCtrlBarrier() looks like this (a fuller sketch follows this function),

    if (is_omp_master) {
      PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
      Call the library using OpenMP
      PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
    }
    PetscOmpCtrlBarrier(ctrl);

  Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin(), PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)
{
  int err;

  PetscFunctionBegin;
  err = pthread_barrier_wait(ctrl->barrier);
  if (err && err != PTHREAD_BARRIER_SERIAL_THREAD) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %d\n",err);
  PetscFunctionReturn(0);
}
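/* A fuller version of the pattern above (illustrative only and not compiled here): every rank
   in omp_comm calls PetscOmpCtrlBarrier(), masters after running the OpenMP region and slaves
   right away, so the slaves sleep while the master's OpenMP threads use their CPUs. */
#if 0
{
  PetscOmpCtrl   ctrl;
  PetscBool      is_master;
  PetscErrorCode ierr;

  ierr = PetscOmpCtrlCreate(PETSC_COMM_WORLD,-1,&ctrl);CHKERRQ(ierr);
  ierr = PetscOmpCtrlGetOmpComms(ctrl,NULL,NULL,&is_master);CHKERRQ(ierr);
  if (is_master) {
    ierr = PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);CHKERRQ(ierr); /* bind to the union cpuset, raise the nthreads-var */
    /* ... call the third party library using OpenMP here ... */
    ierr = PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);CHKERRQ(ierr);   /* restore the binding, set the nthreads-var to 1 */
  }
  ierr = PetscOmpCtrlBarrier(ctrl);CHKERRQ(ierr);                  /* slaves sleep here while the master computes */
  ierr = PetscOmpCtrlDestroy(&ctrl);CHKERRQ(ierr);
}
#endif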
/*@C
  PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks

  Input Parameter:
. ctrl - a PETSc OMP controller

  Notes:
  Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
  This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime.

  Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->omp_cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(0);
}

/*@C
  PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks

  Input Parameter:
. ctrl - a PETSc OMP controller

  Notes:
  Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
  This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.

  Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(1);
  PetscFunctionReturn(0);
}

#undef USE_MMAP_ALLOCATE_SHARED_MEMORY
#endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */