15f7487a0SJunchao Zhang #include <petscsys.h> /*I "petscsys.h" I*/ 25f7487a0SJunchao Zhang #include <petsc/private/petscimpl.h> 35f7487a0SJunchao Zhang 45f7487a0SJunchao Zhang struct _n_PetscShmComm { 55f7487a0SJunchao Zhang PetscMPIInt *globranks; /* global ranks of each rank in the shared memory communicator */ 65f7487a0SJunchao Zhang PetscMPIInt shmsize; /* size of the shared memory communicator */ 75f7487a0SJunchao Zhang MPI_Comm globcomm,shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */ 85f7487a0SJunchao Zhang }; 95f7487a0SJunchao Zhang 105f7487a0SJunchao Zhang /* 1133779a13SJunchao Zhang Private routine to delete internal shared memory communicator when a communicator is freed. 125f7487a0SJunchao Zhang 135f7487a0SJunchao Zhang This is called by MPI, not by users. This is called by MPI_Comm_free() when the communicator that has this data as an attribute is freed. 145f7487a0SJunchao Zhang 155f7487a0SJunchao Zhang Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval() 165f7487a0SJunchao Zhang 175f7487a0SJunchao Zhang */ 1833779a13SJunchao Zhang PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *val,void *extra_state) 195f7487a0SJunchao Zhang { 205f7487a0SJunchao Zhang PetscErrorCode ierr; 215f7487a0SJunchao Zhang PetscShmComm p = (PetscShmComm)val; 225f7487a0SJunchao Zhang 235f7487a0SJunchao Zhang PetscFunctionBegin; 2402c9f0b5SLisandro Dalcin ierr = PetscInfo1(NULL,"Deleting shared memory subcommunicator in a MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr); 255f7487a0SJunchao Zhang ierr = MPI_Comm_free(&p->shmcomm);CHKERRMPI(ierr); 265f7487a0SJunchao Zhang ierr = PetscFree(p->globranks);CHKERRMPI(ierr); 275f7487a0SJunchao Zhang ierr = PetscFree(val);CHKERRMPI(ierr); 285f7487a0SJunchao Zhang PetscFunctionReturn(MPI_SUCCESS); 295f7487a0SJunchao Zhang } 305f7487a0SJunchao Zhang 31b48189acSJunchao Zhang #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY 32b48189acSJunchao Zhang /* Data structures to support freeing comms created in PetscShmCommGet(). 33b48189acSJunchao Zhang Since we predict communicators passed to PetscShmCommGet() are very likely 34b48189acSJunchao Zhang either a petsc inner communicator or an MPI communicator with a linked petsc 35b48189acSJunchao Zhang inner communicator, we use a simple static array to store dupped communicators 36b48189acSJunchao Zhang on rare cases otherwise. 37b48189acSJunchao Zhang */ 38b48189acSJunchao Zhang #define MAX_SHMCOMM_DUPPED_COMMS 16 39b48189acSJunchao Zhang static PetscInt num_dupped_comms=0; 40b48189acSJunchao Zhang static MPI_Comm shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS]; 41b48189acSJunchao Zhang static PetscErrorCode PetscShmCommDestroyDuppedComms(void) 42b48189acSJunchao Zhang { 43b48189acSJunchao Zhang PetscErrorCode ierr; 44b48189acSJunchao Zhang PetscInt i; 45b48189acSJunchao Zhang PetscFunctionBegin; 46b48189acSJunchao Zhang for (i=0; i<num_dupped_comms; i++) {ierr = PetscCommDestroy(&shmcomm_dupped_comms[i]);CHKERRQ(ierr);} 47b48189acSJunchao Zhang num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */ 48b48189acSJunchao Zhang PetscFunctionReturn(0); 49b48189acSJunchao Zhang } 50b48189acSJunchao Zhang #endif 51b48189acSJunchao Zhang 525f7487a0SJunchao Zhang /*@C 53b48189acSJunchao Zhang PetscShmCommGet - Given a communicator returns a sub-communicator of all ranks that share a common memory 545f7487a0SJunchao Zhang 55d083f849SBarry Smith Collective. 
565f7487a0SJunchao Zhang 575f7487a0SJunchao Zhang Input Parameter: 58b48189acSJunchao Zhang . globcomm - MPI_Comm, which can be a user MPI_Comm or a PETSc inner MPI_Comm 595f7487a0SJunchao Zhang 605f7487a0SJunchao Zhang Output Parameter: 615f7487a0SJunchao Zhang . pshmcomm - the PETSc shared memory communicator object 625f7487a0SJunchao Zhang 635f7487a0SJunchao Zhang Level: developer 645f7487a0SJunchao Zhang 655f7487a0SJunchao Zhang Notes: 665f7487a0SJunchao Zhang When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis 675f7487a0SJunchao Zhang 685f7487a0SJunchao Zhang @*/ 695f7487a0SJunchao Zhang PetscErrorCode PetscShmCommGet(MPI_Comm globcomm,PetscShmComm *pshmcomm) 705f7487a0SJunchao Zhang { 715f7487a0SJunchao Zhang #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY 725f7487a0SJunchao Zhang PetscErrorCode ierr; 735f7487a0SJunchao Zhang MPI_Group globgroup,shmgroup; 745f7487a0SJunchao Zhang PetscMPIInt *shmranks,i,flg; 755f7487a0SJunchao Zhang PetscCommCounter *counter; 765f7487a0SJunchao Zhang 775f7487a0SJunchao Zhang PetscFunctionBegin; 78b48189acSJunchao Zhang /* Get a petsc inner comm, since we always want to stash pshmcomm on petsc inner comms */ 79ffc4695bSBarry Smith ierr = MPI_Comm_get_attr(globcomm,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr); 80b48189acSJunchao Zhang if (!flg) { /* globcomm is not a petsc comm */ 81b48189acSJunchao Zhang union {MPI_Comm comm; void *ptr;} ucomm; 82b48189acSJunchao Zhang /* check if globcomm already has a linked petsc inner comm */ 83b48189acSJunchao Zhang ierr = MPI_Comm_get_attr(globcomm,Petsc_InnerComm_keyval,&ucomm,&flg);CHKERRMPI(ierr); 84b48189acSJunchao Zhang if (!flg) { 85b48189acSJunchao Zhang /* globcomm does not have a linked petsc inner comm, so we create one and replace globcomm with it */ 86b48189acSJunchao Zhang if (num_dupped_comms >= MAX_SHMCOMM_DUPPED_COMMS) SETERRQ1(globcomm,PETSC_ERR_PLIB,"PetscShmCommGet() is trying to dup more than %d MPI_Comms",MAX_SHMCOMM_DUPPED_COMMS); 87b48189acSJunchao Zhang ierr = PetscCommDuplicate(globcomm,&globcomm,NULL);CHKERRQ(ierr); 88b48189acSJunchao Zhang /* Register a function to free the dupped petsc comms at PetscFinalize at the first time */ 89b48189acSJunchao Zhang if (num_dupped_comms == 0) {ierr = PetscRegisterFinalize(PetscShmCommDestroyDuppedComms);CHKERRQ(ierr);} 90b48189acSJunchao Zhang shmcomm_dupped_comms[num_dupped_comms] = globcomm; 91b48189acSJunchao Zhang num_dupped_comms++; 92b48189acSJunchao Zhang } else { 93b48189acSJunchao Zhang /* otherwise, we pull out the inner comm and use it as globcomm */ 94b48189acSJunchao Zhang globcomm = ucomm.comm; 95b48189acSJunchao Zhang } 96b48189acSJunchao Zhang } 975f7487a0SJunchao Zhang 98b48189acSJunchao Zhang /* Check if globcomm already has an attached pshmcomm. 
If no, create one */ 99ffc4695bSBarry Smith ierr = MPI_Comm_get_attr(globcomm,Petsc_ShmComm_keyval,pshmcomm,&flg);CHKERRMPI(ierr); 1005f7487a0SJunchao Zhang if (flg) PetscFunctionReturn(0); 1015f7487a0SJunchao Zhang 1025f7487a0SJunchao Zhang ierr = PetscNew(pshmcomm);CHKERRQ(ierr); 1035f7487a0SJunchao Zhang (*pshmcomm)->globcomm = globcomm; 1045f7487a0SJunchao Zhang 105ffc4695bSBarry Smith ierr = MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED,0, MPI_INFO_NULL,&(*pshmcomm)->shmcomm);CHKERRMPI(ierr); 1065f7487a0SJunchao Zhang 107ffc4695bSBarry Smith ierr = MPI_Comm_size((*pshmcomm)->shmcomm,&(*pshmcomm)->shmsize);CHKERRMPI(ierr); 108ffc4695bSBarry Smith ierr = MPI_Comm_group(globcomm, &globgroup);CHKERRMPI(ierr); 109ffc4695bSBarry Smith ierr = MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup);CHKERRMPI(ierr); 1105f7487a0SJunchao Zhang ierr = PetscMalloc1((*pshmcomm)->shmsize,&shmranks);CHKERRQ(ierr); 1115f7487a0SJunchao Zhang ierr = PetscMalloc1((*pshmcomm)->shmsize,&(*pshmcomm)->globranks);CHKERRQ(ierr); 1125f7487a0SJunchao Zhang for (i=0; i<(*pshmcomm)->shmsize; i++) shmranks[i] = i; 113ffc4695bSBarry Smith ierr = MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks);CHKERRMPI(ierr); 1145f7487a0SJunchao Zhang ierr = PetscFree(shmranks);CHKERRQ(ierr); 115ffc4695bSBarry Smith ierr = MPI_Group_free(&globgroup);CHKERRMPI(ierr); 116ffc4695bSBarry Smith ierr = MPI_Group_free(&shmgroup);CHKERRMPI(ierr); 1175f7487a0SJunchao Zhang 1185f7487a0SJunchao Zhang for (i=0; i<(*pshmcomm)->shmsize; i++) { 1195f7487a0SJunchao Zhang ierr = PetscInfo2(NULL,"Shared memory rank %d global rank %d\n",i,(*pshmcomm)->globranks[i]);CHKERRQ(ierr); 1205f7487a0SJunchao Zhang } 121ffc4695bSBarry Smith ierr = MPI_Comm_set_attr(globcomm,Petsc_ShmComm_keyval,*pshmcomm);CHKERRMPI(ierr); 1225f7487a0SJunchao Zhang PetscFunctionReturn(0); 1235f7487a0SJunchao Zhang #else 1245f7487a0SJunchao Zhang SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich."); 1255f7487a0SJunchao Zhang #endif 1265f7487a0SJunchao Zhang } 1275f7487a0SJunchao Zhang 1285f7487a0SJunchao Zhang /*@C 1295f7487a0SJunchao Zhang PetscShmCommGlobalToLocal - Given a global rank returns the local rank in the shared memory communicator 1305f7487a0SJunchao Zhang 1315f7487a0SJunchao Zhang Input Parameters: 1325f7487a0SJunchao Zhang + pshmcomm - the shared memory communicator object 1335f7487a0SJunchao Zhang - grank - the global rank 1345f7487a0SJunchao Zhang 1355f7487a0SJunchao Zhang Output Parameter: 1365f7487a0SJunchao Zhang . 
lrank - the local rank, or MPI_PROC_NULL if it does not exist 1375f7487a0SJunchao Zhang 1385f7487a0SJunchao Zhang Level: developer 1395f7487a0SJunchao Zhang 1405f7487a0SJunchao Zhang Developer Notes: 1415f7487a0SJunchao Zhang Assumes the pshmcomm->globranks[] is sorted 1425f7487a0SJunchao Zhang 1435f7487a0SJunchao Zhang It may be better to rewrite this to map multiple global ranks to local in the same function call 1445f7487a0SJunchao Zhang 1455f7487a0SJunchao Zhang @*/ 1465f7487a0SJunchao Zhang PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt *lrank) 1475f7487a0SJunchao Zhang { 1485f7487a0SJunchao Zhang PetscMPIInt low,high,t,i; 1495f7487a0SJunchao Zhang PetscBool flg = PETSC_FALSE; 1505f7487a0SJunchao Zhang PetscErrorCode ierr; 1515f7487a0SJunchao Zhang 1525f7487a0SJunchao Zhang PetscFunctionBegin; 1535f7487a0SJunchao Zhang *lrank = MPI_PROC_NULL; 1545f7487a0SJunchao Zhang if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(0); 1555f7487a0SJunchao Zhang if (grank > pshmcomm->globranks[pshmcomm->shmsize-1]) PetscFunctionReturn(0); 1565f7487a0SJunchao Zhang ierr = PetscOptionsGetBool(NULL,NULL,"-noshared",&flg,NULL);CHKERRQ(ierr); 1575f7487a0SJunchao Zhang if (flg) PetscFunctionReturn(0); 1585f7487a0SJunchao Zhang low = 0; 1595f7487a0SJunchao Zhang high = pshmcomm->shmsize; 1605f7487a0SJunchao Zhang while (high-low > 5) { 1615f7487a0SJunchao Zhang t = (low+high)/2; 1625f7487a0SJunchao Zhang if (pshmcomm->globranks[t] > grank) high = t; 1635f7487a0SJunchao Zhang else low = t; 1645f7487a0SJunchao Zhang } 1655f7487a0SJunchao Zhang for (i=low; i<high; i++) { 1665f7487a0SJunchao Zhang if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(0); 1675f7487a0SJunchao Zhang if (pshmcomm->globranks[i] == grank) { 1685f7487a0SJunchao Zhang *lrank = i; 1695f7487a0SJunchao Zhang PetscFunctionReturn(0); 1705f7487a0SJunchao Zhang } 1715f7487a0SJunchao Zhang } 1725f7487a0SJunchao Zhang PetscFunctionReturn(0); 1735f7487a0SJunchao Zhang } 1745f7487a0SJunchao Zhang 1755f7487a0SJunchao Zhang /*@C 1765f7487a0SJunchao Zhang PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator returns the global rank 1775f7487a0SJunchao Zhang 1785f7487a0SJunchao Zhang Input Parameters: 1795f7487a0SJunchao Zhang + pshmcomm - the shared memory communicator object 1805f7487a0SJunchao Zhang - lrank - the local rank in the shared memory communicator 1815f7487a0SJunchao Zhang 1825f7487a0SJunchao Zhang Output Parameter: 1835f7487a0SJunchao Zhang . grank - the global rank in the global communicator where the shared memory communicator is built 1845f7487a0SJunchao Zhang 1855f7487a0SJunchao Zhang Level: developer 1865f7487a0SJunchao Zhang 1875f7487a0SJunchao Zhang @*/ 1885f7487a0SJunchao Zhang PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt *grank) 1895f7487a0SJunchao Zhang { 1905f7487a0SJunchao Zhang PetscFunctionBegin; 19176bd3646SJed Brown if (lrank < 0 || lrank >= pshmcomm->shmsize) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"No rank %D in the shared memory communicator",lrank); 1925f7487a0SJunchao Zhang *grank = pshmcomm->globranks[lrank]; 1935f7487a0SJunchao Zhang PetscFunctionReturn(0); 1945f7487a0SJunchao Zhang } 1955f7487a0SJunchao Zhang 1965f7487a0SJunchao Zhang /*@C 1975f7487a0SJunchao Zhang PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory 1985f7487a0SJunchao Zhang 1995f7487a0SJunchao Zhang Input Parameter: 2005f7487a0SJunchao Zhang . 
pshmcomm - PetscShmComm object obtained with PetscShmCommGet()

   Output Parameter:
.  comm - the MPI communicator

   Level: developer

@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm *comm)
{
  PetscFunctionBegin;
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#include <pthread.h>
#include <hwloc.h>
#include <omp.h>

/* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
   otherwise use MPI_Win_allocate_shared. They should have the same effect, except that MPI-3 is much
   simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
   by 50%. Until the reason is found out, we use mmap() instead.
 */
#define USE_MMAP_ALLOCATE_SHARED_MEMORY

#if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#endif

struct _n_PetscOmpCtrl {
  MPI_Comm           omp_comm;        /* a shared memory communicator to spawn omp threads */
  MPI_Comm           omp_master_comm; /* a communicator to give to third party libraries */
  PetscMPIInt        omp_comm_size;   /* size of omp_comm, a kind of OMP_NUM_THREADS */
  PetscBool          is_omp_master;   /* true on rank 0 of omp_comm, i.e., the OMP master */
  MPI_Win            omp_win;         /* a shared memory window containing a barrier */
  pthread_barrier_t *barrier;         /* pointer to the barrier */
  hwloc_topology_t   topology;
  hwloc_cpuset_t     cpuset;          /* cpu bindings of omp master */
  hwloc_cpuset_t     omp_cpuset;      /* union of cpu bindings of ranks in omp_comm */
};

/* Allocate and initialize a pthread_barrier_t object in memory shared by processes in omp_comm
   contained by the controller.

   PETSc OpenMP controller users do not call this function directly. This function exists
   only because we want to separate shared memory allocation methods from other code.
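   Depending on the build configuration, the shared memory comes either from mmap() on a temporary
   file (when USE_MMAP_ALLOCATE_SHARED_MEMORY and PETSC_HAVE_MMAP are defined) or from
   MPI_Win_allocate_shared(); see the two branches below.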
251eff715bbSJunchao Zhang */ 252a32e93adSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl) 253a32e93adSJunchao Zhang { 254a32e93adSJunchao Zhang PetscErrorCode ierr; 255a32e93adSJunchao Zhang MPI_Aint size; 256a32e93adSJunchao Zhang void *baseptr; 257a32e93adSJunchao Zhang pthread_barrierattr_t attr; 258a32e93adSJunchao Zhang 2594df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 2604df5c2c7SJunchao Zhang PetscInt fd; 2614df5c2c7SJunchao Zhang PetscChar pathname[PETSC_MAX_PATH_LEN]; 2624df5c2c7SJunchao Zhang #else 2634df5c2c7SJunchao Zhang PetscMPIInt disp_unit; 2644df5c2c7SJunchao Zhang #endif 2654df5c2c7SJunchao Zhang 2664df5c2c7SJunchao Zhang PetscFunctionBegin; 2674df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 2684df5c2c7SJunchao Zhang size = sizeof(pthread_barrier_t); 2694df5c2c7SJunchao Zhang if (ctrl->is_omp_master) { 270eff715bbSJunchao Zhang /* use PETSC_COMM_SELF in PetscGetTmp, since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to slaves */ 2714df5c2c7SJunchao Zhang ierr = PetscGetTmp(PETSC_COMM_SELF,pathname,PETSC_MAX_PATH_LEN);CHKERRQ(ierr); 2724df5c2c7SJunchao Zhang ierr = PetscStrlcat(pathname,"/petsc-shm-XXXXXX",PETSC_MAX_PATH_LEN);CHKERRQ(ierr); 2734df5c2c7SJunchao Zhang /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */ 2744df5c2c7SJunchao Zhang fd = mkstemp(pathname); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not create tmp file %s with mkstemp\n", pathname); 2754df5c2c7SJunchao Zhang ierr = ftruncate(fd,size);CHKERRQ(ierr); 2764df5c2c7SJunchao Zhang baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE, MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n"); 2774df5c2c7SJunchao Zhang ierr = close(fd);CHKERRQ(ierr); 27855b25c41SPierre Jolivet ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRMPI(ierr); 279eff715bbSJunchao Zhang /* this MPI_Barrier is to wait slaves to open the file before master unlinks it */ 28055b25c41SPierre Jolivet ierr = MPI_Barrier(ctrl->omp_comm);CHKERRMPI(ierr); 2814df5c2c7SJunchao Zhang ierr = unlink(pathname);CHKERRQ(ierr); 2824df5c2c7SJunchao Zhang } else { 28355b25c41SPierre Jolivet ierr = MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);CHKERRMPI(ierr); 2844df5c2c7SJunchao Zhang fd = open(pathname,O_RDWR); if (fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not open tmp file %s\n", pathname); 2854df5c2c7SJunchao Zhang baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE, MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n"); 2864df5c2c7SJunchao Zhang ierr = close(fd);CHKERRQ(ierr); 28755b25c41SPierre Jolivet ierr = MPI_Barrier(ctrl->omp_comm);CHKERRMPI(ierr); 2884df5c2c7SJunchao Zhang } 2894df5c2c7SJunchao Zhang #else 290a32e93adSJunchao Zhang size = ctrl->is_omp_master ? 
sizeof(pthread_barrier_t) : 0; 291ffc4695bSBarry Smith ierr = MPI_Win_allocate_shared(size,1,MPI_INFO_NULL,ctrl->omp_comm,&baseptr,&ctrl->omp_win);CHKERRMPI(ierr); 292ffc4695bSBarry Smith ierr = MPI_Win_shared_query(ctrl->omp_win,0,&size,&disp_unit,&baseptr);CHKERRMPI(ierr); 2934df5c2c7SJunchao Zhang #endif 294a32e93adSJunchao Zhang ctrl->barrier = (pthread_barrier_t*)baseptr; 295a32e93adSJunchao Zhang 296a32e93adSJunchao Zhang /* omp master initializes the barrier */ 297a32e93adSJunchao Zhang if (ctrl->is_omp_master) { 298ffc4695bSBarry Smith ierr = MPI_Comm_size(ctrl->omp_comm,&ctrl->omp_comm_size);CHKERRMPI(ierr); 299a32e93adSJunchao Zhang ierr = pthread_barrierattr_init(&attr);CHKERRQ(ierr); 300a32e93adSJunchao Zhang ierr = pthread_barrierattr_setpshared(&attr,PTHREAD_PROCESS_SHARED);CHKERRQ(ierr); /* make the barrier also work for processes */ 301a32e93adSJunchao Zhang ierr = pthread_barrier_init(ctrl->barrier,&attr,(unsigned int)ctrl->omp_comm_size);CHKERRQ(ierr); 302a32e93adSJunchao Zhang ierr = pthread_barrierattr_destroy(&attr);CHKERRQ(ierr); 303a32e93adSJunchao Zhang } 304a32e93adSJunchao Zhang 3054df5c2c7SJunchao Zhang /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */ 306ffc4695bSBarry Smith ierr = MPI_Barrier(ctrl->omp_comm);CHKERRMPI(ierr); 307a32e93adSJunchao Zhang PetscFunctionReturn(0); 308a32e93adSJunchao Zhang } 309a32e93adSJunchao Zhang 3108fcaa860SBarry Smith /* Destroy the pthread barrier in the PETSc OpenMP controller */ 311a32e93adSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl) 312a32e93adSJunchao Zhang { 313a32e93adSJunchao Zhang PetscErrorCode ierr; 314a32e93adSJunchao Zhang 3154df5c2c7SJunchao Zhang PetscFunctionBegin; 3164df5c2c7SJunchao Zhang /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */ 317ffc4695bSBarry Smith ierr = MPI_Barrier(ctrl->omp_comm);CHKERRMPI(ierr); 318a32e93adSJunchao Zhang if (ctrl->is_omp_master) { ierr = pthread_barrier_destroy(ctrl->barrier);CHKERRQ(ierr); } 3194df5c2c7SJunchao Zhang 3204df5c2c7SJunchao Zhang #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP) 3214df5c2c7SJunchao Zhang ierr = munmap(ctrl->barrier,sizeof(pthread_barrier_t));CHKERRQ(ierr); 3224df5c2c7SJunchao Zhang #else 323ffc4695bSBarry Smith ierr = MPI_Win_free(&ctrl->omp_win);CHKERRMPI(ierr); 3244df5c2c7SJunchao Zhang #endif 325a32e93adSJunchao Zhang PetscFunctionReturn(0); 326a32e93adSJunchao Zhang } 327a32e93adSJunchao Zhang 328eff715bbSJunchao Zhang /*@C 3298fcaa860SBarry Smith PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries using OpenMP 330eff715bbSJunchao Zhang 331*d8d19677SJose E. Roman Input Parameters: 332eff715bbSJunchao Zhang + petsc_comm - a communicator some PETSc object (for example, a matrix) lives in 333a2b725a8SWilliam Gropp - nthreads - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value 334eff715bbSJunchao Zhang 335eff715bbSJunchao Zhang Output Parameter: 3368fcaa860SBarry Smith . 
pctrl - a PETSc OpenMP controller

   Level: developer

   TODO: Possibly use the variable PetscNumOMPThreads to determine the number of threads to use

.seealso PetscOmpCtrlDestroy()
@*/
PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm,PetscInt nthreads,PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl;
  unsigned long  *cpu_ulongs=NULL;
  PetscInt       i,nr_cpu_ulongs;
  PetscShmComm   pshmcomm;
  MPI_Comm       shm_comm;
  PetscMPIInt    shm_rank,shm_comm_size,omp_rank,color;
  PetscInt       num_packages,num_cores;

  PetscFunctionBegin;
  ierr = PetscNew(&ctrl);CHKERRQ(ierr);

  /*=================================================================================
    Init hwloc
    ==================================================================================*/
  ierr = hwloc_topology_init(&ctrl->topology);CHKERRQ(ierr);
#if HWLOC_API_VERSION >= 0x00020000
  /* to filter out unneeded info and have a faster hwloc_topology_load */
  ierr = hwloc_topology_set_all_types_filter(ctrl->topology,HWLOC_TYPE_FILTER_KEEP_NONE);CHKERRQ(ierr);
  ierr = hwloc_topology_set_type_filter(ctrl->topology,HWLOC_OBJ_CORE,HWLOC_TYPE_FILTER_KEEP_ALL);CHKERRQ(ierr);
#endif
  ierr = hwloc_topology_load(ctrl->topology);CHKERRQ(ierr);

  /*=================================================================================
    Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
    physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
    the others are called slaves. OMP masters make up a new comm called omp_master_comm,
    which is usually passed to third party libraries.
    ==================================================================================*/

  /* fetch the stored shared memory communicator */
  ierr = PetscShmCommGet(petsc_comm,&pshmcomm);CHKERRQ(ierr);
  ierr = PetscShmCommGetMpiShmComm(pshmcomm,&shm_comm);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(shm_comm,&shm_rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(shm_comm,&shm_comm_size);CHKERRMPI(ierr);

  /* PETSc decides nthreads, which is the smaller of shm_comm_size and cores per package (socket) */
  if (nthreads == -1) {
    num_packages = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE);
    num_cores    = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE);
    nthreads     = num_cores/num_packages;
    if (nthreads > shm_comm_size) nthreads = shm_comm_size;
  }

  if (nthreads < 1 || nthreads > shm_comm_size) SETERRQ2(petsc_comm,PETSC_ERR_ARG_OUTOFRANGE,"number of OpenMP threads %D cannot be < 1 or > the MPI shared memory communicator size %d\n",nthreads,shm_comm_size);
  if (shm_comm_size % nthreads) { ierr = PetscPrintf(petsc_comm,"Warning: number of OpenMP threads %D is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n",nthreads,shm_comm_size);CHKERRQ(ierr); }

  /* Split shm_comm into a set of omp_comms, each of size nthreads. E.g., if
     shm_comm_size=16 and nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
     color 1, so they are put in two omp_comms. Note that petsc_ranks may or may not
     be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
     Use 0 as key so that rank ordering won't change in the new comm.
   */
  color = shm_rank / nthreads;
  ierr  = MPI_Comm_split(shm_comm,color,0/*key*/,&ctrl->omp_comm);CHKERRMPI(ierr);

  /* put rank 0's of omp_comms (i.e., master ranks) into a new comm - omp_master_comm */
  ierr = MPI_Comm_rank(ctrl->omp_comm,&omp_rank);CHKERRMPI(ierr);
  if (!omp_rank) {
    ctrl->is_omp_master = PETSC_TRUE;  /* master */
    color = 0;
  } else {
    ctrl->is_omp_master = PETSC_FALSE; /* slave */
    color = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
  }
  ierr = MPI_Comm_split(petsc_comm,color,0/*key*/,&ctrl->omp_master_comm);CHKERRMPI(ierr);

  /*=================================================================================
    Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
    slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
    and run them on the idle CPUs.
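    The calling pattern that achieves this handoff is documented in PetscOmpCtrlBarrier() below.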
418a32e93adSJunchao Zhang ==================================================================================*/ 419a32e93adSJunchao Zhang ierr = PetscOmpCtrlCreateBarrier(ctrl);CHKERRQ(ierr); 420a32e93adSJunchao Zhang 421a32e93adSJunchao Zhang /*================================================================================= 422a32e93adSJunchao Zhang omp master logs its cpu binding (i.e., cpu set) and computes a new binding that 423a32e93adSJunchao Zhang is the union of the bindings of all ranks in the omp_comm 424a32e93adSJunchao Zhang =================================================================================*/ 425a32e93adSJunchao Zhang 4263ab56b82SJunchao Zhang ctrl->cpuset = hwloc_bitmap_alloc(); if (!ctrl->cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n"); 427a32e93adSJunchao Zhang ierr = hwloc_get_cpubind(ctrl->topology,ctrl->cpuset, HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr); 428a32e93adSJunchao Zhang 4293ab56b82SJunchao Zhang /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */ 430a32e93adSJunchao Zhang nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset (ctrl->topology))+sizeof(unsigned long)*8)/sizeof(unsigned long)/8; 431a32e93adSJunchao Zhang ierr = PetscMalloc1(nr_cpu_ulongs,&cpu_ulongs);CHKERRQ(ierr); 432a32e93adSJunchao Zhang if (nr_cpu_ulongs == 1) { 433a32e93adSJunchao Zhang cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset); 434a32e93adSJunchao Zhang } else { 435a32e93adSJunchao Zhang for (i=0; i<nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset,(unsigned)i); 436a32e93adSJunchao Zhang } 437a32e93adSJunchao Zhang 438ffc4695bSBarry Smith ierr = MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs,nr_cpu_ulongs, MPI_UNSIGNED_LONG,MPI_BOR,0,ctrl->omp_comm);CHKERRMPI(ierr); 439a32e93adSJunchao Zhang 440a32e93adSJunchao Zhang if (ctrl->is_omp_master) { 4413ab56b82SJunchao Zhang ctrl->omp_cpuset = hwloc_bitmap_alloc(); if (!ctrl->omp_cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n"); 442a32e93adSJunchao Zhang if (nr_cpu_ulongs == 1) { 4433ab56b82SJunchao Zhang #if HWLOC_API_VERSION >= 0x00020000 444a32e93adSJunchao Zhang ierr = hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);CHKERRQ(ierr); 4453ab56b82SJunchao Zhang #else 4463ab56b82SJunchao Zhang hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]); 4473ab56b82SJunchao Zhang #endif 448a32e93adSJunchao Zhang } else { 4493ab56b82SJunchao Zhang for (i=0; i<nr_cpu_ulongs; i++) { 4503ab56b82SJunchao Zhang #if HWLOC_API_VERSION >= 0x00020000 4513ab56b82SJunchao Zhang ierr = hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);CHKERRQ(ierr); 4523ab56b82SJunchao Zhang #else 4533ab56b82SJunchao Zhang hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]); 4543ab56b82SJunchao Zhang #endif 4553ab56b82SJunchao Zhang } 456a32e93adSJunchao Zhang } 457a32e93adSJunchao Zhang } 458a32e93adSJunchao Zhang 459a32e93adSJunchao Zhang ierr = PetscFree(cpu_ulongs);CHKERRQ(ierr); 460a32e93adSJunchao Zhang *pctrl = ctrl; 461a32e93adSJunchao Zhang PetscFunctionReturn(0); 462a32e93adSJunchao Zhang } 463a32e93adSJunchao Zhang 464eff715bbSJunchao Zhang /*@C 46564f49babSJed Brown PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller 466eff715bbSJunchao Zhang 467eff715bbSJunchao Zhang Input Parameter: 4688fcaa860SBarry Smith . 
pctrl - a PETSc OpenMP controller

   Level: developer

.seealso PetscOmpCtrlCreate()
@*/
PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)
{
  PetscErrorCode ierr;
  PetscOmpCtrl   ctrl = *pctrl;

  PetscFunctionBegin;
  hwloc_bitmap_free(ctrl->cpuset);
  hwloc_topology_destroy(ctrl->topology);
  PetscOmpCtrlDestroyBarrier(ctrl);
  ierr = MPI_Comm_free(&ctrl->omp_comm);CHKERRMPI(ierr);
  if (ctrl->is_omp_master) {
    hwloc_bitmap_free(ctrl->omp_cpuset);
    ierr = MPI_Comm_free(&ctrl->omp_master_comm);CHKERRMPI(ierr);
  }
  ierr = PetscFree(ctrl);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Output Parameters:
+  omp_comm - a communicator that includes a master rank and slave ranks, where the master spawns threads
.  omp_master_comm - on master ranks, a communicator that includes the master ranks of each omp_comm;
                     on slave ranks, MPI_COMM_NULL is returned
-  is_omp_master - true if the calling process is an OMP master rank

   Notes:
   Any output parameter can be NULL; it is simply ignored.

   Level: developer
@*/
PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl,MPI_Comm *omp_comm,MPI_Comm *omp_master_comm,PetscBool *is_omp_master)
{
  PetscFunctionBegin;
  if (omp_comm)        *omp_comm        = ctrl->omp_comm;
  if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
  if (is_omp_master)   *is_omp_master   = ctrl->is_omp_master;
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlBarrier - Do a barrier on MPI ranks in the omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPUs)

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Notes:
   This is a pthread barrier on MPI processes. Using MPI_Barrier instead would be conceptually correct, but the MPI standard does not
   require processes blocked by MPI_Barrier to free their CPUs to let other processes progress. In practice, to minimize latency,
   MPI processes stuck in MPI_Barrier keep polling and do not free their CPUs. In contrast, processes blocked in a pthread barrier
   sleep and free their CPUs.

   A code using PetscOmpCtrlBarrier() would be like this,

     if (is_omp_master) {
       PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
       Call the library using OpenMP
       PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
     }
     PetscOmpCtrlBarrier(ctrl);

   Level: developer

.seealso PetscOmpCtrlOmpRegionOnMasterBegin(), PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)
{
  int err;

  PetscFunctionBegin;
  err = pthread_barrier_wait(ctrl->barrier);
  if (err && err != PTHREAD_BARRIER_SERIAL_THREAD) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %D\n", err);
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Notes:
   Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
   This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime.

   Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterEnd()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->omp_cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
  PetscFunctionReturn(0);
}

/*@C
   PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks

   Input Parameter:
.  ctrl - a PETSc OMP controller

   Notes:
   Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
   This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime back to 1.

   Level: developer

.seealso: PetscOmpCtrlOmpRegionOnMasterBegin()
@*/
PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = hwloc_set_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);CHKERRQ(ierr);
  omp_set_num_threads(1);
  PetscFunctionReturn(0);
}

#undef USE_MMAP_ALLOCATE_SHARED_MEMORY
#endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */
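
/*
   The sketch below is an illustrative (not compiled) example of how a caller might use the
   PetscShmComm API defined in this file to discover which global ranks share a node's memory.
   The helper name PetscShmCommViewNeighbors_Example is hypothetical; error handling follows the
   ierr/CHKERRQ and CHKERRMPI pattern used throughout this file.

     static PetscErrorCode PetscShmCommViewNeighbors_Example(MPI_Comm comm)
     {
       PetscErrorCode ierr;
       PetscShmComm   pshmcomm;
       MPI_Comm       shmcomm;
       PetscMPIInt    grank,lrank,shmsize,i,g;

       PetscFunctionBegin;
       ierr = MPI_Comm_rank(comm,&grank);CHKERRMPI(ierr);
       ierr = PetscShmCommGet(comm,&pshmcomm);CHKERRQ(ierr);                   (attaches or reuses the node subcommunicator)
       ierr = PetscShmCommGetMpiShmComm(pshmcomm,&shmcomm);CHKERRQ(ierr);
       ierr = MPI_Comm_size(shmcomm,&shmsize);CHKERRMPI(ierr);
       ierr = PetscShmCommGlobalToLocal(pshmcomm,grank,&lrank);CHKERRQ(ierr);  (my node-local rank)
       for (i=0; i<shmsize; i++) {
         ierr = PetscShmCommLocalToGlobal(pshmcomm,i,&g);CHKERRQ(ierr);        (global rank of node-local rank i)
         ierr = PetscInfo2(NULL,"node-local rank %d is global rank %d\n",i,g);CHKERRQ(ierr);
       }
       PetscFunctionReturn(0);
     }

   Note that the PetscShmComm object is attached to the communicator as an attribute and must not be
   freed by the caller; it is destroyed automatically when the communicator itself is freed.
*/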