#include <petscsys.h> /*I  "petscsys.h"  I*/
#include <petsc/private/petscimpl.h>

struct _n_PetscShmComm {
  PetscMPIInt *globranks;         /* global ranks of each rank in the shared memory communicator */
  PetscMPIInt  shmsize;           /* size of the shared memory communicator */
  MPI_Comm     globcomm, shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};

/*
   Private routine to delete the internal shared memory communicator when a communicator is freed.

   This is called by MPI, not by users; MPI_Comm_free() invokes it when the communicator that carries this data as an attribute is freed.

   Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
*/
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *val, void *extra_state)
{
  PetscShmComm p = (PetscShmComm)val;

  PetscFunctionBegin;
  PetscCallReturnMPI(PetscInfo(NULL, "Deleting shared memory subcommunicator in an MPI_Comm %ld\n", (long)comm));
  PetscCallMPIReturnMPI(MPI_Comm_free(&p->shmcomm));
  PetscCallReturnMPI(PetscFree(p->globranks));
  PetscCallReturnMPI(PetscFree(val));
  PetscFunctionReturn(MPI_SUCCESS);
}
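
/*
   A minimal sketch (assumed for illustration, not the actual PETSc initialization code) of how
   a delete callback like the one above is registered with MPI; MPI_COMM_NULL_COPY_FN is the
   standard no-op copy callback:

     PetscMPIInt keyval;
     PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_ShmComm_Attr_DeleteFn, &keyval, NULL));
*/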

#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  /* Data structures to support freeing comms created in PetscShmCommGet().
  Communicators passed to PetscShmCommGet() are very likely either a PETSc
  inner communicator or an MPI communicator with a linked PETSc inner
  communicator, so a simple static array suffices to store the dupped
  communicators in the rare cases otherwise.
 */
  #define MAX_SHMCOMM_DUPPED_COMMS 16
static PetscInt       num_dupped_comms = 0;
static MPI_Comm       shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS];
static PetscErrorCode PetscShmCommDestroyDuppedComms(void)
{
  PetscFunctionBegin;
  for (PetscInt i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i]));
  num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/*@C
  PetscShmCommGet - Returns a sub-communicator of all ranks that share a common memory

  Collective

  Input Parameter:
. globcomm - `MPI_Comm`, which can be a user `MPI_Comm` or a PETSc inner `MPI_Comm`

  Output Parameter:
. pshmcomm - the PETSc shared memory communicator object

  Level: developer

  Note:
  When used with MPICH, MPICH must be configured with `--download-mpich-device=ch3:nemesis`

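  Example Usage:
  A minimal sketch. The returned object is stored as an attribute of the communicator and is freed
  automatically when that communicator is freed, so the caller must not destroy it.
.vb
  PetscShmComm shm;

  PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &shm));
.ve
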
.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm, PetscShmComm *pshmcomm)
{
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  MPI_Group         globgroup, shmgroup;
  PetscMPIInt      *shmranks, i, flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscAssertPointer(pshmcomm, 2);
  /* Get a PETSc inner comm, since we always want to stash pshmcomm on PETSc inner comms */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* globcomm is not a PETSc comm */
    /* MPI stores attribute values as void*, so use a union to convert to an MPI_Comm portably */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if globcomm already has a linked PETSc inner comm */
    PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* globcomm does not have a linked PETSc inner comm, so we create one and replace globcomm with it */
      PetscCheck(num_dupped_comms < MAX_SHMCOMM_DUPPED_COMMS, globcomm, PETSC_ERR_PLIB, "PetscShmCommGet() is trying to dup more than %d MPI_Comms", MAX_SHMCOMM_DUPPED_COMMS);
      PetscCall(PetscCommDuplicate(globcomm, &globcomm, NULL));
      /* Register a function to free the dupped PETSc comms at PetscFinalize() the first time we dup */
      if (num_dupped_comms == 0) PetscCall(PetscRegisterFinalize(PetscShmCommDestroyDuppedComms));
      shmcomm_dupped_comms[num_dupped_comms] = globcomm;
      num_dupped_comms++;
    } else {
      /* otherwise, we pull out the inner comm and use it as globcomm */
      globcomm = ucomm.comm;
    }
  }

  /* Check if globcomm already has an attached pshmcomm. If not, create one */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_ShmComm_keyval, pshmcomm, &flg));
  if (flg) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscNew(pshmcomm));
  (*pshmcomm)->globcomm = globcomm;

  PetscCallMPI(MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(*pshmcomm)->shmcomm));

  PetscCallMPI(MPI_Comm_size((*pshmcomm)->shmcomm, &(*pshmcomm)->shmsize));
  PetscCallMPI(MPI_Comm_group(globcomm, &globgroup));
  PetscCallMPI(MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup));
  /* Map each local rank in the shared memory communicator to its rank in globcomm */
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &shmranks));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &(*pshmcomm)->globranks));
  for (i = 0; i < (*pshmcomm)->shmsize; i++) shmranks[i] = i;
  PetscCallMPI(MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks));
  PetscCall(PetscFree(shmranks));
  PetscCallMPI(MPI_Group_free(&globgroup));
  PetscCallMPI(MPI_Group_free(&shmgroup));

  for (i = 0; i < (*pshmcomm)->shmsize; i++) PetscCall(PetscInfo(NULL, "Shared memory rank %d global rank %d\n", i, (*pshmcomm)->globranks[i]));
  PetscCallMPI(MPI_Comm_set_attr(globcomm, Petsc_ShmComm_keyval, *pshmcomm));
  PetscFunctionReturn(PETSC_SUCCESS);
#else
  SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}

/*@C
  PetscShmCommGlobalToLocal - Given a global rank returns the local rank in the shared memory communicator

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- grank    - the global rank

  Output Parameter:
. lrank - the local rank, or `MPI_PROC_NULL` if it does not exist

  Level: developer

  Developer Notes:
  Assumes pshmcomm->globranks[] is sorted

  It may be better to rewrite this to map multiple global ranks to local ranks in the same function call

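  Example Usage:
  A minimal sketch that maps this process's own global rank (which always lies in its own shared
  memory communicator) to its local rank.
.vb
  PetscShmComm shm;
  PetscMPIInt  grank, lrank;

  PetscCall(PetscShmCommGet(PETSC_COMM_WORLD, &shm));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &grank));
  PetscCall(PetscShmCommGlobalToLocal(shm, grank, &lrank));
.ve
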
.seealso: `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm, PetscMPIInt grank, PetscMPIInt *lrank)
{
  PetscMPIInt low, high, t, i;
  PetscBool   flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscAssertPointer(pshmcomm, 1);
  PetscAssertPointer(lrank, 3);
  *lrank = MPI_PROC_NULL;
  if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(PETSC_SUCCESS);
  if (grank > pshmcomm->globranks[pshmcomm->shmsize - 1]) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-noshared", &flg, NULL));
  if (flg) PetscFunctionReturn(PETSC_SUCCESS); /* with -noshared, report no shared memory neighbors */
  /* binary search on the sorted globranks[], switching to a linear scan once the interval is small */
  low  = 0;
  high = pshmcomm->shmsize;
  while (high - low > 5) {
    t = (low + high) / 2;
    if (pshmcomm->globranks[t] > grank) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(PETSC_SUCCESS);
    if (pshmcomm->globranks[i] == grank) {
      *lrank = i;
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator returns the global rank

  Input Parameters:
+ pshmcomm - the shared memory communicator object
- lrank    - the local rank in the shared memory communicator

  Output Parameter:
. grank - the global rank in the global communicator where the shared memory communicator is built

  Level: developer

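  Example Usage:
  A minimal sketch that recovers the global rank of the first process (local rank 0) in this
  process's shared memory communicator, with shm obtained from `PetscShmCommGet()`.
.vb
  PetscMPIInt grank0;

  PetscCall(PetscShmCommLocalToGlobal(shm, 0, &grank0));
.ve
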
.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommGetMpiShmComm()`
@*/
PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm, PetscMPIInt lrank, PetscMPIInt *grank)
{
  PetscFunctionBegin;
  PetscAssertPointer(pshmcomm, 1);
  PetscAssertPointer(grank, 3);
  PetscCheck(lrank >= 0 && lrank < pshmcomm->shmsize, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "No rank %d in the shared memory communicator", lrank);
  *grank = pshmcomm->globranks[lrank];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory

  Input Parameter:
. pshmcomm - the `PetscShmComm` object obtained with `PetscShmCommGet()`

  Output Parameter:
. comm - the MPI communicator

  Level: developer

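  Example Usage:
  A minimal sketch, with shm obtained from `PetscShmCommGet()`; the returned communicator is owned
  by the `PetscShmComm` and must not be freed by the caller.
.vb
  MPI_Comm    nodecomm;
  PetscMPIInt noderank;

  PetscCall(PetscShmCommGetMpiShmComm(shm, &nodecomm));
  PetscCallMPI(MPI_Comm_rank(nodecomm, &noderank)); /* rank within the shared memory node */
.ve
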
.seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`
@*/
PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm, MPI_Comm *comm)
{
  PetscFunctionBegin;
  PetscAssertPointer(pshmcomm, 1);
  PetscAssertPointer(comm, 2);
  *comm = pshmcomm->shmcomm;
  PetscFunctionReturn(PETSC_SUCCESS);
}