15f7487a0SJunchao Zhang #include <petscsys.h> /*I "petscsys.h" I*/
25f7487a0SJunchao Zhang #include <petsc/private/petscimpl.h>
35f7487a0SJunchao Zhang
/* Private context behind the opaque PetscShmComm handle: records the mapping
   between a global communicator and its shared-memory sub-communicator. */
struct _n_PetscShmComm {
  PetscMPIInt *globranks;       /* globranks[i] = rank in globcomm of local rank i of shmcomm; allocated/freed with the struct, kept sorted */
  PetscMPIInt  shmsize;         /* number of ranks in the shared memory communicator */
  MPI_Comm     globcomm, shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
};
95f7487a0SJunchao Zhang
105f7487a0SJunchao Zhang /*
1133779a13SJunchao Zhang Private routine to delete internal shared memory communicator when a communicator is freed.
125f7487a0SJunchao Zhang
135f7487a0SJunchao Zhang This is called by MPI, not by users. This is called by MPI_Comm_free() when the communicator that has this data as an attribute is freed.
145f7487a0SJunchao Zhang
155f7487a0SJunchao Zhang Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
165f7487a0SJunchao Zhang
175f7487a0SJunchao Zhang */
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *val, void *extra_state)
{
  /* val is the PetscShmComm stashed on comm via Petsc_ShmComm_keyval */
  PetscShmComm p = (PetscShmComm)val;

  PetscFunctionBegin;
  /* MPI callback convention: must return an MPI error code, hence the *ReturnMPI macro variants */
  PetscCallReturnMPI(PetscInfo(NULL, "Deleting shared memory subcommunicator in a MPI_Comm %ld\n", (long)comm));
  PetscCallMPIReturnMPI(MPI_Comm_free(&p->shmcomm));
  PetscCallReturnMPI(PetscFree(p->globranks));
  /* finally release the context struct itself (same pointer as p) */
  PetscCallReturnMPI(PetscFree(val));
  PetscFunctionReturn(MPI_SUCCESS);
}
295f7487a0SJunchao Zhang
30b48189acSJunchao Zhang #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
/* Data structures to support freeing comms created in PetscShmCommGet().
   We expect the communicators passed to PetscShmCommGet() to be, in almost
   all cases, either PETSc inner communicators or MPI communicators with a
   linked PETSc inner communicator; for the rare remaining cases we use a
   simple static array to store the dupped communicators so they can be
   freed at PetscFinalize().
*/
37b48189acSJunchao Zhang #define MAX_SHMCOMM_DUPPED_COMMS 16
38b48189acSJunchao Zhang static PetscInt num_dupped_comms = 0;
39b48189acSJunchao Zhang static MPI_Comm shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS];
PetscShmCommDestroyDuppedComms(void)40d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscShmCommDestroyDuppedComms(void)
41d71ae5a4SJacob Faibussowitsch {
42b48189acSJunchao Zhang PetscFunctionBegin;
43cf27e480SPierre Jolivet for (PetscInt i = 0; i < num_dupped_comms; i++) PetscCall(PetscCommDestroy(&shmcomm_dupped_comms[i]));
44b48189acSJunchao Zhang num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */
453ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
46b48189acSJunchao Zhang }
47b48189acSJunchao Zhang #endif
48b48189acSJunchao Zhang
495f7487a0SJunchao Zhang /*@C
50811af0c4SBarry Smith PetscShmCommGet - Returns a sub-communicator of all ranks that share a common memory
515f7487a0SJunchao Zhang
52d083f849SBarry Smith Collective.
535f7487a0SJunchao Zhang
545f7487a0SJunchao Zhang Input Parameter:
55a3b724e8SBarry Smith . globcomm - `MPI_Comm`, which can be a user `MPI_Comm` or a PETSc inner `MPI_Comm`
565f7487a0SJunchao Zhang
575f7487a0SJunchao Zhang Output Parameter:
585f7487a0SJunchao Zhang . pshmcomm - the PETSc shared memory communicator object
595f7487a0SJunchao Zhang
605f7487a0SJunchao Zhang Level: developer
615f7487a0SJunchao Zhang
62811af0c4SBarry Smith Note:
63a3b724e8SBarry Smith When used with MPICH, MPICH must be configured with `--download-mpich-device=ch3:nemesis`
645f7487a0SJunchao Zhang
65811af0c4SBarry Smith .seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
665f7487a0SJunchao Zhang @*/
PetscErrorCode PetscShmCommGet(MPI_Comm globcomm, PetscShmComm *pshmcomm)
{
#ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
  MPI_Group         globgroup, shmgroup;
  PetscMPIInt      *shmranks, i, flg;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscAssertPointer(pshmcomm, 2);
  /* Get a PETSc inner comm, since we always want to stash pshmcomm on PETSc inner comms */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_Counter_keyval, &counter, &flg));
  if (!flg) { /* globcomm is not a PETSc comm */
    /* union used to round-trip an MPI_Comm through the void* attribute value without a pointer cast */
    union
    {
      MPI_Comm comm;
      void    *ptr;
    } ucomm;
    /* check if globcomm already has a linked PETSc inner comm */
    PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_InnerComm_keyval, &ucomm, &flg));
    if (!flg) {
      /* globcomm does not have a linked PETSc inner comm, so we create one and replace globcomm with it;
         the dup is remembered in shmcomm_dupped_comms[] so it can be freed at PetscFinalize() */
      PetscCheck(num_dupped_comms < MAX_SHMCOMM_DUPPED_COMMS, globcomm, PETSC_ERR_PLIB, "PetscShmCommGet() is trying to dup more than %d MPI_Comms", MAX_SHMCOMM_DUPPED_COMMS);
      PetscCall(PetscCommDuplicate(globcomm, &globcomm, NULL));
      /* Register a function to free the dupped PETSc comms at PetscFinalize() at the first time */
      if (num_dupped_comms == 0) PetscCall(PetscRegisterFinalize(PetscShmCommDestroyDuppedComms));
      shmcomm_dupped_comms[num_dupped_comms] = globcomm;
      num_dupped_comms++;
    } else {
      /* otherwise, we pull out the inner comm and use it as globcomm */
      globcomm = ucomm.comm;
    }
  }

  /* Check if globcomm already has an attached pshmcomm. If no, create one */
  PetscCallMPI(MPI_Comm_get_attr(globcomm, Petsc_ShmComm_keyval, pshmcomm, &flg));
  if (flg) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscNew(pshmcomm));
  (*pshmcomm)->globcomm = globcomm;

  /* Split off the ranks that can share memory with this process (MPI-3) */
  PetscCallMPI(MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &(*pshmcomm)->shmcomm));

  PetscCallMPI(MPI_Comm_size((*pshmcomm)->shmcomm, &(*pshmcomm)->shmsize));
  PetscCallMPI(MPI_Comm_group(globcomm, &globgroup));
  PetscCallMPI(MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup));
  /* Build globranks[]: for each local rank 0..shmsize-1, its rank in globcomm */
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &shmranks));
  PetscCall(PetscMalloc1((*pshmcomm)->shmsize, &(*pshmcomm)->globranks));
  for (i = 0; i < (*pshmcomm)->shmsize; i++) shmranks[i] = i;
  PetscCallMPI(MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks));
  PetscCall(PetscFree(shmranks));
  PetscCallMPI(MPI_Group_free(&globgroup));
  PetscCallMPI(MPI_Group_free(&shmgroup));

  for (i = 0; i < (*pshmcomm)->shmsize; i++) PetscCall(PetscInfo(NULL, "Shared memory rank %d global rank %d\n", i, (*pshmcomm)->globranks[i]));
  /* Cache the result on globcomm; Petsc_ShmComm_Attr_DeleteFn() frees it when the comm dies */
  PetscCallMPI(MPI_Comm_set_attr(globcomm, Petsc_ShmComm_keyval, *pshmcomm));
  PetscFunctionReturn(PETSC_SUCCESS);
#else
  SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
#endif
}
1275f7487a0SJunchao Zhang
1285f7487a0SJunchao Zhang /*@C
1295f7487a0SJunchao Zhang PetscShmCommGlobalToLocal - Given a global rank returns the local rank in the shared memory communicator
1305f7487a0SJunchao Zhang
1315f7487a0SJunchao Zhang Input Parameters:
1325f7487a0SJunchao Zhang + pshmcomm - the shared memory communicator object
1335f7487a0SJunchao Zhang - grank - the global rank
1345f7487a0SJunchao Zhang
1355f7487a0SJunchao Zhang Output Parameter:
136811af0c4SBarry Smith . lrank - the local rank, or `MPI_PROC_NULL` if it does not exist
1375f7487a0SJunchao Zhang
1385f7487a0SJunchao Zhang Level: developer
1395f7487a0SJunchao Zhang
1405f7487a0SJunchao Zhang Developer Notes:
1415f7487a0SJunchao Zhang Assumes the pshmcomm->globranks[] is sorted
1425f7487a0SJunchao Zhang
1435f7487a0SJunchao Zhang It may be better to rewrite this to map multiple global ranks to local in the same function call
1445f7487a0SJunchao Zhang
145811af0c4SBarry Smith .seealso: `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`, `PetscShmCommGetMpiShmComm()`
1465f7487a0SJunchao Zhang @*/
PetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt * lrank)147d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm, PetscMPIInt grank, PetscMPIInt *lrank)
148d71ae5a4SJacob Faibussowitsch {
1495f7487a0SJunchao Zhang PetscMPIInt low, high, t, i;
1505f7487a0SJunchao Zhang PetscBool flg = PETSC_FALSE;
1515f7487a0SJunchao Zhang
1525f7487a0SJunchao Zhang PetscFunctionBegin;
1534f572ea9SToby Isaac PetscAssertPointer(pshmcomm, 1);
1544f572ea9SToby Isaac PetscAssertPointer(lrank, 3);
1555f7487a0SJunchao Zhang *lrank = MPI_PROC_NULL;
1563ba16761SJacob Faibussowitsch if (grank < pshmcomm->globranks[0]) PetscFunctionReturn(PETSC_SUCCESS);
1573ba16761SJacob Faibussowitsch if (grank > pshmcomm->globranks[pshmcomm->shmsize - 1]) PetscFunctionReturn(PETSC_SUCCESS);
1589566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetBool(NULL, NULL, "-noshared", &flg, NULL));
1593ba16761SJacob Faibussowitsch if (flg) PetscFunctionReturn(PETSC_SUCCESS);
1605f7487a0SJunchao Zhang low = 0;
1615f7487a0SJunchao Zhang high = pshmcomm->shmsize;
1625f7487a0SJunchao Zhang while (high - low > 5) {
1635f7487a0SJunchao Zhang t = (low + high) / 2;
1645f7487a0SJunchao Zhang if (pshmcomm->globranks[t] > grank) high = t;
1655f7487a0SJunchao Zhang else low = t;
1665f7487a0SJunchao Zhang }
1675f7487a0SJunchao Zhang for (i = low; i < high; i++) {
1683ba16761SJacob Faibussowitsch if (pshmcomm->globranks[i] > grank) PetscFunctionReturn(PETSC_SUCCESS);
1695f7487a0SJunchao Zhang if (pshmcomm->globranks[i] == grank) {
1705f7487a0SJunchao Zhang *lrank = i;
1713ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1725f7487a0SJunchao Zhang }
1735f7487a0SJunchao Zhang }
1743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1755f7487a0SJunchao Zhang }
1765f7487a0SJunchao Zhang
1775f7487a0SJunchao Zhang /*@C
1785f7487a0SJunchao Zhang PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator returns the global rank
1795f7487a0SJunchao Zhang
1805f7487a0SJunchao Zhang Input Parameters:
1815f7487a0SJunchao Zhang + pshmcomm - the shared memory communicator object
1825f7487a0SJunchao Zhang - lrank - the local rank in the shared memory communicator
1835f7487a0SJunchao Zhang
1845f7487a0SJunchao Zhang Output Parameter:
1855f7487a0SJunchao Zhang . grank - the global rank in the global communicator where the shared memory communicator is built
1865f7487a0SJunchao Zhang
1875f7487a0SJunchao Zhang Level: developer
1885f7487a0SJunchao Zhang
189811af0c4SBarry Smith .seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommGetMpiShmComm()`
1905f7487a0SJunchao Zhang @*/
PetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt * grank)191d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm, PetscMPIInt lrank, PetscMPIInt *grank)
192d71ae5a4SJacob Faibussowitsch {
1935f7487a0SJunchao Zhang PetscFunctionBegin;
1944f572ea9SToby Isaac PetscAssertPointer(pshmcomm, 1);
1954f572ea9SToby Isaac PetscAssertPointer(grank, 3);
1962c71b3e2SJacob Faibussowitsch PetscCheck(lrank >= 0 && lrank < pshmcomm->shmsize, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "No rank %d in the shared memory communicator", lrank);
1975f7487a0SJunchao Zhang *grank = pshmcomm->globranks[lrank];
1983ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1995f7487a0SJunchao Zhang }
2005f7487a0SJunchao Zhang
2015f7487a0SJunchao Zhang /*@C
2025f7487a0SJunchao Zhang PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory
2035f7487a0SJunchao Zhang
2045f7487a0SJunchao Zhang Input Parameter:
2055f7487a0SJunchao Zhang . pshmcomm - PetscShmComm object obtained with PetscShmCommGet()
2065f7487a0SJunchao Zhang
2075f7487a0SJunchao Zhang Output Parameter:
2085f7487a0SJunchao Zhang . comm - the MPI communicator
2095f7487a0SJunchao Zhang
2105f7487a0SJunchao Zhang Level: developer
2115f7487a0SJunchao Zhang
212811af0c4SBarry Smith .seealso: `PetscShmCommGlobalToLocal()`, `PetscShmCommGet()`, `PetscShmCommLocalToGlobal()`
2135f7487a0SJunchao Zhang @*/
PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm * comm)214d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm, MPI_Comm *comm)
215d71ae5a4SJacob Faibussowitsch {
2165f7487a0SJunchao Zhang PetscFunctionBegin;
2174f572ea9SToby Isaac PetscAssertPointer(pshmcomm, 1);
2184f572ea9SToby Isaac PetscAssertPointer(comm, 2);
2195f7487a0SJunchao Zhang *comm = pshmcomm->shmcomm;
2203ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
2215f7487a0SJunchao Zhang }
222