1f6ced4a3SJed Brown #include <petscsys.h> /*I "petscsys.h" I*/ 295c0884eSLisandro Dalcin #include <petsc/private/petscimpl.h> 3f6ced4a3SJed Brown 495c0884eSLisandro Dalcin PetscLogEvent PETSC_BuildTwoSided; 595c0884eSLisandro Dalcin PetscLogEvent PETSC_BuildTwoSidedF; 63b3561c8SJed Brown 79371c9d4SSatish Balay const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE", "IBARRIER", "REDSCATTER", "PetscBuildTwoSidedType", "PETSC_BUILDTWOSIDED_", NULL}; 8f6ced4a3SJed Brown 96145cd65SJed Brown static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 10f6ced4a3SJed Brown 116145cd65SJed Brown /*@ 126145cd65SJed Brown PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 136145cd65SJed Brown 146145cd65SJed Brown Logically Collective 156145cd65SJed Brown 164165533cSJose E. Roman Input Parameters: 17811af0c4SBarry Smith + comm - `PETSC_COMM_WORLD` 18811af0c4SBarry Smith - twosided - algorithm to use in subsequent calls to `PetscCommBuildTwoSided()` 196145cd65SJed Brown 206145cd65SJed Brown Level: developer 216145cd65SJed Brown 226145cd65SJed Brown Note: 236145cd65SJed Brown This option is currently global, but could be made per-communicator. 246145cd65SJed Brown 25811af0c4SBarry Smith .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedGetType()`, `PetscBuildTwoSidedType` 266145cd65SJed Brown @*/ 27d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm, PetscBuildTwoSidedType twosided) 28d71ae5a4SJacob Faibussowitsch { 296145cd65SJed Brown PetscFunctionBegin; 3076bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 316145cd65SJed Brown PetscMPIInt b1[2], b2[2]; 326145cd65SJed Brown b1[0] = -(PetscMPIInt)twosided; 336145cd65SJed Brown b1[1] = (PetscMPIInt)twosided; 341c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(b1, b2, 2, MPI_INT, MPI_MAX, comm)); 3508401ef6SPierre Jolivet PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Enum value must be same on all processes"); 366145cd65SJed Brown } 376145cd65SJed Brown _twosided_type = twosided; 383ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 396145cd65SJed Brown } 406145cd65SJed Brown 416145cd65SJed Brown /*@ 42811af0c4SBarry Smith PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication 436145cd65SJed Brown 446145cd65SJed Brown Logically Collective 456145cd65SJed Brown 464165533cSJose E. Roman Output Parameters: 476145cd65SJed Brown + comm - communicator on which to query algorithm 48811af0c4SBarry Smith - twosided - algorithm to use for `PetscCommBuildTwoSided()` 496145cd65SJed Brown 506145cd65SJed Brown Level: developer 516145cd65SJed Brown 52811af0c4SBarry Smith .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedSetType()`, `PetscBuildTwoSidedType` 536145cd65SJed Brown @*/ 54d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm, PetscBuildTwoSidedType *twosided) 55d71ae5a4SJacob Faibussowitsch { 5605342407SJunchao Zhang PetscMPIInt size; 57f6ced4a3SJed Brown 58f6ced4a3SJed Brown PetscFunctionBegin; 596145cd65SJed Brown *twosided = PETSC_BUILDTWOSIDED_NOTSET; 606145cd65SJed Brown if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 619566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 6205342407SJunchao Zhang _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */ 638ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 6405342407SJunchao Zhang if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 656145cd65SJed Brown #endif 669566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetEnum(NULL, NULL, "-build_twosided", PetscBuildTwoSidedTypes, (PetscEnum *)&_twosided_type, NULL)); 67f6ced4a3SJed Brown } 68f6ced4a3SJed Brown *twosided = _twosided_type; 693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 70f6ced4a3SJed Brown } 71f6ced4a3SJed Brown 728ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 73d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 74d71ae5a4SJacob Faibussowitsch { 75c3a59b84SJed Brown PetscMPIInt nrecvs, tag, done, i; 76c3a59b84SJed Brown MPI_Aint lb, unitbytes; 77f6ced4a3SJed Brown char *tdata; 78f6ced4a3SJed Brown MPI_Request *sendreqs, barrier; 790f453b92SJed Brown PetscSegBuffer segrank, segdata; 8046e50bb6SJed Brown PetscBool barrier_started; 81f6ced4a3SJed Brown 82f6ced4a3SJed Brown PetscFunctionBegin; 839566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 8508401ef6SPierre Jolivet PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 86f6ced4a3SJed Brown tdata = (char *)todata; 879566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nto, &sendreqs)); 8848a46eb9SPierre Jolivet for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 899566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank)); 909566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata)); 91f6ced4a3SJed Brown 92f6ced4a3SJed Brown nrecvs = 0; 93f6ced4a3SJed Brown barrier = MPI_REQUEST_NULL; 9446e50bb6SJed Brown /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 9546e50bb6SJed Brown * but we need to work around it. */ 9646e50bb6SJed Brown barrier_started = PETSC_FALSE; 97f6ced4a3SJed Brown for (done = 0; !done;) { 98f6ced4a3SJed Brown PetscMPIInt flag; 99f6ced4a3SJed Brown MPI_Status status; 1009566063dSJacob Faibussowitsch PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status)); 101f6ced4a3SJed Brown if (flag) { /* incoming message */ 102f6ced4a3SJed Brown PetscMPIInt *recvrank; 103f6ced4a3SJed Brown void *buf; 1049566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(segrank, 1, &recvrank)); 1059566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(segdata, count, &buf)); 106f6ced4a3SJed Brown *recvrank = status.MPI_SOURCE; 1079566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE)); 108f6ced4a3SJed Brown nrecvs++; 109f6ced4a3SJed Brown } 11046e50bb6SJed Brown if (!barrier_started) { 1114dc2109aSBarry Smith PetscMPIInt sent, nsends; 1129566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(nto, &nsends)); 1139566063dSJacob Faibussowitsch PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE)); 114f6ced4a3SJed Brown if (sent) { 1159566063dSJacob Faibussowitsch PetscCallMPI(MPI_Ibarrier(comm, &barrier)); 11646e50bb6SJed Brown barrier_started = PETSC_TRUE; 1179566063dSJacob Faibussowitsch PetscCall(PetscFree(sendreqs)); 118f6ced4a3SJed Brown } 119f6ced4a3SJed Brown } else { 1209566063dSJacob Faibussowitsch PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE)); 121f6ced4a3SJed Brown } 122f6ced4a3SJed Brown } 123f6ced4a3SJed Brown *nfrom = nrecvs; 1249566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks)); 1259566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&segrank)); 1269566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata)); 1279566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&segdata)); 1289566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm)); 1293ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 130f6ced4a3SJed Brown } 131f6ced4a3SJed Brown #endif 132f6ced4a3SJed Brown 133d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 134d71ae5a4SJacob Faibussowitsch { 13505342407SJunchao Zhang PetscMPIInt size, rank, *iflags, nrecvs, tag, *franks, i, flg; 136c3a59b84SJed Brown MPI_Aint lb, unitbytes; 137f6ced4a3SJed Brown char *tdata, *fdata; 1385c0db29aSPierre Jolivet MPI_Request *reqs, *sendreqs = NULL; 139f6ced4a3SJed Brown MPI_Status *statuses; 14005342407SJunchao Zhang PetscCommCounter *counter; 141f6ced4a3SJed Brown 142f6ced4a3SJed Brown PetscFunctionBegin; 1439566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 1449566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1459566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 1469566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg)); 14728b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set"); 14805342407SJunchao Zhang if (!counter->iflags) { 1499566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &counter->iflags)); 15005342407SJunchao Zhang iflags = counter->iflags; 15105342407SJunchao Zhang } else { 15205342407SJunchao Zhang iflags = counter->iflags; 1539566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(iflags, size)); 15405342407SJunchao Zhang } 15505342407SJunchao Zhang for (i = 0; i < nto; i++) iflags[toranks[i]] = 1; 1561c2dc1cbSBarry Smith PetscCall(MPIU_Allreduce(MPI_IN_PLACE, iflags, size, MPI_INT, MPI_SUM, comm)); 15705342407SJunchao Zhang nrecvs = iflags[rank]; 1589566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 1595f80ce2aSJacob Faibussowitsch PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 1609566063dSJacob Faibussowitsch PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata)); 161f6ced4a3SJed Brown tdata = (char *)todata; 1629566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses)); 1635c0db29aSPierre Jolivet if (nto) sendreqs = reqs + nrecvs; 16448a46eb9SPierre Jolivet for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i)); 16548a46eb9SPierre Jolivet for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 1669566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses)); 1679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrecvs, &franks)); 168a297a907SKarl Rupp for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 1699566063dSJacob Faibussowitsch PetscCall(PetscFree2(reqs, statuses)); 1709566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm)); 171f6ced4a3SJed Brown 172f6ced4a3SJed Brown *nfrom = nrecvs; 173f6ced4a3SJed Brown *fromranks = franks; 174f6ced4a3SJed Brown *(void **)fromdata = fdata; 1753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 176f6ced4a3SJed Brown } 177f6ced4a3SJed Brown 1781654bf6bSJed Brown #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 179d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 180d71ae5a4SJacob Faibussowitsch { 18105342407SJunchao Zhang PetscMPIInt size, *iflags, nrecvs, tag, *franks, i, flg; 182c3a59b84SJed Brown MPI_Aint lb, unitbytes; 1831654bf6bSJed Brown char *tdata, *fdata; 1841654bf6bSJed Brown MPI_Request *reqs, *sendreqs; 1851654bf6bSJed Brown MPI_Status *statuses; 18605342407SJunchao Zhang PetscCommCounter *counter; 1871654bf6bSJed Brown 1881654bf6bSJed Brown PetscFunctionBegin; 1899566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 1909566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 1919566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg)); 19228b400f6SJacob Faibussowitsch PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set"); 19305342407SJunchao Zhang if (!counter->iflags) { 1949566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(size, &counter->iflags)); 19505342407SJunchao Zhang iflags = counter->iflags; 19605342407SJunchao Zhang } else { 19705342407SJunchao Zhang iflags = counter->iflags; 1989566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(iflags, size)); 19905342407SJunchao Zhang } 2001654bf6bSJed Brown for (i = 0; i < nto; i++) iflags[toranks[i]] = 1; 2019566063dSJacob Faibussowitsch PetscCallMPI(MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, comm)); 2029566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 20308401ef6SPierre Jolivet PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 2049566063dSJacob Faibussowitsch PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata)); 2051654bf6bSJed Brown tdata = (char *)todata; 2069566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses)); 2071654bf6bSJed Brown sendreqs = reqs + nrecvs; 20848a46eb9SPierre Jolivet for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i)); 20948a46eb9SPierre Jolivet for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 2109566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses)); 2119566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrecvs, &franks)); 2121654bf6bSJed Brown for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 2139566063dSJacob Faibussowitsch PetscCall(PetscFree2(reqs, statuses)); 2149566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm)); 2151654bf6bSJed Brown 2161654bf6bSJed Brown *nfrom = nrecvs; 2171654bf6bSJed Brown *fromranks = franks; 2181654bf6bSJed Brown *(void **)fromdata = fdata; 2193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 2201654bf6bSJed Brown } 2211654bf6bSJed Brown #endif 2221654bf6bSJed Brown 223f6ced4a3SJed Brown /*@C 224f6ced4a3SJed Brown PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 225f6ced4a3SJed Brown 226d083f849SBarry Smith Collective 227f6ced4a3SJed Brown 2284165533cSJose E. Roman Input Parameters: 229f6ced4a3SJed Brown + comm - communicator 230f6ced4a3SJed Brown . count - number of entries to send/receive (must match on all ranks) 231f6ced4a3SJed Brown . dtype - datatype to send/receive from each rank (must match on all ranks) 232f6ced4a3SJed Brown . nto - number of ranks to send data to 233f6ced4a3SJed Brown . toranks - ranks to send to (array of length nto) 234f6ced4a3SJed Brown - todata - data to send to each rank (packed) 235f6ced4a3SJed Brown 2364165533cSJose E. Roman Output Parameters: 237f6ced4a3SJed Brown + nfrom - number of ranks receiving messages from 238667f096bSBarry Smith . fromranks - ranks receiving messages from (length `nfrom`, caller should `PetscFree()`) 239811af0c4SBarry Smith - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 240f6ced4a3SJed Brown 241811af0c4SBarry Smith Options Database Key: 242667f096bSBarry Smith . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, 243667f096bSBarry Smith otherwise ibarrier. 244667f096bSBarry Smith 245667f096bSBarry Smith Level: developer 2461654bf6bSJed Brown 247f6ced4a3SJed Brown Notes: 248811af0c4SBarry Smith This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 249*1d27aa22SBarry Smith `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other constant-size data, see {cite}`hoeflersiebretlumsdaine10`. 250f6ced4a3SJed Brown 251f6ced4a3SJed Brown Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 252f6ced4a3SJed Brown 253811af0c4SBarry Smith .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`, `PetscCommBuildTwoSidedSetType()`, `PetscCommBuildTwoSidedType` 254f6ced4a3SJed Brown @*/ 255d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 256d71ae5a4SJacob Faibussowitsch { 2576145cd65SJed Brown PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 258f6ced4a3SJed Brown 259f6ced4a3SJed Brown PetscFunctionBegin; 2609566063dSJacob Faibussowitsch PetscCall(PetscSysInitializePackage()); 2619566063dSJacob Faibussowitsch PetscCall(PetscLogEventSync(PETSC_BuildTwoSided, comm)); 2629566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSC_BuildTwoSided, 0, 0, 0, 0)); 2639566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype)); 264f6ced4a3SJed Brown switch (buildtype) { 2656145cd65SJed Brown case PETSC_BUILDTWOSIDED_IBARRIER: 2668ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 2679566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided_Ibarrier(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 268b458e8f1SJose E. Roman break; 2696145cd65SJed Brown #else 2706145cd65SJed Brown SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 271f6ced4a3SJed Brown #endif 272d71ae5a4SJacob Faibussowitsch case PETSC_BUILDTWOSIDED_ALLREDUCE: 273d71ae5a4SJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided_Allreduce(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 274d71ae5a4SJacob Faibussowitsch break; 2751654bf6bSJed Brown case PETSC_BUILDTWOSIDED_REDSCATTER: 2761654bf6bSJed Brown #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 2779566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided_RedScatter(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 278b458e8f1SJose E. Roman break; 2791654bf6bSJed Brown #else 2801654bf6bSJed Brown SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 2811654bf6bSJed Brown #endif 282d71ae5a4SJacob Faibussowitsch default: 283d71ae5a4SJacob Faibussowitsch SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication"); 284f6ced4a3SJed Brown } 2859566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSC_BuildTwoSided, 0, 0, 0, 0)); 2863ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 287f6ced4a3SJed Brown } 288d815da10SJed Brown 289d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 290d71ae5a4SJacob Faibussowitsch { 291d815da10SJed Brown PetscMPIInt i, *tag; 292d815da10SJed Brown MPI_Aint lb, unitbytes; 293d815da10SJed Brown MPI_Request *sendreq, *recvreq; 294d815da10SJed Brown 295d815da10SJed Brown PetscFunctionBegin; 2969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ntags, &tag)); 29748a46eb9SPierre Jolivet if (ntags > 0) PetscCall(PetscCommDuplicate(comm, &comm, &tag[0])); 29848a46eb9SPierre Jolivet for (i = 1; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i])); 299d815da10SJed Brown 300d815da10SJed Brown /* Perform complete initial rendezvous */ 3019566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSided(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 302d815da10SJed Brown 3039566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nto * ntags, &sendreq)); 3049566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(*nfrom * ntags, &recvreq)); 305d815da10SJed Brown 3069566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 30708401ef6SPierre Jolivet PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 308d815da10SJed Brown for (i = 0; i < nto; i++) { 30989157a57SJed Brown PetscMPIInt k; 31089157a57SJed Brown for (k = 0; k < ntags; k++) sendreq[i * ntags + k] = MPI_REQUEST_NULL; 3119566063dSJacob Faibussowitsch PetscCall((*send)(comm, tag, i, toranks[i], ((char *)todata) + count * unitbytes * i, sendreq + i * ntags, ctx)); 312d815da10SJed Brown } 313d815da10SJed Brown for (i = 0; i < *nfrom; i++) { 314d815da10SJed Brown void *header = (*(char **)fromdata) + count * unitbytes * i; 31589157a57SJed Brown PetscMPIInt k; 31689157a57SJed Brown for (k = 0; k < ntags; k++) recvreq[i * ntags + k] = MPI_REQUEST_NULL; 3179566063dSJacob Faibussowitsch PetscCall((*recv)(comm, tag, (*fromranks)[i], header, recvreq + i * ntags, ctx)); 318d815da10SJed Brown } 3199566063dSJacob Faibussowitsch PetscCall(PetscFree(tag)); 3209566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm)); 3216afbe7ddSJed Brown *toreqs = sendreq; 3226afbe7ddSJed Brown *fromreqs = recvreq; 3233ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 324d815da10SJed Brown } 325d815da10SJed Brown 3268ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 327afb254cdSJed Brown 328d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 329d71ae5a4SJacob Faibussowitsch { 330afb254cdSJed Brown PetscMPIInt nrecvs, tag, *tags, done, i; 331afb254cdSJed Brown MPI_Aint lb, unitbytes; 332afb254cdSJed Brown char *tdata; 333afb254cdSJed Brown MPI_Request *sendreqs, *usendreqs, *req, barrier; 334afb254cdSJed Brown PetscSegBuffer segrank, segdata, segreq; 33546e50bb6SJed Brown PetscBool barrier_started; 336afb254cdSJed Brown 337afb254cdSJed Brown PetscFunctionBegin; 3389566063dSJacob Faibussowitsch PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 3399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(ntags, &tags)); 34048a46eb9SPierre Jolivet for (i = 0; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tags[i])); 3419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 34208401ef6SPierre Jolivet PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 343afb254cdSJed Brown tdata = (char *)todata; 3449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nto, &sendreqs)); 3459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nto * ntags, &usendreqs)); 346afb254cdSJed Brown /* Post synchronous sends */ 34748a46eb9SPierre Jolivet for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 348afb254cdSJed Brown /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the 349afb254cdSJed Brown * synchronous messages above. */ 350afb254cdSJed Brown for (i = 0; i < nto; i++) { 35189157a57SJed Brown PetscMPIInt k; 35289157a57SJed Brown for (k = 0; k < ntags; k++) usendreqs[i * ntags + k] = MPI_REQUEST_NULL; 3539566063dSJacob Faibussowitsch PetscCall((*send)(comm, tags, i, toranks[i], tdata + count * unitbytes * i, usendreqs + i * ntags, ctx)); 354afb254cdSJed Brown } 355afb254cdSJed Brown 3569566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank)); 3579566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata)); 3589566063dSJacob Faibussowitsch PetscCall(PetscSegBufferCreate(sizeof(MPI_Request), 4, &segreq)); 359afb254cdSJed Brown 360afb254cdSJed Brown nrecvs = 0; 361afb254cdSJed Brown barrier = MPI_REQUEST_NULL; 36246e50bb6SJed Brown /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 36346e50bb6SJed Brown * but we need to work around it. */ 36446e50bb6SJed Brown barrier_started = PETSC_FALSE; 365afb254cdSJed Brown for (done = 0; !done;) { 366afb254cdSJed Brown PetscMPIInt flag; 367afb254cdSJed Brown MPI_Status status; 3689566063dSJacob Faibussowitsch PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status)); 369afb254cdSJed Brown if (flag) { /* incoming message */ 37089157a57SJed Brown PetscMPIInt *recvrank, k; 371afb254cdSJed Brown void *buf; 3729566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(segrank, 1, &recvrank)); 3739566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(segdata, count, &buf)); 374afb254cdSJed Brown *recvrank = status.MPI_SOURCE; 3759566063dSJacob Faibussowitsch PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE)); 3769566063dSJacob Faibussowitsch PetscCall(PetscSegBufferGet(segreq, ntags, &req)); 37789157a57SJed Brown for (k = 0; k < ntags; k++) req[k] = MPI_REQUEST_NULL; 3789566063dSJacob Faibussowitsch PetscCall((*recv)(comm, tags, status.MPI_SOURCE, buf, req, ctx)); 379afb254cdSJed Brown nrecvs++; 380afb254cdSJed Brown } 38146e50bb6SJed Brown if (!barrier_started) { 382afb254cdSJed Brown PetscMPIInt sent, nsends; 3839566063dSJacob Faibussowitsch PetscCall(PetscMPIIntCast(nto, &nsends)); 3849566063dSJacob Faibussowitsch PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE)); 385afb254cdSJed Brown if (sent) { 3869566063dSJacob Faibussowitsch PetscCallMPI(MPI_Ibarrier(comm, &barrier)); 38746e50bb6SJed Brown barrier_started = PETSC_TRUE; 388afb254cdSJed Brown } 389afb254cdSJed Brown } else { 3909566063dSJacob Faibussowitsch PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE)); 391afb254cdSJed Brown } 392afb254cdSJed Brown } 393afb254cdSJed Brown *nfrom = nrecvs; 3949566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks)); 3959566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&segrank)); 3969566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata)); 3979566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&segdata)); 3986afbe7ddSJed Brown *toreqs = usendreqs; 3999566063dSJacob Faibussowitsch PetscCall(PetscSegBufferExtractAlloc(segreq, fromreqs)); 4009566063dSJacob Faibussowitsch PetscCall(PetscSegBufferDestroy(&segreq)); 4019566063dSJacob Faibussowitsch PetscCall(PetscFree(sendreqs)); 4029566063dSJacob Faibussowitsch PetscCall(PetscFree(tags)); 4039566063dSJacob Faibussowitsch PetscCall(PetscCommDestroy(&comm)); 4043ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 405afb254cdSJed Brown } 406afb254cdSJed Brown #endif 407afb254cdSJed Brown 408d815da10SJed Brown /*@C 409d815da10SJed Brown PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous 410d815da10SJed Brown 411d083f849SBarry Smith Collective 412d815da10SJed Brown 4134165533cSJose E. Roman Input Parameters: 414d815da10SJed Brown + comm - communicator 415d815da10SJed Brown . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 416d815da10SJed Brown . dtype - datatype to send/receive from each rank (must match on all ranks) 417d815da10SJed Brown . nto - number of ranks to send data to 418d815da10SJed Brown . toranks - ranks to send to (array of length nto) 419d815da10SJed Brown . todata - data to send to each rank (packed) 420d815da10SJed Brown . ntags - number of tags needed by send/recv callbacks 421d815da10SJed Brown . send - callback invoked on sending process when ready to send primary payload 422d815da10SJed Brown . recv - callback invoked on receiving process after delivery of rendezvous message 423d815da10SJed Brown - ctx - context for callbacks 424d815da10SJed Brown 4254165533cSJose E. Roman Output Parameters: 426d815da10SJed Brown + nfrom - number of ranks receiving messages from 427811af0c4SBarry Smith . fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`) 428811af0c4SBarry Smith - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 429d815da10SJed Brown 430d815da10SJed Brown Level: developer 431d815da10SJed Brown 432d815da10SJed Brown Notes: 433811af0c4SBarry Smith This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 434*1d27aa22SBarry Smith `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data, {cite}`hoeflersiebretlumsdaine10`. 435d815da10SJed Brown 436d815da10SJed Brown Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 437d815da10SJed Brown 438db781477SPatrick Sanan .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedFReq()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()` 439d815da10SJed Brown @*/ 440d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 441d71ae5a4SJacob Faibussowitsch { 4426afbe7ddSJed Brown MPI_Request *toreqs, *fromreqs; 4436afbe7ddSJed Brown 4446afbe7ddSJed Brown PetscFunctionBegin; 4459566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, &toreqs, &fromreqs, send, recv, ctx)); 4469566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE)); 4479566063dSJacob Faibussowitsch PetscCallMPI(MPI_Waitall(*nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE)); 4489566063dSJacob Faibussowitsch PetscCall(PetscFree(toreqs)); 4499566063dSJacob Faibussowitsch PetscCall(PetscFree(fromreqs)); 4503ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 4516afbe7ddSJed Brown } 4526afbe7ddSJed Brown 4536afbe7ddSJed Brown /*@C 4546afbe7ddSJed Brown PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests 4556afbe7ddSJed Brown 456d083f849SBarry Smith Collective 4576afbe7ddSJed Brown 4584165533cSJose E. Roman Input Parameters: 4596afbe7ddSJed Brown + comm - communicator 4606afbe7ddSJed Brown . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 4616afbe7ddSJed Brown . dtype - datatype to send/receive from each rank (must match on all ranks) 4626afbe7ddSJed Brown . nto - number of ranks to send data to 4636afbe7ddSJed Brown . toranks - ranks to send to (array of length nto) 4646afbe7ddSJed Brown . todata - data to send to each rank (packed) 4656afbe7ddSJed Brown . ntags - number of tags needed by send/recv callbacks 4666afbe7ddSJed Brown . send - callback invoked on sending process when ready to send primary payload 4676afbe7ddSJed Brown . recv - callback invoked on receiving process after delivery of rendezvous message 4686afbe7ddSJed Brown - ctx - context for callbacks 4696afbe7ddSJed Brown 4704165533cSJose E. Roman Output Parameters: 4716afbe7ddSJed Brown + nfrom - number of ranks receiving messages from 472811af0c4SBarry Smith . fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`) 473811af0c4SBarry Smith . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 474811af0c4SBarry Smith . toreqs - array of nto*ntags sender requests (caller must wait on these, then `PetscFree()`) 475811af0c4SBarry Smith - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then `PetscFree()`) 4766afbe7ddSJed Brown 4776afbe7ddSJed Brown Level: developer 4786afbe7ddSJed Brown 4796afbe7ddSJed Brown Notes: 480811af0c4SBarry Smith This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 481*1d27aa22SBarry Smith `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data, {cite}`hoeflersiebretlumsdaine10`. 4826afbe7ddSJed Brown 4836afbe7ddSJed Brown Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 4846afbe7ddSJed Brown 485db781477SPatrick Sanan .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedF()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()` 4866afbe7ddSJed Brown @*/ 487d71ae5a4SJacob Faibussowitsch PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 488d71ae5a4SJacob Faibussowitsch { 4899371c9d4SSatish Balay PetscErrorCode (*f)(MPI_Comm, PetscMPIInt, MPI_Datatype, PetscMPIInt, const PetscMPIInt[], const void *, PetscMPIInt *, PetscMPIInt **, void *, PetscMPIInt, MPI_Request **, MPI_Request **, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx); 490d815da10SJed Brown PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 4913461502dSJed Brown PetscMPIInt i, size; 492d815da10SJed Brown 493d815da10SJed Brown PetscFunctionBegin; 4949566063dSJacob Faibussowitsch PetscCall(PetscSysInitializePackage()); 4959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size)); 496ad540459SPierre Jolivet for (i = 0; i < nto; i++) PetscCheck(toranks[i] >= 0 && size > toranks[i], comm, PETSC_ERR_ARG_OUTOFRANGE, "toranks[%d] %d not in comm size %d", i, toranks[i], size); 4979566063dSJacob Faibussowitsch PetscCall(PetscLogEventSync(PETSC_BuildTwoSidedF, comm)); 4989566063dSJacob Faibussowitsch PetscCall(PetscLogEventBegin(PETSC_BuildTwoSidedF, 0, 0, 0, 0)); 4999566063dSJacob Faibussowitsch PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype)); 500d815da10SJed Brown switch (buildtype) { 501d815da10SJed Brown case PETSC_BUILDTWOSIDED_IBARRIER: 5028ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 5036afbe7ddSJed Brown f = PetscCommBuildTwoSidedFReq_Ibarrier; 504b458e8f1SJose E. Roman break; 505afb254cdSJed Brown #else 506afb254cdSJed Brown SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 507afb254cdSJed Brown #endif 508d815da10SJed Brown case PETSC_BUILDTWOSIDED_ALLREDUCE: 509d71ae5a4SJacob Faibussowitsch case PETSC_BUILDTWOSIDED_REDSCATTER: 510d71ae5a4SJacob Faibussowitsch f = PetscCommBuildTwoSidedFReq_Reference; 511d71ae5a4SJacob Faibussowitsch break; 512d71ae5a4SJacob Faibussowitsch default: 513d71ae5a4SJacob Faibussowitsch SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication"); 514d815da10SJed Brown } 5159566063dSJacob Faibussowitsch PetscCall((*f)(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, toreqs, fromreqs, send, recv, ctx)); 5169566063dSJacob Faibussowitsch PetscCall(PetscLogEventEnd(PETSC_BuildTwoSidedF, 0, 0, 0, 0)); 5173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 518d815da10SJed Brown } 519