/*
   Scalable discovery of two-sided communication patterns: each rank initially
   knows only which ranks it sends to; these routines determine which ranks it
   receives from (and deliver a constant-sized payload in the process).
*/
#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/petscimpl.h>

/* Logging events used by PetscCommBuildTwoSided() and PetscCommBuildTwoSidedF() below */
PetscLogEvent PETSC_BuildTwoSided;
PetscLogEvent PETSC_BuildTwoSidedF;

const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE", "IBARRIER", "REDSCATTER", "PetscBuildTwoSidedType", "PETSC_BUILDTWOSIDED_", NULL};

/* Process-global algorithm selection; lazily initialized by PetscCommBuildTwoSidedGetType() */
static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

/*@
  PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

  Logically Collective

  Input Parameters:
+ comm - `PETSC_COMM_WORLD`
- twosided - algorithm to use in subsequent calls to `PetscCommBuildTwoSided()`

  Level: developer

  Note:
  This option is currently global, but could be made per-communicator.

.seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedGetType()`, `PetscBuildTwoSidedType`
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm, PetscBuildTwoSidedType twosided) {
  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
    PetscMPIInt b1[2], b2[2];
    /* max(-x) == -min(x), so after the allreduce -b2[0] == b2[1] holds iff every rank passed the same enum value */
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    PetscCall(MPIU_Allreduce(b1, b2, 2, MPI_INT, MPI_MAX, comm));
    PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Enum value must be same on all processes");
  }
  _twosided_type = twosided;
  PetscFunctionReturn(0);
}

/*@
  PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication

  Logically Collective

  Input Parameter:
. comm - communicator on which to query the algorithm

  Output Parameter:
. twosided - algorithm to use for `PetscCommBuildTwoSided()`

  Level: developer

  Note:
  The choice is cached globally: the first call fixes the default (based on this
  communicator's size and the -build_twosided option) for the whole process.

.seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedSetType()`, `PetscBuildTwoSidedType`
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm, PetscBuildTwoSidedType *twosided) {
  PetscMPIInt size;

  PetscFunctionBegin;
  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
#endif
    /* A runtime option overrides the size-based default */
    PetscCall(PetscOptionsGetEnum(NULL, NULL, "-build_twosided", PetscBuildTwoSidedTypes, (PetscEnum *)&_twosided_type, NULL));
  }
  *twosided = _twosided_type;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
/* Nonblocking-consensus rendezvous (the MPI_Ibarrier algorithm of Hoefler, Siebert and
   Lumsdaine cited in the PetscCommBuildTwoSided() manpage): post synchronous sends, probe
   for incoming messages, and enter a nonblocking barrier once all local sends have
   matched; when the barrier completes, all messages everywhere have been received. */
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) {
  PetscMPIInt nrecvs, tag, done, i;
  MPI_Aint lb, unitbytes;
  char *tdata;
  MPI_Request *sendreqs, barrier;
  PetscSegBuffer segrank, segdata;
  PetscBool barrier_started;

  PetscFunctionBegin;
  /* Work on PETSc's inner communicator so user tags cannot collide with ours */
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  tdata = (char *)todata;
  PetscCall(PetscMalloc1(nto, &sendreqs));
  /* Synchronous sends: completion of a send implies the receiver has matched it */
  for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  /* The number of incoming messages is unknown a priori, so accumulate into segmented buffers */
  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank));
  PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata));

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done = 0; !done;) {
    PetscMPIInt flag;
    MPI_Status status;
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status));
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank;
      void *buf;
      PetscCall(PetscSegBufferGet(segrank, 1, &recvrank));
      PetscCall(PetscSegBufferGet(segdata, count, &buf));
      *recvrank = status.MPI_SOURCE;
      PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE));
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent, nsends;
      PetscCall(PetscMPIIntCast(nto, &nsends));
      PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE));
      if (sent) {
        /* All our sends have matched; join the barrier. Everyone keeps probing until the barrier completes. */
        PetscCallMPI(MPI_Ibarrier(comm, &barrier));
        barrier_started = PETSC_TRUE;
        PetscCall(PetscFree(sendreqs));
      }
    } else {
      PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE));
    }
  }
  *nfrom = nrecvs;
  /* Hand the accumulated ranks/payloads to the caller as PetscFree()-able arrays */
  PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks));
  PetscCall(PetscSegBufferDestroy(&segrank));
  PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata));
  PetscCall(PetscSegBufferDestroy(&segdata));
  PetscCall(PetscCommDestroy(&comm));
  PetscFunctionReturn(0);
}
#endif

/* Rendezvous via MPI_Allreduce of a size-长 flag array: each rank marks its destinations,
   the summed array tells every rank how many messages it will receive. Memory scales with
   communicator size, hence the default only for smaller communicators. */
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) {
  PetscMPIInt size, rank, *iflags, nrecvs, tag, *franks, i, flg;
  MPI_Aint lb, unitbytes;
  char *tdata, *fdata;
  MPI_Request *reqs, *sendreqs;
  MPI_Status *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
  /* The size-length scratch array is cached on the inner communicator and reused across calls */
  if (!counter->iflags) {
    PetscCall(PetscCalloc1(size, &counter->iflags));
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    PetscCall(PetscArrayzero(iflags, size));
  }
  for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, iflags, size, MPI_INT, MPI_SUM, comm));
  nrecvs = iflags[rank]; /* after the sum, entry `rank` counts the ranks sending to us */
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  /* NOTE(review): nrecvs * count is evaluated in PetscMPIInt before widening to MPI_Aint —
     confirm callers keep nrecvs*count within int range for very large exchanges */
  PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata));
  tdata = (char *)todata;
  PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses));
  sendreqs = reqs + nrecvs; /* one allocation: first nrecvs entries are recv requests, remainder are sends */
  for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i));
  for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses));
  PetscCall(PetscMalloc1(nrecvs, &franks));
  /* Sender identities come from the receive statuses (receives were posted with MPI_ANY_SOURCE) */
  for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  PetscCall(PetscFree2(reqs, statuses));
  PetscCall(PetscCommDestroy(&comm));

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void **)fromdata = fdata;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
/* Same flag-array scheme as the Allreduce variant, but MPI_Reduce_scatter_block delivers
   only this rank's count directly instead of the whole summed array. */
static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) {
  PetscMPIInt size, *iflags, nrecvs, tag, *franks, i, flg;
  MPI_Aint lb, unitbytes;
  char *tdata, *fdata;
  MPI_Request *reqs, *sendreqs;
  MPI_Status *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
  /* Reuse the scratch flag array cached on the inner communicator */
  if (!counter->iflags) {
    PetscCall(PetscCalloc1(size, &counter->iflags));
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    PetscCall(PetscArrayzero(iflags, size));
  }
  for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
  PetscCallMPI(MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, comm));
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  /* NOTE(review): same PetscMPIInt-width multiply as in the Allreduce variant — confirm range */
  PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata));
  tdata = (char *)todata;
  PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses));
  sendreqs = reqs + nrecvs; /* recv requests first, send requests after */
  for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i));
  for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses));
  PetscCall(PetscMalloc1(nrecvs, &franks));
  for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  PetscCall(PetscFree2(reqs, statuses));
  PetscCall(PetscCommDestroy(&comm));

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void **)fromdata = fdata;
  PetscFunctionReturn(0);
}
#endif

/*@C
  PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

  Collective

  Input Parameters:
+ comm - communicator
. count - number of entries to send/receive (must match on all ranks)
. dtype - datatype to send/receive from each rank (must match on all ranks)
. nto - number of ranks to send data to
. toranks - ranks to send to (array of length nto)
- todata - data to send to each rank (packed)

  Output Parameters:
+ nfrom - number of ranks receiving messages from
. fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`)
- fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`)

  Level: developer

  Options Database Key:
. -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier.

  Notes:
  This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
  `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other constant-size data.

  Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

  References:
. * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
  Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`, `PetscCommBuildTwoSidedSetType()`, `PetscCommBuildTwoSidedType`
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) {
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscFunctionBegin;
  PetscCall(PetscSysInitializePackage());
  PetscCall(PetscLogEventSync(PETSC_BuildTwoSided, comm));
  PetscCall(PetscLogEventBegin(PETSC_BuildTwoSided, 0, 0, 0, 0));
  PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype));
  /* Dispatch to the selected implementation; unavailable algorithms are hard errors */
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    PetscCall(PetscCommBuildTwoSided_Ibarrier(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
    break;
#else
    SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE: PetscCall(PetscCommBuildTwoSided_Allreduce(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); break;
  case PETSC_BUILDTWOSIDED_REDSCATTER:
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
    PetscCall(PetscCommBuildTwoSided_RedScatter(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
    break;
#else
    SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
#endif
  default: SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
  }
  PetscCall(PetscLogEventEnd(PETSC_BuildTwoSided, 0, 0, 0, 0));
  PetscFunctionReturn(0);
}

/* Fallback implementation of the callback-based rendezvous: perform a complete
   PetscCommBuildTwoSided() first, then invoke the user's send/recv callbacks for
   the (typically larger) secondary payloads using ntags fresh tags. */
static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) {
  PetscMPIInt i, *tag;
  MPI_Aint lb, unitbytes;
  MPI_Request *sendreq, *recvreq;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(ntags, &tag));
  /* PetscCommDuplicate() yields the first tag along with the inner communicator */
  if (ntags > 0) PetscCall(PetscCommDuplicate(comm, &comm, &tag[0]));
  for (i = 1; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i]));

  /* Perform complete initial rendezvous */
  PetscCall(PetscCommBuildTwoSided(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));

  PetscCall(PetscMalloc1(nto * ntags, &sendreq));
  PetscCall(PetscMalloc1(*nfrom * ntags, &recvreq));

  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  for (i = 0; i < nto; i++) {
    PetscMPIInt k;
    /* Pre-null the slots so callbacks may legally leave some requests unused */
    for (k = 0; k < ntags; k++) sendreq[i * ntags + k] = MPI_REQUEST_NULL;
    PetscCall((*send)(comm, tag, i, toranks[i], ((char *)todata) + count * unitbytes * i, sendreq + i * ntags, ctx));
  }
  for (i = 0; i < *nfrom; i++) {
    void *header = (*(char **)fromdata) + count * unitbytes * i;
    PetscMPIInt k;
    for (k = 0; k < ntags; k++) recvreq[i * ntags + k] = MPI_REQUEST_NULL;
    PetscCall((*recv)(comm, tag, (*fromranks)[i], header, recvreq + i * ntags, ctx));
  }
  PetscCall(PetscFree(tag));
  /* NOTE(review): when ntags == 0 the communicator above was never duplicated, yet
     PetscCommDestroy() is still called — confirm this is intended for that corner case */
  PetscCall(PetscCommDestroy(&comm));
  *toreqs   = sendreq;
  *fromreqs = recvreq;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)

/* Callback-based variant of the nonblocking-consensus rendezvous: the rendezvous
   message and the user payload sends are posted up front; the recv callback is
   invoked as each rendezvous message arrives, so payload receives overlap discovery. */
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) {
  PetscMPIInt nrecvs, tag, *tags, done, i;
  MPI_Aint lb, unitbytes;
  char *tdata;
  MPI_Request *sendreqs, *usendreqs, *req, barrier;
  PetscSegBuffer segrank, segdata, segreq;
  PetscBool barrier_started;

  PetscFunctionBegin;
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCall(PetscMalloc1(ntags, &tags));
  for (i = 0; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tags[i]));
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  tdata = (char *)todata;
  PetscCall(PetscMalloc1(nto, &sendreqs));
  PetscCall(PetscMalloc1(nto * ntags, &usendreqs));
  /* Post synchronous sends */
  for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
   * synchronous messages above. */
  for (i = 0; i < nto; i++) {
    PetscMPIInt k;
    for (k = 0; k < ntags; k++) usendreqs[i * ntags + k] = MPI_REQUEST_NULL;
    PetscCall((*send)(comm, tags, i, toranks[i], tdata + count * unitbytes * i, usendreqs + i * ntags, ctx));
  }

  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank));
  PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata));
  PetscCall(PetscSegBufferCreate(sizeof(MPI_Request), 4, &segreq));

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done = 0; !done;) {
    PetscMPIInt flag;
    MPI_Status status;
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status));
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank, k;
      void *buf;
      PetscCall(PetscSegBufferGet(segrank, 1, &recvrank));
      PetscCall(PetscSegBufferGet(segdata, count, &buf));
      *recvrank = status.MPI_SOURCE;
      PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE));
      /* Immediately let the user post receives for this sender's payload */
      PetscCall(PetscSegBufferGet(segreq, ntags, &req));
      for (k = 0; k < ntags; k++) req[k] = MPI_REQUEST_NULL;
      PetscCall((*recv)(comm, tags, status.MPI_SOURCE, buf, req, ctx));
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent, nsends;
      PetscCall(PetscMPIIntCast(nto, &nsends));
      PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE));
      if (sent) {
        PetscCallMPI(MPI_Ibarrier(comm, &barrier));
        barrier_started = PETSC_TRUE;
      }
    } else {
      PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE));
    }
  }
  *nfrom = nrecvs;
  PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks));
  PetscCall(PetscSegBufferDestroy(&segrank));
  PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata));
  PetscCall(PetscSegBufferDestroy(&segdata));
  *toreqs = usendreqs;
  PetscCall(PetscSegBufferExtractAlloc(segreq, fromreqs));
  PetscCall(PetscSegBufferDestroy(&segreq));
  PetscCall(PetscFree(sendreqs));
  PetscCall(PetscFree(tags));
  PetscCall(PetscCommDestroy(&comm));
  PetscFunctionReturn(0);
}
#endif

/*@C
  PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

  Collective

  Input Parameters:
+ comm - communicator
. count - number of entries to send/receive in initial rendezvous (must match on all ranks)
. dtype - datatype to send/receive from each rank (must match on all ranks)
. nto - number of ranks to send data to
. toranks - ranks to send to (array of length nto)
. todata - data to send to each rank (packed)
. ntags - number of tags needed by send/recv callbacks
. send - callback invoked on sending process when ready to send primary payload
. recv - callback invoked on receiving process after delivery of rendezvous message
- ctx - context for callbacks

  Output Parameters:
+ nfrom - number of ranks receiving messages from
. fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`)
- fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`)

  Level: developer

  Notes:
  This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
  `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data.

  Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

  References:
. * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
  Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedFReq()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
@*/
PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) {
  MPI_Request *toreqs, *fromreqs;

  PetscFunctionBegin;
  /* Convenience wrapper: run the request-returning variant, then wait on everything here */
  PetscCall(PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, &toreqs, &fromreqs, send, recv, ctx));
  PetscCallMPI(MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(*nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(toreqs));
  PetscCall(PetscFree(fromreqs));
  PetscFunctionReturn(0);
}

/*@C
  PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

  Collective

  Input Parameters:
+ comm - communicator
. count - number of entries to send/receive in initial rendezvous (must match on all ranks)
. dtype - datatype to send/receive from each rank (must match on all ranks)
. nto - number of ranks to send data to
. toranks - ranks to send to (array of length nto)
. todata - data to send to each rank (packed)
. ntags - number of tags needed by send/recv callbacks
. send - callback invoked on sending process when ready to send primary payload
. recv - callback invoked on receiving process after delivery of rendezvous message
- ctx - context for callbacks

  Output Parameters:
+ nfrom - number of ranks receiving messages from
. fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`)
. fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`)
. toreqs - array of nto*ntags sender requests (caller must wait on these, then `PetscFree()`)
- fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then `PetscFree()`)

  Level: developer

  Notes:
  This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
  `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data.

  Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

  References:
. * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
  Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedF()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
@*/
PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) {
  PetscErrorCode (*f)(MPI_Comm, PetscMPIInt, MPI_Datatype, PetscMPIInt, const PetscMPIInt[], const void *, PetscMPIInt *, PetscMPIInt **, void *, PetscMPIInt, MPI_Request **, MPI_Request **, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx);
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
  PetscMPIInt i, size;

  PetscFunctionBegin;
  PetscCall(PetscSysInitializePackage());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* Validate destinations up front so a bad rank fails here rather than deep inside MPI */
  for (i = 0; i < nto; i++) PetscCheck(toranks[i] >= 0 && size > toranks[i], comm, PETSC_ERR_ARG_OUTOFRANGE, "toranks[%d] %d not in comm size %d", i, toranks[i], size);
  PetscCall(PetscLogEventSync(PETSC_BuildTwoSidedF, comm));
  PetscCall(PetscLogEventBegin(PETSC_BuildTwoSidedF, 0, 0, 0, 0));
  PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype));
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    f = PetscCommBuildTwoSidedFReq_Ibarrier;
    break;
#else
    SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE: /* fallthrough: both use the reference implementation */
  case PETSC_BUILDTWOSIDED_REDSCATTER: f = PetscCommBuildTwoSidedFReq_Reference; break;
  default: SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
  }
  PetscCall((*f)(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, toreqs, fromreqs, send, recv, ctx));
  PetscCall(PetscLogEventEnd(PETSC_BuildTwoSidedF, 0, 0, 0, 0));
  PetscFunctionReturn(0);
}