1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 PetscLogEvent PETSC_BuildTwoSided; 5 PetscLogEvent PETSC_BuildTwoSidedF; 6 7 const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE", "IBARRIER", "REDSCATTER", "PetscBuildTwoSidedType", "PETSC_BUILDTWOSIDED_", NULL}; 8 9 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 10 11 /*@ 12 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 13 14 Logically Collective 15 16 Input Parameters: 17 + comm - `PETSC_COMM_WORLD` 18 - twosided - algorithm to use in subsequent calls to `PetscCommBuildTwoSided()` 19 20 Level: developer 21 22 Note: 23 This option is currently global, but could be made per-communicator. 24 25 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedGetType()`, `PetscBuildTwoSidedType` 26 @*/ 27 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm, PetscBuildTwoSidedType twosided) 28 { 29 PetscFunctionBegin; 30 if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 31 PetscMPIInt b1[2], b2[2]; 32 b1[0] = -(PetscMPIInt)twosided; 33 b1[1] = (PetscMPIInt)twosided; 34 PetscCall(MPIU_Allreduce(b1, b2, 2, MPI_INT, MPI_MAX, comm)); 35 PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Enum value must be same on all processes"); 36 } 37 _twosided_type = twosided; 38 PetscFunctionReturn(PETSC_SUCCESS); 39 } 40 41 /*@ 42 PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication 43 44 Logically Collective 45 46 Output Parameters: 47 + comm - communicator on which to query algorithm 48 - twosided - algorithm to use for `PetscCommBuildTwoSided()` 49 50 Level: developer 51 52 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedSetType()`, `PetscBuildTwoSidedType` 53 @*/ 54 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm, PetscBuildTwoSidedType *twosided) 55 { 56 PetscMPIInt size; 57 58 PetscFunctionBegin; 59 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 60 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 61 PetscCallMPI(MPI_Comm_size(comm, &size)); 62 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */ 63 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 64 if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 65 #endif 66 PetscCall(PetscOptionsGetEnum(NULL, NULL, "-build_twosided", PetscBuildTwoSidedTypes, (PetscEnum *)&_twosided_type, NULL)); 67 } 68 *twosided = _twosided_type; 69 PetscFunctionReturn(PETSC_SUCCESS); 70 } 71 72 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 73 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 74 { 75 PetscMPIInt nrecvs, tag, done, i; 76 MPI_Aint lb, unitbytes; 77 char *tdata; 78 MPI_Request *sendreqs, barrier; 79 PetscSegBuffer segrank, segdata; 80 PetscBool barrier_started; 81 82 PetscFunctionBegin; 83 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 84 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 85 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 86 tdata = (char *)todata; 87 PetscCall(PetscMalloc1(nto, &sendreqs)); 88 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 89 PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank)); 90 PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata)); 91 92 nrecvs = 0; 93 barrier = MPI_REQUEST_NULL; 94 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 95 * but we need to work around it. */ 96 barrier_started = PETSC_FALSE; 97 for (done = 0; !done;) { 98 PetscMPIInt flag; 99 MPI_Status status; 100 PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status)); 101 if (flag) { /* incoming message */ 102 PetscMPIInt *recvrank; 103 void *buf; 104 PetscCall(PetscSegBufferGet(segrank, 1, &recvrank)); 105 PetscCall(PetscSegBufferGet(segdata, count, &buf)); 106 *recvrank = status.MPI_SOURCE; 107 PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE)); 108 nrecvs++; 109 } 110 if (!barrier_started) { 111 PetscMPIInt sent, nsends; 112 PetscCall(PetscMPIIntCast(nto, &nsends)); 113 PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE)); 114 if (sent) { 115 PetscCallMPI(MPI_Ibarrier(comm, &barrier)); 116 barrier_started = PETSC_TRUE; 117 PetscCall(PetscFree(sendreqs)); 118 } 119 } else { 120 PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE)); 121 } 122 } 123 *nfrom = nrecvs; 124 PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks)); 125 PetscCall(PetscSegBufferDestroy(&segrank)); 126 PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata)); 127 PetscCall(PetscSegBufferDestroy(&segdata)); 128 PetscCall(PetscCommDestroy(&comm)); 129 PetscFunctionReturn(PETSC_SUCCESS); 130 } 131 #endif 132 133 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 134 { 135 PetscMPIInt size, rank, *iflags, nrecvs, tag, *franks, i, flg; 136 MPI_Aint lb, unitbytes; 137 char *tdata, *fdata; 138 MPI_Request *reqs, *sendreqs; 139 MPI_Status *statuses; 140 PetscCommCounter *counter; 141 142 PetscFunctionBegin; 143 PetscCallMPI(MPI_Comm_size(comm, &size)); 144 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 145 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 146 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg)); 147 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set"); 148 if (!counter->iflags) { 149 PetscCall(PetscCalloc1(size, &counter->iflags)); 150 iflags = counter->iflags; 151 } else { 152 iflags = counter->iflags; 153 PetscCall(PetscArrayzero(iflags, size)); 154 } 155 for (i = 0; i < nto; i++) iflags[toranks[i]] = 1; 156 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, iflags, size, MPI_INT, MPI_SUM, comm)); 157 nrecvs = iflags[rank]; 158 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 159 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 160 PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata)); 161 tdata = (char *)todata; 162 PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses)); 163 sendreqs = reqs + nrecvs; 164 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i)); 165 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 166 PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses)); 167 PetscCall(PetscMalloc1(nrecvs, &franks)); 168 for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 169 PetscCall(PetscFree2(reqs, statuses)); 170 PetscCall(PetscCommDestroy(&comm)); 171 172 *nfrom = nrecvs; 173 *fromranks = franks; 174 *(void **)fromdata = fdata; 175 PetscFunctionReturn(PETSC_SUCCESS); 176 } 177 178 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 179 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 180 { 181 PetscMPIInt size, *iflags, nrecvs, tag, *franks, i, flg; 182 MPI_Aint lb, unitbytes; 183 char *tdata, *fdata; 184 MPI_Request *reqs, *sendreqs; 185 MPI_Status *statuses; 186 PetscCommCounter *counter; 187 188 PetscFunctionBegin; 189 PetscCallMPI(MPI_Comm_size(comm, &size)); 190 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 191 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg)); 192 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set"); 193 if (!counter->iflags) { 194 PetscCall(PetscCalloc1(size, &counter->iflags)); 195 iflags = counter->iflags; 196 } else { 197 iflags = counter->iflags; 198 PetscCall(PetscArrayzero(iflags, size)); 199 } 200 for (i = 0; i < nto; i++) iflags[toranks[i]] = 1; 201 PetscCallMPI(MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, comm)); 202 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 203 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 204 PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata)); 205 tdata = (char *)todata; 206 PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses)); 207 sendreqs = reqs + nrecvs; 208 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i)); 209 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 210 PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses)); 211 PetscCall(PetscMalloc1(nrecvs, &franks)); 212 for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 213 PetscCall(PetscFree2(reqs, statuses)); 214 PetscCall(PetscCommDestroy(&comm)); 215 216 *nfrom = nrecvs; 217 *fromranks = franks; 218 *(void **)fromdata = fdata; 219 PetscFunctionReturn(PETSC_SUCCESS); 220 } 221 #endif 222 223 /*@C 224 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 225 226 Collective 227 228 Input Parameters: 229 + comm - communicator 230 . count - number of entries to send/receive (must match on all ranks) 231 . dtype - datatype to send/receive from each rank (must match on all ranks) 232 . nto - number of ranks to send data to 233 . toranks - ranks to send to (array of length nto) 234 - todata - data to send to each rank (packed) 235 236 Output Parameters: 237 + nfrom - number of ranks receiving messages from 238 . fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`) 239 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 240 241 Level: developer 242 243 Options Database Key: 244 . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier. 245 246 Notes: 247 This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 248 `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other constant-size data. 249 250 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 251 252 References: 253 . * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 254 Scalable communication protocols for dynamic sparse data exchange, 2010. 255 256 .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`, `PetscCommBuildTwoSidedSetType()`, `PetscCommBuildTwoSidedType` 257 @*/ 258 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 259 { 260 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 261 262 PetscFunctionBegin; 263 PetscCall(PetscSysInitializePackage()); 264 PetscCall(PetscLogEventSync(PETSC_BuildTwoSided, comm)); 265 PetscCall(PetscLogEventBegin(PETSC_BuildTwoSided, 0, 0, 0, 0)); 266 PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype)); 267 switch (buildtype) { 268 case PETSC_BUILDTWOSIDED_IBARRIER: 269 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 270 PetscCall(PetscCommBuildTwoSided_Ibarrier(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 271 break; 272 #else 273 SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 274 #endif 275 case PETSC_BUILDTWOSIDED_ALLREDUCE: 276 PetscCall(PetscCommBuildTwoSided_Allreduce(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 277 break; 278 case PETSC_BUILDTWOSIDED_REDSCATTER: 279 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 280 PetscCall(PetscCommBuildTwoSided_RedScatter(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 281 break; 282 #else 283 SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 284 #endif 285 default: 286 SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication"); 287 } 288 PetscCall(PetscLogEventEnd(PETSC_BuildTwoSided, 0, 0, 0, 0)); 289 PetscFunctionReturn(PETSC_SUCCESS); 290 } 291 292 static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 293 { 294 PetscMPIInt i, *tag; 295 MPI_Aint lb, unitbytes; 296 MPI_Request *sendreq, *recvreq; 297 298 PetscFunctionBegin; 299 PetscCall(PetscMalloc1(ntags, &tag)); 300 if (ntags > 0) PetscCall(PetscCommDuplicate(comm, &comm, &tag[0])); 301 for (i = 1; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i])); 302 303 /* Perform complete initial rendezvous */ 304 PetscCall(PetscCommBuildTwoSided(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 305 306 PetscCall(PetscMalloc1(nto * ntags, &sendreq)); 307 PetscCall(PetscMalloc1(*nfrom * ntags, &recvreq)); 308 309 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 310 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 311 for (i = 0; i < nto; i++) { 312 PetscMPIInt k; 313 for (k = 0; k < ntags; k++) sendreq[i * ntags + k] = MPI_REQUEST_NULL; 314 PetscCall((*send)(comm, tag, i, toranks[i], ((char *)todata) + count * unitbytes * i, sendreq + i * ntags, ctx)); 315 } 316 for (i = 0; i < *nfrom; i++) { 317 void *header = (*(char **)fromdata) + count * unitbytes * i; 318 PetscMPIInt k; 319 for (k = 0; k < ntags; k++) recvreq[i * ntags + k] = MPI_REQUEST_NULL; 320 PetscCall((*recv)(comm, tag, (*fromranks)[i], header, recvreq + i * ntags, ctx)); 321 } 322 PetscCall(PetscFree(tag)); 323 PetscCall(PetscCommDestroy(&comm)); 324 *toreqs = sendreq; 325 *fromreqs = recvreq; 326 PetscFunctionReturn(PETSC_SUCCESS); 327 } 328 329 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 330 331 static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 332 { 333 PetscMPIInt nrecvs, tag, *tags, done, i; 334 MPI_Aint lb, unitbytes; 335 char *tdata; 336 MPI_Request *sendreqs, *usendreqs, *req, barrier; 337 PetscSegBuffer segrank, segdata, segreq; 338 PetscBool barrier_started; 339 340 PetscFunctionBegin; 341 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 342 PetscCall(PetscMalloc1(ntags, &tags)); 343 for (i = 0; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tags[i])); 344 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 345 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 346 tdata = (char *)todata; 347 PetscCall(PetscMalloc1(nto, &sendreqs)); 348 PetscCall(PetscMalloc1(nto * ntags, &usendreqs)); 349 /* Post synchronous sends */ 350 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 351 /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the 352 * synchronous messages above. */ 353 for (i = 0; i < nto; i++) { 354 PetscMPIInt k; 355 for (k = 0; k < ntags; k++) usendreqs[i * ntags + k] = MPI_REQUEST_NULL; 356 PetscCall((*send)(comm, tags, i, toranks[i], tdata + count * unitbytes * i, usendreqs + i * ntags, ctx)); 357 } 358 359 PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank)); 360 PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata)); 361 PetscCall(PetscSegBufferCreate(sizeof(MPI_Request), 4, &segreq)); 362 363 nrecvs = 0; 364 barrier = MPI_REQUEST_NULL; 365 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 366 * but we need to work around it. */ 367 barrier_started = PETSC_FALSE; 368 for (done = 0; !done;) { 369 PetscMPIInt flag; 370 MPI_Status status; 371 PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status)); 372 if (flag) { /* incoming message */ 373 PetscMPIInt *recvrank, k; 374 void *buf; 375 PetscCall(PetscSegBufferGet(segrank, 1, &recvrank)); 376 PetscCall(PetscSegBufferGet(segdata, count, &buf)); 377 *recvrank = status.MPI_SOURCE; 378 PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE)); 379 PetscCall(PetscSegBufferGet(segreq, ntags, &req)); 380 for (k = 0; k < ntags; k++) req[k] = MPI_REQUEST_NULL; 381 PetscCall((*recv)(comm, tags, status.MPI_SOURCE, buf, req, ctx)); 382 nrecvs++; 383 } 384 if (!barrier_started) { 385 PetscMPIInt sent, nsends; 386 PetscCall(PetscMPIIntCast(nto, &nsends)); 387 PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE)); 388 if (sent) { 389 PetscCallMPI(MPI_Ibarrier(comm, &barrier)); 390 barrier_started = PETSC_TRUE; 391 } 392 } else { 393 PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE)); 394 } 395 } 396 *nfrom = nrecvs; 397 PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks)); 398 PetscCall(PetscSegBufferDestroy(&segrank)); 399 PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata)); 400 PetscCall(PetscSegBufferDestroy(&segdata)); 401 *toreqs = usendreqs; 402 PetscCall(PetscSegBufferExtractAlloc(segreq, fromreqs)); 403 PetscCall(PetscSegBufferDestroy(&segreq)); 404 PetscCall(PetscFree(sendreqs)); 405 PetscCall(PetscFree(tags)); 406 PetscCall(PetscCommDestroy(&comm)); 407 PetscFunctionReturn(PETSC_SUCCESS); 408 } 409 #endif 410 411 /*@C 412 PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous 413 414 Collective 415 416 Input Parameters: 417 + comm - communicator 418 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 419 . dtype - datatype to send/receive from each rank (must match on all ranks) 420 . nto - number of ranks to send data to 421 . toranks - ranks to send to (array of length nto) 422 . todata - data to send to each rank (packed) 423 . ntags - number of tags needed by send/recv callbacks 424 . send - callback invoked on sending process when ready to send primary payload 425 . recv - callback invoked on receiving process after delivery of rendezvous message 426 - ctx - context for callbacks 427 428 Output Parameters: 429 + nfrom - number of ranks receiving messages from 430 . fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`) 431 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 432 433 Level: developer 434 435 Notes: 436 This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 437 `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data. 438 439 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 440 441 References: 442 . * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 443 Scalable communication protocols for dynamic sparse data exchange, 2010. 444 445 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedFReq()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()` 446 @*/ 447 PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 448 { 449 MPI_Request *toreqs, *fromreqs; 450 451 PetscFunctionBegin; 452 PetscCall(PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, &toreqs, &fromreqs, send, recv, ctx)); 453 PetscCallMPI(MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE)); 454 PetscCallMPI(MPI_Waitall(*nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE)); 455 PetscCall(PetscFree(toreqs)); 456 PetscCall(PetscFree(fromreqs)); 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 /*@C 461 PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests 462 463 Collective 464 465 Input Parameters: 466 + comm - communicator 467 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 468 . dtype - datatype to send/receive from each rank (must match on all ranks) 469 . nto - number of ranks to send data to 470 . toranks - ranks to send to (array of length nto) 471 . todata - data to send to each rank (packed) 472 . ntags - number of tags needed by send/recv callbacks 473 . send - callback invoked on sending process when ready to send primary payload 474 . recv - callback invoked on receiving process after delivery of rendezvous message 475 - ctx - context for callbacks 476 477 Output Parameters: 478 + nfrom - number of ranks receiving messages from 479 . fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`) 480 . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 481 . toreqs - array of nto*ntags sender requests (caller must wait on these, then `PetscFree()`) 482 - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then `PetscFree()`) 483 484 Level: developer 485 486 Notes: 487 This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 488 `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data. 489 490 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 491 492 References: 493 . * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 494 Scalable communication protocols for dynamic sparse data exchange, 2010. 495 496 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedF()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()` 497 @*/ 498 PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 499 { 500 PetscErrorCode (*f)(MPI_Comm, PetscMPIInt, MPI_Datatype, PetscMPIInt, const PetscMPIInt[], const void *, PetscMPIInt *, PetscMPIInt **, void *, PetscMPIInt, MPI_Request **, MPI_Request **, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx); 501 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 502 PetscMPIInt i, size; 503 504 PetscFunctionBegin; 505 PetscCall(PetscSysInitializePackage()); 506 PetscCallMPI(MPI_Comm_size(comm, &size)); 507 for (i = 0; i < nto; i++) PetscCheck(toranks[i] >= 0 && size > toranks[i], comm, PETSC_ERR_ARG_OUTOFRANGE, "toranks[%d] %d not in comm size %d", i, toranks[i], size); 508 PetscCall(PetscLogEventSync(PETSC_BuildTwoSidedF, comm)); 509 PetscCall(PetscLogEventBegin(PETSC_BuildTwoSidedF, 0, 0, 0, 0)); 510 PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype)); 511 switch (buildtype) { 512 case PETSC_BUILDTWOSIDED_IBARRIER: 513 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 514 f = PetscCommBuildTwoSidedFReq_Ibarrier; 515 break; 516 #else 517 SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 518 #endif 519 case PETSC_BUILDTWOSIDED_ALLREDUCE: 520 case PETSC_BUILDTWOSIDED_REDSCATTER: 521 f = PetscCommBuildTwoSidedFReq_Reference; 522 break; 523 default: 524 SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication"); 525 } 526 PetscCall((*f)(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, toreqs, fromreqs, send, recv, ctx)); 527 PetscCall(PetscLogEventEnd(PETSC_BuildTwoSidedF, 0, 0, 0, 0)); 528 PetscFunctionReturn(PETSC_SUCCESS); 529 } 530