1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 PetscLogEvent PETSC_BuildTwoSided; 5 PetscLogEvent PETSC_BuildTwoSidedF; 6 7 const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE", "IBARRIER", "REDSCATTER", "PetscBuildTwoSidedType", "PETSC_BUILDTWOSIDED_", NULL}; 8 9 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 10 11 /*@ 12 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 13 14 Logically Collective 15 16 Input Parameters: 17 + comm - `PETSC_COMM_WORLD` 18 - twosided - algorithm to use in subsequent calls to `PetscCommBuildTwoSided()` 19 20 Level: developer 21 22 Note: 23 This option is currently global, but could be made per-communicator. 24 25 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedGetType()`, `PetscBuildTwoSidedType` 26 @*/ 27 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm, PetscBuildTwoSidedType twosided) 28 { 29 PetscFunctionBegin; 30 if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 31 PetscMPIInt b1[2], b2[2]; 32 b1[0] = -(PetscMPIInt)twosided; 33 b1[1] = (PetscMPIInt)twosided; 34 PetscCall(MPIU_Allreduce(b1, b2, 2, MPI_INT, MPI_MAX, comm)); 35 PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Enum value must be same on all processes"); 36 } 37 _twosided_type = twosided; 38 PetscFunctionReturn(PETSC_SUCCESS); 39 } 40 41 /*@ 42 PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication 43 44 Logically Collective 45 46 Output Parameters: 47 + comm - communicator on which to query algorithm 48 - twosided - algorithm to use for `PetscCommBuildTwoSided()` 49 50 Level: developer 51 52 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedSetType()`, `PetscBuildTwoSidedType` 53 @*/ 54 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm, PetscBuildTwoSidedType *twosided) 55 { 56 PetscMPIInt size; 57 58 PetscFunctionBegin; 59 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 60 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 61 PetscCallMPI(MPI_Comm_size(comm, &size)); 62 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */ 63 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 64 if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 65 #endif 66 PetscCall(PetscOptionsGetEnum(NULL, NULL, "-build_twosided", PetscBuildTwoSidedTypes, (PetscEnum *)&_twosided_type, NULL)); 67 } 68 *twosided = _twosided_type; 69 PetscFunctionReturn(PETSC_SUCCESS); 70 } 71 72 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 73 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 74 { 75 PetscMPIInt nrecvs, tag, done, i; 76 MPI_Aint lb, unitbytes; 77 char *tdata; 78 MPI_Request *sendreqs, barrier; 79 PetscSegBuffer segrank, segdata; 80 PetscBool barrier_started; 81 82 PetscFunctionBegin; 83 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 84 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 85 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 86 tdata = (char *)todata; 87 PetscCall(PetscMalloc1(nto, &sendreqs)); 88 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 89 PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank)); 90 PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata)); 91 92 nrecvs = 0; 93 barrier = MPI_REQUEST_NULL; 94 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 95 * but we need to work around it. */ 96 barrier_started = PETSC_FALSE; 97 for (done = 0; !done;) { 98 PetscMPIInt flag; 99 MPI_Status status; 100 PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status)); 101 if (flag) { /* incoming message */ 102 PetscMPIInt *recvrank; 103 void *buf; 104 PetscCall(PetscSegBufferGet(segrank, 1, &recvrank)); 105 PetscCall(PetscSegBufferGet(segdata, count, &buf)); 106 *recvrank = status.MPI_SOURCE; 107 PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE)); 108 nrecvs++; 109 } 110 if (!barrier_started) { 111 PetscMPIInt sent, nsends; 112 PetscCall(PetscMPIIntCast(nto, &nsends)); 113 PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE)); 114 if (sent) { 115 PetscCallMPI(MPI_Ibarrier(comm, &barrier)); 116 barrier_started = PETSC_TRUE; 117 PetscCall(PetscFree(sendreqs)); 118 } 119 } else { 120 PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE)); 121 } 122 } 123 *nfrom = nrecvs; 124 PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks)); 125 PetscCall(PetscSegBufferDestroy(&segrank)); 126 PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata)); 127 PetscCall(PetscSegBufferDestroy(&segdata)); 128 PetscCall(PetscCommDestroy(&comm)); 129 PetscFunctionReturn(PETSC_SUCCESS); 130 } 131 #endif 132 133 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 134 { 135 PetscMPIInt size, rank, *iflags, nrecvs, tag, *franks, i, flg; 136 MPI_Aint lb, unitbytes; 137 char *tdata, *fdata; 138 MPI_Request *reqs, *sendreqs; 139 MPI_Status *statuses; 140 PetscCommCounter *counter; 141 142 PetscFunctionBegin; 143 PetscCallMPI(MPI_Comm_size(comm, &size)); 144 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 145 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 146 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg)); 147 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set"); 148 if (!counter->iflags) { 149 PetscCall(PetscCalloc1(size, &counter->iflags)); 150 iflags = counter->iflags; 151 } else { 152 iflags = counter->iflags; 153 PetscCall(PetscArrayzero(iflags, size)); 154 } 155 for (i = 0; i < nto; i++) iflags[toranks[i]] = 1; 156 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, iflags, size, MPI_INT, MPI_SUM, comm)); 157 nrecvs = iflags[rank]; 158 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 159 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 160 PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata)); 161 tdata = (char *)todata; 162 PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses)); 163 sendreqs = reqs + nrecvs; 164 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i)); 165 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 166 PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses)); 167 PetscCall(PetscMalloc1(nrecvs, &franks)); 168 for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 169 PetscCall(PetscFree2(reqs, statuses)); 170 PetscCall(PetscCommDestroy(&comm)); 171 172 *nfrom = nrecvs; 173 *fromranks = franks; 174 *(void **)fromdata = fdata; 175 PetscFunctionReturn(PETSC_SUCCESS); 176 } 177 178 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 179 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 180 { 181 PetscMPIInt size, *iflags, nrecvs, tag, *franks, i, flg; 182 MPI_Aint lb, unitbytes; 183 char *tdata, *fdata; 184 MPI_Request *reqs, *sendreqs; 185 MPI_Status *statuses; 186 PetscCommCounter *counter; 187 188 PetscFunctionBegin; 189 PetscCallMPI(MPI_Comm_size(comm, &size)); 190 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 191 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg)); 192 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set"); 193 if (!counter->iflags) { 194 PetscCall(PetscCalloc1(size, &counter->iflags)); 195 iflags = counter->iflags; 196 } else { 197 iflags = counter->iflags; 198 PetscCall(PetscArrayzero(iflags, size)); 199 } 200 for (i = 0; i < nto; i++) iflags[toranks[i]] = 1; 201 PetscCallMPI(MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, comm)); 202 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 203 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 204 PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata)); 205 tdata = (char *)todata; 206 PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses)); 207 sendreqs = reqs + nrecvs; 208 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i)); 209 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 210 PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses)); 211 PetscCall(PetscMalloc1(nrecvs, &franks)); 212 for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 213 PetscCall(PetscFree2(reqs, statuses)); 214 PetscCall(PetscCommDestroy(&comm)); 215 216 *nfrom = nrecvs; 217 *fromranks = franks; 218 *(void **)fromdata = fdata; 219 PetscFunctionReturn(PETSC_SUCCESS); 220 } 221 #endif 222 223 /*@C 224 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 225 226 Collective 227 228 Input Parameters: 229 + comm - communicator 230 . count - number of entries to send/receive (must match on all ranks) 231 . dtype - datatype to send/receive from each rank (must match on all ranks) 232 . nto - number of ranks to send data to 233 . toranks - ranks to send to (array of length nto) 234 - todata - data to send to each rank (packed) 235 236 Output Parameters: 237 + nfrom - number of ranks receiving messages from 238 . fromranks - ranks receiving messages from (length `nfrom`, caller should `PetscFree()`) 239 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 240 241 Options Database Key: 242 . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, 243 otherwise ibarrier. 244 245 Level: developer 246 247 Notes: 248 This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 249 `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other constant-size data. 250 251 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 252 253 References: 254 . * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 255 Scalable communication protocols for dynamic sparse data exchange, 2010. 256 257 .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`, `PetscCommBuildTwoSidedSetType()`, `PetscCommBuildTwoSidedType` 258 @*/ 259 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 260 { 261 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 262 263 PetscFunctionBegin; 264 PetscCall(PetscSysInitializePackage()); 265 PetscCall(PetscLogEventSync(PETSC_BuildTwoSided, comm)); 266 PetscCall(PetscLogEventBegin(PETSC_BuildTwoSided, 0, 0, 0, 0)); 267 PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype)); 268 switch (buildtype) { 269 case PETSC_BUILDTWOSIDED_IBARRIER: 270 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 271 PetscCall(PetscCommBuildTwoSided_Ibarrier(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 272 break; 273 #else 274 SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 275 #endif 276 case PETSC_BUILDTWOSIDED_ALLREDUCE: 277 PetscCall(PetscCommBuildTwoSided_Allreduce(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 278 break; 279 case PETSC_BUILDTWOSIDED_REDSCATTER: 280 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 281 PetscCall(PetscCommBuildTwoSided_RedScatter(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 282 break; 283 #else 284 SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 285 #endif 286 default: 287 SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication"); 288 } 289 PetscCall(PetscLogEventEnd(PETSC_BuildTwoSided, 0, 0, 0, 0)); 290 PetscFunctionReturn(PETSC_SUCCESS); 291 } 292 293 static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 294 { 295 PetscMPIInt i, *tag; 296 MPI_Aint lb, unitbytes; 297 MPI_Request *sendreq, *recvreq; 298 299 PetscFunctionBegin; 300 PetscCall(PetscMalloc1(ntags, &tag)); 301 if (ntags > 0) PetscCall(PetscCommDuplicate(comm, &comm, &tag[0])); 302 for (i = 1; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i])); 303 304 /* Perform complete initial rendezvous */ 305 PetscCall(PetscCommBuildTwoSided(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 306 307 PetscCall(PetscMalloc1(nto * ntags, &sendreq)); 308 PetscCall(PetscMalloc1(*nfrom * ntags, &recvreq)); 309 310 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 311 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 312 for (i = 0; i < nto; i++) { 313 PetscMPIInt k; 314 for (k = 0; k < ntags; k++) sendreq[i * ntags + k] = MPI_REQUEST_NULL; 315 PetscCall((*send)(comm, tag, i, toranks[i], ((char *)todata) + count * unitbytes * i, sendreq + i * ntags, ctx)); 316 } 317 for (i = 0; i < *nfrom; i++) { 318 void *header = (*(char **)fromdata) + count * unitbytes * i; 319 PetscMPIInt k; 320 for (k = 0; k < ntags; k++) recvreq[i * ntags + k] = MPI_REQUEST_NULL; 321 PetscCall((*recv)(comm, tag, (*fromranks)[i], header, recvreq + i * ntags, ctx)); 322 } 323 PetscCall(PetscFree(tag)); 324 PetscCall(PetscCommDestroy(&comm)); 325 *toreqs = sendreq; 326 *fromreqs = recvreq; 327 PetscFunctionReturn(PETSC_SUCCESS); 328 } 329 330 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 331 332 static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 333 { 334 PetscMPIInt nrecvs, tag, *tags, done, i; 335 MPI_Aint lb, unitbytes; 336 char *tdata; 337 MPI_Request *sendreqs, *usendreqs, *req, barrier; 338 PetscSegBuffer segrank, segdata, segreq; 339 PetscBool barrier_started; 340 341 PetscFunctionBegin; 342 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 343 PetscCall(PetscMalloc1(ntags, &tags)); 344 for (i = 0; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tags[i])); 345 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 346 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 347 tdata = (char *)todata; 348 PetscCall(PetscMalloc1(nto, &sendreqs)); 349 PetscCall(PetscMalloc1(nto * ntags, &usendreqs)); 350 /* Post synchronous sends */ 351 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 352 /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the 353 * synchronous messages above. */ 354 for (i = 0; i < nto; i++) { 355 PetscMPIInt k; 356 for (k = 0; k < ntags; k++) usendreqs[i * ntags + k] = MPI_REQUEST_NULL; 357 PetscCall((*send)(comm, tags, i, toranks[i], tdata + count * unitbytes * i, usendreqs + i * ntags, ctx)); 358 } 359 360 PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank)); 361 PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata)); 362 PetscCall(PetscSegBufferCreate(sizeof(MPI_Request), 4, &segreq)); 363 364 nrecvs = 0; 365 barrier = MPI_REQUEST_NULL; 366 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 367 * but we need to work around it. */ 368 barrier_started = PETSC_FALSE; 369 for (done = 0; !done;) { 370 PetscMPIInt flag; 371 MPI_Status status; 372 PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status)); 373 if (flag) { /* incoming message */ 374 PetscMPIInt *recvrank, k; 375 void *buf; 376 PetscCall(PetscSegBufferGet(segrank, 1, &recvrank)); 377 PetscCall(PetscSegBufferGet(segdata, count, &buf)); 378 *recvrank = status.MPI_SOURCE; 379 PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE)); 380 PetscCall(PetscSegBufferGet(segreq, ntags, &req)); 381 for (k = 0; k < ntags; k++) req[k] = MPI_REQUEST_NULL; 382 PetscCall((*recv)(comm, tags, status.MPI_SOURCE, buf, req, ctx)); 383 nrecvs++; 384 } 385 if (!barrier_started) { 386 PetscMPIInt sent, nsends; 387 PetscCall(PetscMPIIntCast(nto, &nsends)); 388 PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE)); 389 if (sent) { 390 PetscCallMPI(MPI_Ibarrier(comm, &barrier)); 391 barrier_started = PETSC_TRUE; 392 } 393 } else { 394 PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE)); 395 } 396 } 397 *nfrom = nrecvs; 398 PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks)); 399 PetscCall(PetscSegBufferDestroy(&segrank)); 400 PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata)); 401 PetscCall(PetscSegBufferDestroy(&segdata)); 402 *toreqs = usendreqs; 403 PetscCall(PetscSegBufferExtractAlloc(segreq, fromreqs)); 404 PetscCall(PetscSegBufferDestroy(&segreq)); 405 PetscCall(PetscFree(sendreqs)); 406 PetscCall(PetscFree(tags)); 407 PetscCall(PetscCommDestroy(&comm)); 408 PetscFunctionReturn(PETSC_SUCCESS); 409 } 410 #endif 411 412 /*@C 413 PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous 414 415 Collective 416 417 Input Parameters: 418 + comm - communicator 419 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 420 . dtype - datatype to send/receive from each rank (must match on all ranks) 421 . nto - number of ranks to send data to 422 . toranks - ranks to send to (array of length nto) 423 . todata - data to send to each rank (packed) 424 . ntags - number of tags needed by send/recv callbacks 425 . send - callback invoked on sending process when ready to send primary payload 426 . recv - callback invoked on receiving process after delivery of rendezvous message 427 - ctx - context for callbacks 428 429 Output Parameters: 430 + nfrom - number of ranks receiving messages from 431 . fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`) 432 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 433 434 Level: developer 435 436 Notes: 437 This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 438 `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data. 439 440 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 441 442 References: 443 . * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 444 Scalable communication protocols for dynamic sparse data exchange, 2010. 445 446 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedFReq()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()` 447 @*/ 448 PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 449 { 450 MPI_Request *toreqs, *fromreqs; 451 452 PetscFunctionBegin; 453 PetscCall(PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, &toreqs, &fromreqs, send, recv, ctx)); 454 PetscCallMPI(MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE)); 455 PetscCallMPI(MPI_Waitall(*nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE)); 456 PetscCall(PetscFree(toreqs)); 457 PetscCall(PetscFree(fromreqs)); 458 PetscFunctionReturn(PETSC_SUCCESS); 459 } 460 461 /*@C 462 PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests 463 464 Collective 465 466 Input Parameters: 467 + comm - communicator 468 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 469 . dtype - datatype to send/receive from each rank (must match on all ranks) 470 . nto - number of ranks to send data to 471 . toranks - ranks to send to (array of length nto) 472 . todata - data to send to each rank (packed) 473 . ntags - number of tags needed by send/recv callbacks 474 . send - callback invoked on sending process when ready to send primary payload 475 . recv - callback invoked on receiving process after delivery of rendezvous message 476 - ctx - context for callbacks 477 478 Output Parameters: 479 + nfrom - number of ranks receiving messages from 480 . fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`) 481 . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 482 . toreqs - array of nto*ntags sender requests (caller must wait on these, then `PetscFree()`) 483 - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then `PetscFree()`) 484 485 Level: developer 486 487 Notes: 488 This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 489 `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data. 490 491 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 492 493 References: 494 . * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 495 Scalable communication protocols for dynamic sparse data exchange, 2010. 496 497 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedF()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()` 498 @*/ 499 PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx) 500 { 501 PetscErrorCode (*f)(MPI_Comm, PetscMPIInt, MPI_Datatype, PetscMPIInt, const PetscMPIInt[], const void *, PetscMPIInt *, PetscMPIInt **, void *, PetscMPIInt, MPI_Request **, MPI_Request **, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), void *ctx); 502 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 503 PetscMPIInt i, size; 504 505 PetscFunctionBegin; 506 PetscCall(PetscSysInitializePackage()); 507 PetscCallMPI(MPI_Comm_size(comm, &size)); 508 for (i = 0; i < nto; i++) PetscCheck(toranks[i] >= 0 && size > toranks[i], comm, PETSC_ERR_ARG_OUTOFRANGE, "toranks[%d] %d not in comm size %d", i, toranks[i], size); 509 PetscCall(PetscLogEventSync(PETSC_BuildTwoSidedF, comm)); 510 PetscCall(PetscLogEventBegin(PETSC_BuildTwoSidedF, 0, 0, 0, 0)); 511 PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype)); 512 switch (buildtype) { 513 case PETSC_BUILDTWOSIDED_IBARRIER: 514 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 515 f = PetscCommBuildTwoSidedFReq_Ibarrier; 516 break; 517 #else 518 SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 519 #endif 520 case PETSC_BUILDTWOSIDED_ALLREDUCE: 521 case PETSC_BUILDTWOSIDED_REDSCATTER: 522 f = PetscCommBuildTwoSidedFReq_Reference; 523 break; 524 default: 525 SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication"); 526 } 527 PetscCall((*f)(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, toreqs, fromreqs, send, recv, ctx)); 528 PetscCall(PetscLogEventEnd(PETSC_BuildTwoSidedF, 0, 0, 0, 0)); 529 PetscFunctionReturn(PETSC_SUCCESS); 530 } 531