1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 PetscLogEvent PETSC_BuildTwoSided; 5 PetscLogEvent PETSC_BuildTwoSidedF; 6 7 const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE", "IBARRIER", "REDSCATTER", "PetscBuildTwoSidedType", "PETSC_BUILDTWOSIDED_", NULL}; 8 9 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 10 11 /*@ 12 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 13 14 Logically Collective 15 16 Input Parameters: 17 + comm - `PETSC_COMM_WORLD` 18 - twosided - algorithm to use in subsequent calls to `PetscCommBuildTwoSided()` 19 20 Level: developer 21 22 Note: 23 This option is currently global, but could be made per-communicator. 24 25 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedGetType()`, `PetscBuildTwoSidedType` 26 @*/ 27 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm, PetscBuildTwoSidedType twosided) 28 { 29 PetscFunctionBegin; 30 if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 31 PetscMPIInt b1[2], b2[2]; 32 b1[0] = -(PetscMPIInt)twosided; 33 b1[1] = (PetscMPIInt)twosided; 34 PetscCallMPI(MPIU_Allreduce(b1, b2, 2, MPI_INT, MPI_MAX, comm)); 35 PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Enum value must be same on all processes"); 36 } 37 _twosided_type = twosided; 38 PetscFunctionReturn(PETSC_SUCCESS); 39 } 40 41 /*@ 42 PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication 43 44 Logically Collective 45 46 Output Parameters: 47 + comm - communicator on which to query algorithm 48 - twosided - algorithm to use for `PetscCommBuildTwoSided()` 49 50 Level: developer 51 52 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedSetType()`, `PetscBuildTwoSidedType` 53 @*/ 54 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm, PetscBuildTwoSidedType *twosided) 55 { 56 PetscMPIInt size; 57 58 PetscFunctionBegin; 59 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 60 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 61 PetscCallMPI(MPI_Comm_size(comm, &size)); 62 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */ 63 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 64 if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 65 #endif 66 PetscCall(PetscOptionsGetEnum(NULL, NULL, "-build_twosided", PetscBuildTwoSidedTypes, (PetscEnum *)&_twosided_type, NULL)); 67 } 68 *twosided = _twosided_type; 69 PetscFunctionReturn(PETSC_SUCCESS); 70 } 71 72 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 73 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 74 { 75 PetscMPIInt nrecvs, tag, done, i; 76 MPI_Aint lb, unitbytes; 77 char *tdata; 78 MPI_Request *sendreqs, barrier; 79 PetscSegBuffer segrank, segdata; 80 PetscBool barrier_started; 81 82 PetscFunctionBegin; 83 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 84 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 85 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 86 tdata = (char *)todata; 87 PetscCall(PetscMalloc1(nto, &sendreqs)); 88 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 89 PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank)); 90 PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata)); 91 92 nrecvs = 0; 93 barrier = MPI_REQUEST_NULL; 94 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 95 * but we need to work around it. */ 96 barrier_started = PETSC_FALSE; 97 for (done = 0; !done;) { 98 PetscMPIInt flag; 99 MPI_Status status; 100 PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status)); 101 if (flag) { /* incoming message */ 102 PetscMPIInt *recvrank; 103 void *buf; 104 PetscCall(PetscSegBufferGet(segrank, 1, &recvrank)); 105 PetscCall(PetscSegBufferGet(segdata, count, &buf)); 106 *recvrank = status.MPI_SOURCE; 107 PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE)); 108 nrecvs++; 109 } 110 if (!barrier_started) { 111 PetscMPIInt sent, nsends; 112 PetscCall(PetscMPIIntCast(nto, &nsends)); 113 PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE)); 114 if (sent) { 115 PetscCallMPI(MPI_Ibarrier(comm, &barrier)); 116 barrier_started = PETSC_TRUE; 117 PetscCall(PetscFree(sendreqs)); 118 } 119 } else { 120 PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE)); 121 } 122 } 123 *nfrom = nrecvs; 124 PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks)); 125 PetscCall(PetscSegBufferDestroy(&segrank)); 126 PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata)); 127 PetscCall(PetscSegBufferDestroy(&segdata)); 128 PetscCall(PetscCommDestroy(&comm)); 129 PetscFunctionReturn(PETSC_SUCCESS); 130 } 131 #endif 132 133 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 134 { 135 PetscMPIInt size, rank, *iflags, nrecvs, tag, *franks, i, flg; 136 MPI_Aint lb, unitbytes; 137 char *tdata, *fdata; 138 MPI_Request *reqs, *sendreqs; 139 MPI_Status *statuses; 140 PetscCommCounter *counter; 141 142 PetscFunctionBegin; 143 PetscCallMPI(MPI_Comm_size(comm, &size)); 144 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 145 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 146 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg)); 147 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set"); 148 if (!counter->iflags) { 149 PetscCall(PetscCalloc1(size, &counter->iflags)); 150 iflags = counter->iflags; 151 } else { 152 iflags = counter->iflags; 153 PetscCall(PetscArrayzero(iflags, size)); 154 } 155 for (i = 0; i < nto; i++) iflags[toranks[i]] = 1; 156 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, iflags, size, MPI_INT, MPI_SUM, comm)); 157 nrecvs = iflags[rank]; 158 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 159 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 160 PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata)); 161 tdata = (char *)todata; 162 PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses)); 163 sendreqs = PetscSafePointerPlusOffset(reqs, nrecvs); 164 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPIU_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i)); 165 for (i = 0; i < nto; i++) PetscCallMPI(MPIU_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 166 PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses)); 167 PetscCall(PetscMalloc1(nrecvs, &franks)); 168 for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 169 PetscCall(PetscFree2(reqs, statuses)); 170 PetscCall(PetscCommDestroy(&comm)); 171 172 *nfrom = nrecvs; 173 *fromranks = franks; 174 *(void **)fromdata = fdata; 175 PetscFunctionReturn(PETSC_SUCCESS); 176 } 177 178 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 179 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata) 180 { 181 PetscMPIInt size, *iflags, nrecvs, tag, *franks, i, flg; 182 MPI_Aint lb, unitbytes; 183 char *tdata, *fdata; 184 MPI_Request *reqs, *sendreqs; 185 MPI_Status *statuses; 186 PetscCommCounter *counter; 187 188 PetscFunctionBegin; 189 PetscCallMPI(MPI_Comm_size(comm, &size)); 190 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 191 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg)); 192 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set"); 193 if (!counter->iflags) { 194 PetscCall(PetscCalloc1(size, &counter->iflags)); 195 iflags = counter->iflags; 196 } else { 197 iflags = counter->iflags; 198 PetscCall(PetscArrayzero(iflags, size)); 199 } 200 for (i = 0; i < nto; i++) iflags[toranks[i]] = 1; 201 PetscCallMPI(MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, comm)); 202 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 203 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 204 PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata)); 205 tdata = (char *)todata; 206 PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses)); 207 sendreqs = reqs + nrecvs; 208 for (i = 0; i < nrecvs; i++) PetscCallMPI(MPIU_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i)); 209 for (i = 0; i < nto; i++) PetscCallMPI(MPIU_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 210 PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses)); 211 PetscCall(PetscMalloc1(nrecvs, &franks)); 212 for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 213 PetscCall(PetscFree2(reqs, statuses)); 214 PetscCall(PetscCommDestroy(&comm)); 215 216 *nfrom = nrecvs; 217 *fromranks = franks; 218 *(void **)fromdata = fdata; 219 PetscFunctionReturn(PETSC_SUCCESS); 220 } 221 #endif 222 223 /*@C 224 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 225 226 Collective, No Fortran Support 227 228 Input Parameters: 229 + comm - communicator 230 . count - number of entries to send/receive (must match on all ranks) 231 . dtype - datatype to send/receive from each rank (must match on all ranks) 232 . nto - number of ranks to send data to 233 . toranks - ranks to send to (array of length `nto`) 234 - todata - data to send to each rank (packed) 235 236 Output Parameters: 237 + nfrom - number of ranks receiving messages from 238 . fromranks - ranks receiving messages from (length `nfrom`, caller should `PetscFree()`) 239 - fromdata - packed data from each rank, each with count entries of type dtype (length `nfrom`, caller responsible for `PetscFree()`) 240 241 Options Database Key: 242 . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, 243 otherwise ibarrier. 244 245 Level: developer 246 247 Notes: 248 This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 249 `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other constant-size data, see {cite}`hoeflersiebretlumsdaine10`. 250 251 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 252 253 .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`, `PetscCommBuildTwoSidedSetType()`, `PetscCommBuildTwoSidedType` 254 @*/ 255 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt toranks[], const void *todata, PetscMPIInt *nfrom, PetscMPIInt *fromranks[], void *fromdata) 256 { 257 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 258 259 PetscFunctionBegin; 260 PetscCall(PetscSysInitializePackage()); 261 PetscCall(PetscLogEventSync(PETSC_BuildTwoSided, comm)); 262 PetscCall(PetscLogEventBegin(PETSC_BuildTwoSided, 0, 0, 0, 0)); 263 PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype)); 264 switch (buildtype) { 265 case PETSC_BUILDTWOSIDED_IBARRIER: 266 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 267 PetscCall(PetscCommBuildTwoSided_Ibarrier(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 268 break; 269 #else 270 SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 271 #endif 272 case PETSC_BUILDTWOSIDED_ALLREDUCE: 273 PetscCall(PetscCommBuildTwoSided_Allreduce(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 274 break; 275 case PETSC_BUILDTWOSIDED_REDSCATTER: 276 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 277 PetscCall(PetscCommBuildTwoSided_RedScatter(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 278 break; 279 #else 280 SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 281 #endif 282 default: 283 SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication"); 284 } 285 PetscCall(PetscLogEventEnd(PETSC_BuildTwoSided, 0, 0, 0, 0)); 286 PetscFunctionReturn(PETSC_SUCCESS); 287 } 288 289 static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx) 290 { 291 PetscMPIInt i, *tag; 292 MPI_Aint lb, unitbytes; 293 MPI_Request *sendreq, *recvreq; 294 295 PetscFunctionBegin; 296 PetscCall(PetscMalloc1(ntags, &tag)); 297 if (ntags > 0) PetscCall(PetscCommDuplicate(comm, &comm, &tag[0])); 298 for (i = 1; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i])); 299 300 /* Perform complete initial rendezvous */ 301 PetscCall(PetscCommBuildTwoSided(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata)); 302 303 PetscCall(PetscMalloc1(nto * ntags, &sendreq)); 304 PetscCall(PetscMalloc1(*nfrom * ntags, &recvreq)); 305 306 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 307 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 308 for (i = 0; i < nto; i++) { 309 PetscMPIInt k; 310 for (k = 0; k < ntags; k++) sendreq[i * ntags + k] = MPI_REQUEST_NULL; 311 PetscCall((*send)(comm, tag, i, toranks[i], ((char *)todata) + count * unitbytes * i, sendreq + i * ntags, ctx)); 312 } 313 for (i = 0; i < *nfrom; i++) { 314 void *header = (*(char **)fromdata) + count * unitbytes * i; 315 PetscMPIInt k; 316 for (k = 0; k < ntags; k++) recvreq[i * ntags + k] = MPI_REQUEST_NULL; 317 PetscCall((*recv)(comm, tag, (*fromranks)[i], header, recvreq + i * ntags, ctx)); 318 } 319 PetscCall(PetscFree(tag)); 320 PetscCall(PetscCommDestroy(&comm)); 321 *toreqs = sendreq; 322 *fromreqs = recvreq; 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 327 328 static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx) 329 { 330 PetscMPIInt nrecvs, tag, *tags, done, i; 331 MPI_Aint lb, unitbytes; 332 char *tdata; 333 MPI_Request *sendreqs, *usendreqs, *req, barrier; 334 PetscSegBuffer segrank, segdata, segreq; 335 PetscBool barrier_started; 336 337 PetscFunctionBegin; 338 PetscCall(PetscCommDuplicate(comm, &comm, &tag)); 339 PetscCall(PetscMalloc1(ntags, &tags)); 340 for (i = 0; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tags[i])); 341 PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes)); 342 PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb); 343 tdata = (char *)todata; 344 PetscCall(PetscMalloc1(nto, &sendreqs)); 345 PetscCall(PetscMalloc1(nto * ntags, &usendreqs)); 346 /* Post synchronous sends */ 347 for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i)); 348 /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the 349 * synchronous messages above. */ 350 for (i = 0; i < nto; i++) { 351 PetscMPIInt k; 352 for (k = 0; k < ntags; k++) usendreqs[i * ntags + k] = MPI_REQUEST_NULL; 353 PetscCall((*send)(comm, tags, i, toranks[i], tdata + count * unitbytes * i, usendreqs + i * ntags, ctx)); 354 } 355 356 PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank)); 357 PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata)); 358 PetscCall(PetscSegBufferCreate(sizeof(MPI_Request), 4, &segreq)); 359 360 nrecvs = 0; 361 barrier = MPI_REQUEST_NULL; 362 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 363 * but we need to work around it. */ 364 barrier_started = PETSC_FALSE; 365 for (done = 0; !done;) { 366 PetscMPIInt flag; 367 MPI_Status status; 368 PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status)); 369 if (flag) { /* incoming message */ 370 PetscMPIInt *recvrank, k; 371 void *buf; 372 PetscCall(PetscSegBufferGet(segrank, 1, &recvrank)); 373 PetscCall(PetscSegBufferGet(segdata, count, &buf)); 374 *recvrank = status.MPI_SOURCE; 375 PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE)); 376 PetscCall(PetscSegBufferGet(segreq, ntags, &req)); 377 for (k = 0; k < ntags; k++) req[k] = MPI_REQUEST_NULL; 378 PetscCall((*recv)(comm, tags, status.MPI_SOURCE, buf, req, ctx)); 379 nrecvs++; 380 } 381 if (!barrier_started) { 382 PetscMPIInt sent, nsends; 383 PetscCall(PetscMPIIntCast(nto, &nsends)); 384 PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE)); 385 if (sent) { 386 PetscCallMPI(MPI_Ibarrier(comm, &barrier)); 387 barrier_started = PETSC_TRUE; 388 } 389 } else { 390 PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE)); 391 } 392 } 393 *nfrom = nrecvs; 394 PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks)); 395 PetscCall(PetscSegBufferDestroy(&segrank)); 396 PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata)); 397 PetscCall(PetscSegBufferDestroy(&segdata)); 398 *toreqs = usendreqs; 399 PetscCall(PetscSegBufferExtractAlloc(segreq, fromreqs)); 400 PetscCall(PetscSegBufferDestroy(&segreq)); 401 PetscCall(PetscFree(sendreqs)); 402 PetscCall(PetscFree(tags)); 403 PetscCall(PetscCommDestroy(&comm)); 404 PetscFunctionReturn(PETSC_SUCCESS); 405 } 406 #endif 407 408 /*@C 409 PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous 410 411 Collective, No Fortran Support 412 413 Input Parameters: 414 + comm - communicator 415 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 416 . dtype - datatype to send/receive from each rank (must match on all ranks) 417 . nto - number of ranks to send data to 418 . toranks - ranks to send to (array of length nto) 419 . todata - data to send to each rank (packed) 420 . ntags - number of tags needed by send/recv callbacks 421 . send - callback invoked on sending process when ready to send primary payload 422 . recv - callback invoked on receiving process after delivery of rendezvous message 423 - ctx - context for callbacks 424 425 Output Parameters: 426 + nfrom - number of ranks receiving messages from 427 . fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`) 428 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 429 430 Level: developer 431 432 Notes: 433 This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 434 `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data, {cite}`hoeflersiebretlumsdaine10`. 435 436 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 437 438 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedFReq()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()` 439 @*/ 440 PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx) 441 { 442 MPI_Request *toreqs, *fromreqs; 443 444 PetscFunctionBegin; 445 PetscCall(PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, &toreqs, &fromreqs, send, recv, ctx)); 446 PetscCallMPI(MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE)); 447 PetscCallMPI(MPI_Waitall(*nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE)); 448 PetscCall(PetscFree(toreqs)); 449 PetscCall(PetscFree(fromreqs)); 450 PetscFunctionReturn(PETSC_SUCCESS); 451 } 452 453 /*@C 454 PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests 455 456 Collective, No Fortran Support 457 458 Input Parameters: 459 + comm - communicator 460 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 461 . dtype - datatype to send/receive from each rank (must match on all ranks) 462 . nto - number of ranks to send data to 463 . toranks - ranks to send to (array of length nto) 464 . todata - data to send to each rank (packed) 465 . ntags - number of tags needed by send/recv callbacks 466 . send - callback invoked on sending process when ready to send primary payload 467 . recv - callback invoked on receiving process after delivery of rendezvous message 468 - ctx - context for callbacks 469 470 Output Parameters: 471 + nfrom - number of ranks receiving messages from 472 . fromranks - ranks receiving messages from (length nfrom; caller should `PetscFree()`) 473 . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for `PetscFree()`) 474 . toreqs - array of nto*ntags sender requests (caller must wait on these, then `PetscFree()`) 475 - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then `PetscFree()`) 476 477 Level: developer 478 479 Notes: 480 This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and 481 `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data, {cite}`hoeflersiebretlumsdaine10`. 482 483 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 484 485 .seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedF()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()` 486 @*/ 487 PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx) 488 { 489 PetscErrorCode (*f)(MPI_Comm, PetscMPIInt, MPI_Datatype, PetscMPIInt, const PetscMPIInt[], const void *, PetscMPIInt *, PetscMPIInt **, void *, PetscMPIInt, MPI_Request **, MPI_Request **, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx); 490 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 491 PetscMPIInt i, size; 492 493 PetscFunctionBegin; 494 PetscCall(PetscSysInitializePackage()); 495 PetscCallMPI(MPI_Comm_size(comm, &size)); 496 for (i = 0; i < nto; i++) PetscCheck(toranks[i] >= 0 && size > toranks[i], comm, PETSC_ERR_ARG_OUTOFRANGE, "toranks[%d] %d not in comm size %d", i, toranks[i], size); 497 PetscCall(PetscLogEventSync(PETSC_BuildTwoSidedF, comm)); 498 PetscCall(PetscLogEventBegin(PETSC_BuildTwoSidedF, 0, 0, 0, 0)); 499 PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype)); 500 switch (buildtype) { 501 case PETSC_BUILDTWOSIDED_IBARRIER: 502 #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 503 f = PetscCommBuildTwoSidedFReq_Ibarrier; 504 break; 505 #else 506 SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 507 #endif 508 case PETSC_BUILDTWOSIDED_ALLREDUCE: 509 case PETSC_BUILDTWOSIDED_REDSCATTER: 510 f = PetscCommBuildTwoSidedFReq_Reference; 511 break; 512 default: 513 SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication"); 514 } 515 PetscCall((*f)(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, toreqs, fromreqs, send, recv, ctx)); 516 PetscCall(PetscLogEventEnd(PETSC_BuildTwoSidedF, 0, 0, 0, 0)); 517 PetscFunctionReturn(PETSC_SUCCESS); 518 } 519