#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/petscimpl.h>

PetscLogEvent PETSC_BuildTwoSided;
PetscLogEvent PETSC_BuildTwoSidedF;

/* String names for PetscBuildTwoSidedType; trailing entries are the enum type
 * name and option prefix required by PetscOptionsGetEnum(). NOTSET (= -1) has
 * no string on purpose. */
const char *const PetscBuildTwoSidedTypes[] = {
  "ALLREDUCE",
  "IBARRIER",
  "REDSCATTER",
  "PetscBuildTwoSidedType",
  "PETSC_BUILDTWOSIDED_",
  NULL
};

/* Process-global algorithm selection; see the Note in PetscCommBuildTwoSidedSetType(). */
static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

/*@
  PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

  Logically Collective

  Input Parameters:
+ comm - PETSC_COMM_WORLD
- twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

  Level: developer

  Note:
  This option is currently global, but could be made per-communicator.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
    PetscMPIInt b1[2],b2[2];
    /* MAX of (-x, x) over all ranks matches only when every rank passed the same x */
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    PetscCall(MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm));
    PetscCheck(-b2[0] == b2[1],comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
  }
  _twosided_type = twosided;
  PetscFunctionReturn(0);
}

/*@
  PetscCommBuildTwoSidedGetType - get algorithm to use when building two-sided communication

  Logically Collective

  Input Parameter:
. comm - communicator on which to query algorithm

  Output Parameter:
. twosided - algorithm to use for PetscCommBuildTwoSided()

  Level: developer

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
    /* Lazily choose a default by communicator size, then let the options
     * database override it; the choice is cached globally. */
    PetscCallMPI(MPI_Comm_size(comm,&size));
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
#endif
    PetscCall(PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL));
  }
  *twosided = _twosided_type;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
/* Memory-scalable rendezvous using nonblocking consensus (Hoefler, Siebert &
 * Lumsdaine 2010): synchronous sends + Iprobe/Recv, with MPI_Ibarrier started
 * once all local sends complete; the loop ends when the barrier completes,
 * which implies every message has been matched. */
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscMPIInt    nrecvs,tag,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  /* comm is replaced by PETSc's inner communicator so the tag cannot collide with user traffic */
  PetscCall(PetscCommDuplicate(comm,&comm,&tag));
  PetscCallMPI(MPI_Type_get_extent(dtype,&lb,&unitbytes));
  PetscCheck(lb == 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb);
  tdata = (char*)todata;
  PetscCall(PetscMalloc1(nto,&sendreqs));
  /* Issend (synchronous) so completion proves the receiver has matched the message */
  for (i=0; i<nto; i++) {
    PetscCallMPI(MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i));
  }
  /* Segmented buffers grow incrementally since the number of senders is unknown */
  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank));
  PetscCall(PetscSegBufferCreate(unitbytes,4*count,&segdata));

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status));
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      PetscCall(PetscSegBufferGet(segrank,1,&recvrank));
      PetscCall(PetscSegBufferGet(segdata,count,&buf));
      *recvrank = status.MPI_SOURCE;
      PetscCallMPI(MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE));
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      PetscCall(PetscMPIIntCast(nto,&nsends));
      PetscCallMPI(MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE));
      if (sent) {
        /* All our sends are matched; enter the consensus barrier */
        PetscCallMPI(MPI_Ibarrier(comm,&barrier));
        barrier_started = PETSC_TRUE;
        PetscCall(PetscFree(sendreqs));
      }
    } else {
      /* Keep probing until every rank has entered the barrier */
      PetscCallMPI(MPI_Test(&barrier,&done,MPI_STATUS_IGNORE));
    }
  }
  *nfrom = nrecvs;
  /* Ownership of the extracted arrays passes to the caller (PetscFree) */
  PetscCall(PetscSegBufferExtractAlloc(segrank,fromranks));
  PetscCall(PetscSegBufferDestroy(&segrank));
  PetscCall(PetscSegBufferExtractAlloc(segdata,fromdata));
  PetscCall(PetscSegBufferDestroy(&segdata));
  PetscCall(PetscCommDestroy(&comm));
  PetscFunctionReturn(0);
}
#endif

/* Rendezvous via a dense flag vector reduced with MPI_Allreduce: iflags[r]
 * sums to the number of messages rank r will receive. O(size) memory per rank,
 * but simple and fast on small communicators. */
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscMPIInt      size,rank,*iflags,nrecvs,tag,*franks,i,flg;
  MPI_Aint         lb,unitbytes;
  char             *tdata,*fdata;
  MPI_Request      *reqs,*sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCall(PetscCommDuplicate(comm,&comm,&tag));
  PetscCallMPI(MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
  /* The size-long flag scratch array is cached on the inner communicator so
   * repeated calls do not reallocate; zero it if it already exists */
  if (!counter->iflags) {
    PetscCall(PetscCalloc1(size,&counter->iflags));
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    PetscCall(PetscArrayzero(iflags,size));
  }
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm));
  nrecvs = iflags[rank]; /* how many ranks flagged us as a destination */
  PetscCallMPI(MPI_Type_get_extent(dtype,&lb,&unitbytes));
  PetscCheck(lb == 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb);
  PetscCall(PetscMalloc(nrecvs*count*unitbytes,&fdata));
  tdata = (char*)todata;
  PetscCall(PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses));
  sendreqs = reqs + nrecvs; /* recv requests first so statuses[0..nrecvs) carry the sources */
  for (i=0; i<nrecvs; i++) {
    PetscCallMPI(MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i));
  }
  for (i=0; i<nto; i++) {
    PetscCallMPI(MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i));
  }
  PetscCallMPI(MPI_Waitall(nto+nrecvs,reqs,statuses));
  PetscCall(PetscMalloc1(nrecvs,&franks));
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  PetscCall(PetscFree2(reqs,statuses));
  PetscCall(PetscCommDestroy(&comm));

  *nfrom             = nrecvs;
  *fromranks         = franks;
  *(void**)fromdata  = fdata;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
/* Same flag-vector scheme as the Allreduce variant, but each rank only needs
 * its own count, so MPI_Reduce_scatter_block delivers exactly that element. */
static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscMPIInt      size,*iflags,nrecvs,tag,*franks,i,flg;
  MPI_Aint         lb,unitbytes;
  char             *tdata,*fdata;
  MPI_Request      *reqs,*sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscCommDuplicate(comm,&comm,&tag));
  PetscCallMPI(MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
  /* Reuse (or allocate) the cached flag array, as in the Allreduce variant */
  if (!counter->iflags) {
    PetscCall(PetscCalloc1(size,&counter->iflags));
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    PetscCall(PetscArrayzero(iflags,size));
  }
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  PetscCallMPI(MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm));
  PetscCallMPI(MPI_Type_get_extent(dtype,&lb,&unitbytes));
  PetscCheck(lb == 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb);
  PetscCall(PetscMalloc(nrecvs*count*unitbytes,&fdata));
  tdata = (char*)todata;
  PetscCall(PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses));
  sendreqs = reqs + nrecvs; /* recv requests first so statuses[0..nrecvs) carry the sources */
  for (i=0; i<nrecvs; i++) {
    PetscCallMPI(MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i));
  }
  for (i=0; i<nto; i++) {
    PetscCallMPI(MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i));
  }
  PetscCallMPI(MPI_Waitall(nto+nrecvs,reqs,statuses));
  PetscCall(PetscMalloc1(nrecvs,&franks));
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  PetscCall(PetscFree2(reqs,statuses));
  PetscCall(PetscCommDestroy(&comm));

  *nfrom             = nrecvs;
  *fromranks         = franks;
  *(void**)fromdata  = fdata;
  PetscFunctionReturn(0);
}
#endif

/*@C
  PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

  Collective

  Input Parameters:
+ comm - communicator
. count - number of entries to send/receive (must match on all ranks)
. dtype - datatype to send/receive from each rank (must match on all ranks)
. nto - number of ranks to send data to
. toranks - ranks to send to (array of length nto)
- todata - data to send to each rank (packed)

  Output Parameters:
+ nfrom - number of ranks receiving messages from
. fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
- fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

  Level: developer

  Options Database Keys:
. -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier.

  Notes:
  This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
  PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

  Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

  References:
. * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
   Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscFunctionBegin;
  PetscCall(PetscSysInitializePackage());
  PetscCall(PetscLogEventSync(PETSC_BuildTwoSided,comm));
  PetscCall(PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0));
  PetscCall(PetscCommBuildTwoSidedGetType(comm,&buildtype));
  /* Dispatch to the selected algorithm; unavailable algorithms raise rather
   * than silently falling back (SETERRQ returns, so the #else arms do not
   * fall through at runtime) */
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    PetscCall(PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata));
    break;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
    PetscCall(PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata));
    break;
  case PETSC_BUILDTWOSIDED_REDSCATTER:
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
    PetscCall(PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata));
    break;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
#endif
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  PetscCall(PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0));
  PetscFunctionReturn(0);
}

/* Fallback FReq implementation: runs a complete PetscCommBuildTwoSided()
 * rendezvous first, then invokes the user's send/recv callbacks to post the
 * payload requests, which are returned to the caller unwaited. */
static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscMPIInt i,*tag;
  MPI_Aint    lb,unitbytes;
  MPI_Request *sendreq,*recvreq;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(ntags,&tag));
  if (ntags > 0) {
    PetscCall(PetscCommDuplicate(comm,&comm,&tag[0]));
  }
  for (i=1; i<ntags; i++) {
    PetscCall(PetscCommGetNewTag(comm,&tag[i]));
  }

  /* Perform complete initial rendezvous */
  PetscCall(PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata));

  PetscCall(PetscMalloc1(nto*ntags,&sendreq));
  PetscCall(PetscMalloc1(*nfrom*ntags,&recvreq));

  PetscCallMPI(MPI_Type_get_extent(dtype,&lb,&unitbytes));
  PetscCheck(lb == 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb);
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    /* Pre-null the requests; callbacks may post fewer than ntags */
    for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
    PetscCall((*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx));
  }
  for (i=0; i<*nfrom; i++) {
    void *header = (*(char**)fromdata) + count*unitbytes*i;
    PetscMPIInt k;
    for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
    PetscCall((*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx));
  }
  PetscCall(PetscFree(tag));
  PetscCall(PetscCommDestroy(&comm));
  *toreqs   = sendreq;
  *fromreqs = recvreq;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)

/* Nonblocking-consensus FReq variant: overlaps the user payload callbacks with
 * the rendezvous itself (payloads are posted as soon as each rendezvous
 * message is discovered), instead of waiting for the full rendezvous first. */
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscMPIInt    nrecvs,tag,*tags,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,*usendreqs,*req,barrier;
  PetscSegBuffer segrank,segdata,segreq;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  PetscCall(PetscCommDuplicate(comm,&comm,&tag));
  PetscCall(PetscMalloc1(ntags,&tags));
  for (i=0; i<ntags; i++) {
    PetscCall(PetscCommGetNewTag(comm,&tags[i]));
  }
  PetscCallMPI(MPI_Type_get_extent(dtype,&lb,&unitbytes));
  PetscCheck(lb == 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb);
  tdata = (char*)todata;
  PetscCall(PetscMalloc1(nto,&sendreqs));
  PetscCall(PetscMalloc1(nto*ntags,&usendreqs));
  /* Post synchronous sends */
  for (i=0; i<nto; i++) {
    PetscCallMPI(MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i));
  }
  /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
   * synchronous messages above. */
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
    PetscCall((*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx));
  }

  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank));
  PetscCall(PetscSegBufferCreate(unitbytes,4*count,&segdata));
  PetscCall(PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq));

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status));
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank,k;
      void        *buf;
      PetscCall(PetscSegBufferGet(segrank,1,&recvrank));
      PetscCall(PetscSegBufferGet(segdata,count,&buf));
      *recvrank = status.MPI_SOURCE;
      PetscCallMPI(MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE));
      /* Immediately let the user post payload receives for this sender */
      PetscCall(PetscSegBufferGet(segreq,ntags,&req));
      for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
      PetscCall((*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx));
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      PetscCall(PetscMPIIntCast(nto,&nsends));
      PetscCallMPI(MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE));
      if (sent) {
        PetscCallMPI(MPI_Ibarrier(comm,&barrier));
        barrier_started = PETSC_TRUE;
      }
    } else {
      PetscCallMPI(MPI_Test(&barrier,&done,MPI_STATUS_IGNORE));
    }
  }
  *nfrom = nrecvs;
  PetscCall(PetscSegBufferExtractAlloc(segrank,fromranks));
  PetscCall(PetscSegBufferDestroy(&segrank));
  PetscCall(PetscSegBufferExtractAlloc(segdata,fromdata));
  PetscCall(PetscSegBufferDestroy(&segdata));
  *toreqs = usendreqs;
  PetscCall(PetscSegBufferExtractAlloc(segreq,fromreqs));
  PetscCall(PetscSegBufferDestroy(&segreq));
  PetscCall(PetscFree(sendreqs));
  PetscCall(PetscFree(tags));
  PetscCall(PetscCommDestroy(&comm));
  PetscFunctionReturn(0);
}
#endif

/*@C
  PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

  Collective

  Input Parameters:
+ comm - communicator
. count - number of entries to send/receive in initial rendezvous (must match on all ranks)
. dtype - datatype to send/receive from each rank (must match on all ranks)
. nto - number of ranks to send data to
. toranks - ranks to send to (array of length nto)
. todata - data to send to each rank (packed)
. ntags - number of tags needed by send/recv callbacks
. send - callback invoked on sending process when ready to send primary payload
. recv - callback invoked on receiving process after delivery of rendezvous message
- ctx - context for callbacks

  Output Parameters:
+ nfrom - number of ranks receiving messages from
. fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
- fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

  Level: developer

  Notes:
  This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
  PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

  Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

  References:
. * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
   Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
                                       PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                       PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  MPI_Request *toreqs,*fromreqs;

  PetscFunctionBegin;
  /* Same as PetscCommBuildTwoSidedFReq() but waits on all payload requests before returning */
  PetscCall(PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx));
  PetscCallMPI(MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(toreqs));
  PetscCall(PetscFree(fromreqs));
  PetscFunctionReturn(0);
}

/*@C
  PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

  Collective

  Input Parameters:
+ comm - communicator
. count - number of entries to send/receive in initial rendezvous (must match on all ranks)
. dtype - datatype to send/receive from each rank (must match on all ranks)
. nto - number of ranks to send data to
. toranks - ranks to send to (array of length nto)
. todata - data to send to each rank (packed)
. ntags - number of tags needed by send/recv callbacks
. send - callback invoked on sending process when ready to send primary payload
. recv - callback invoked on receiving process after delivery of rendezvous message
- ctx - context for callbacks

  Output Parameters:
+ nfrom - number of ranks receiving messages from
. fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
. fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
. toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
- fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

  Level: developer

  Notes:
  This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
  PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

  Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

  References:
. * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
   Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode (*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
                      PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
                      PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                      PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
  PetscMPIInt            i,size;

  PetscFunctionBegin;
  PetscCall(PetscSysInitializePackage());
  /* Validate destinations up front: an out-of-range rank would otherwise fail deep inside MPI */
  PetscCallMPI(MPI_Comm_size(comm,&size));
  for (i=0; i<nto; i++) {
    PetscCheck(toranks[i] >= 0 && size > toranks[i],comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
  }
  PetscCall(PetscLogEventSync(PETSC_BuildTwoSidedF,comm));
  PetscCall(PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0));
  PetscCall(PetscCommBuildTwoSidedGetType(comm,&buildtype));
  /* Only the Ibarrier algorithm has a specialized FReq path; the reduction
   * based algorithms share the reference implementation */
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    f = PetscCommBuildTwoSidedFReq_Ibarrier;
    break;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
  case PETSC_BUILDTWOSIDED_REDSCATTER:
    f = PetscCommBuildTwoSidedFReq_Reference;
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  PetscCall((*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx));
  PetscCall(PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0));
  PetscFunctionReturn(0);
}