#include <petscsys.h> /*I "petscsys.h" I*/

PetscLogEvent PETSC_BuildTwoSided,PETSC_BuildTwoSidedF;

const char *const PetscBuildTwoSidedTypes[] = {
  "ALLREDUCE",
  "IBARRIER",
  "REDSCATTER",
  "PetscBuildTwoSidedType",
  "PETSC_BUILDTWOSIDED_",
  0
};

static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

/*@
   PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
+  comm - PETSC_COMM_WORLD
-  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

   Level: developer

   Note:
   This option is currently global, but could be made per-communicator.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
{
  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
    PetscMPIInt ierr;
    PetscMPIInt b1[2],b2[2];
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    ierr  = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
  }
#endif
  _twosided_type = twosided;
  PetscFunctionReturn(0);
}

/*@
   PetscCommBuildTwoSidedGetType - get algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
.  comm - communicator on which to query algorithm

   Output Arguments:
.  twosided - algorithm to use for PetscCommBuildTwoSided()

   Level: developer

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
#if defined(PETSC_HAVE_MPI_IBARRIER)
# if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
    /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
# else
    _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
# endif
#else
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
#endif
    ierr = PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
  }
  *twosided = _twosided_type;
  PetscFunctionReturn(0);
}
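/* Illustrative usage sketch (not part of the library source): force a specific
 * algorithm before the first rendezvous, or query what will be used.  The same
 * choice is available at runtime via -build_twosided <allreduce|ibarrier|redscatter>.
 *
 *   PetscBuildTwoSidedType t;
 *   ierr = PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_ALLREDUCE);CHKERRQ(ierr);
 *   ierr = PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&t);CHKERRQ(ierr);  // t == PETSC_BUILDTWOSIDED_ALLREDUCE
 */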
#if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  tdata = (char*)todata;
  ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) {                 /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      ierr      = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr      = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
#if defined(PETSC_HAVE_MPI_IBARRIER)
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#elif defined(PETSC_HAVE_MPIX_IBARRIER)
        ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#endif
        barrier_started = PETSC_TRUE;
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  ierr   = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr   = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  ierr   = PetscCommDestroy(&comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
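/* The routine above follows the dynamic sparse data exchange algorithm of Hoefler,
 * Siebert and Lumsdaine (2010).  The skeleton, with PETSc bookkeeping stripped
 * (illustrative pseudocode, not compiled):
 *
 *   post MPI_Issend() to every destination rank;   // completes only once matched by a receive
 *   while (!done) {
 *     MPI_Iprobe(MPI_ANY_SOURCE,...) and MPI_Recv() anything that has arrived;
 *     if (!barrier_started && all local Issends have completed) start MPI_Ibarrier();
 *     else if (barrier_started) MPI_Test(&barrier,&done,...);
 *   }
 *
 * The synchronous send is the key: an Issend completes only after the receiver has
 * matched it, so once the nonblocking barrier completes on every rank, no rendezvous
 * message can still be undiscovered. */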
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata,*fdata;
  MPI_Request    *reqs,*sendreqs;
  MPI_Status     *statuses;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr);
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
  ierr = PetscFree(iflags);CHKERRQ(ierr);

  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  ierr  = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr  = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata,*fdata;
  MPI_Request    *reqs,*sendreqs;
  MPI_Status     *statuses;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr);
  ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  ierr = PetscFree(iflags);CHKERRQ(ierr);

  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  ierr  = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr  = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata;
  PetscFunctionReturn(0);
}
#endif
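/* Both fallbacks above begin by computing nrecvs, the number of incoming messages.
 * Worked example (illustrative): on a 4-rank communicator where rank 0 sends to
 * ranks {1,3} and rank 2 sends to rank {1}, the local iflags arrays are
 *
 *   rank 0: [0 1 0 1]   rank 1: [0 0 0 0]   rank 2: [0 1 0 0]   rank 3: [0 0 0 0]
 *
 * An elementwise sum scattered one entry per rank (what MPI_Reduce_scatter_block()
 * does in a single call) yields nrecvs = 0,2,0,1 on ranks 0..3; the subsequent
 * MPI_Irecv(MPI_ANY_SOURCE)/MPI_Isend pairs then move the payload. */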
/*@C
   PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

   Collective on MPI_Comm

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
-  todata - data to send to each rank (packed)

   Output Arguments:
+  nfrom - number of ranks from which messages will be received
.  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
-  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

   Level: developer

   Options Database Keys:
.  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010
   (the MPI_Ibarrier-based implementation uses the algorithm from this paper)

.seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode         ierr;
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscFunctionBegin;
  ierr = PetscSysInitializePackage();CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
  ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
    ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
    break;
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
    ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
    break;
  case PETSC_BUILDTWOSIDED_REDSCATTER:
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
    ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
#endif
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
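/* Illustrative caller sketch (hypothetical variable names): each rank knows only the
 * ranks it must send to; the call discovers who is sending to it.  Here a single
 * PetscMPIInt (say, an upcoming message length) rides along with the rendezvous:
 *
 *   PetscMPIInt nfrom,*fromranks,*fromlengths;
 *   // nto, toranks[], tolengths[] prepared by the caller
 *   ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths);CHKERRQ(ierr);
 *   // fromranks[i] will send a message of length fromlengths[i]
 *   ierr = PetscFree(fromranks);CHKERRQ(ierr);
 *   ierr = PetscFree(fromlengths);CHKERRQ(ierr);
 */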
static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*tag;
  MPI_Aint       lb,unitbytes;
  MPI_Request    *sendreq,*recvreq;

  PetscFunctionBegin;
  ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr);
  if (ntags > 0) {
    ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr);
  }
  for (i=1; i<ntags; i++) {
    ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr);
  }

  /* Perform complete initial rendezvous */
  ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);

  ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr);
  ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr);

  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr);
  }
  for (i=0; i<*nfrom; i++) {
    void        *header = (*(char**)fromdata) + count*unitbytes*i;
    PetscMPIInt k;
    for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr);
  }
  ierr = PetscFree(tag);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  *toreqs   = sendreq;
  *fromreqs = recvreq;
  PetscFunctionReturn(0);
}
#if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,*tags,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,*usendreqs,*req,barrier;
  PetscSegBuffer segrank,segdata,segreq;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr);
  for (i=0; i<ntags; i++) {
    ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr);
  }
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  tdata = (char*)todata;
  ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  ierr  = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr);
  /* Post synchronous sends */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  /* Post actual payloads.  These are typically larger messages.  Hopefully sending these later does not slow down the
   * synchronous messages above. */
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr);
  }

  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) {                 /* incoming message */
      PetscMPIInt *recvrank,k;
      void        *buf;
      ierr      = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr      = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      ierr      = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr);
      for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
      ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr);
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
#if defined(PETSC_HAVE_MPI_IBARRIER)
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#elif defined(PETSC_HAVE_MPIX_IBARRIER)
        ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#endif
        barrier_started = PETSC_TRUE;
      }
    } else {
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  ierr   = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr   = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  *toreqs = usendreqs;
  ierr    = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr);
  ierr    = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr);
  ierr    = PetscFree(sendreqs);CHKERRQ(ierr);
  ierr    = PetscFree(tags);CHKERRQ(ierr);
  ierr    = PetscCommDestroy(&comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
/*@C
   PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

   Collective on MPI_Comm

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
.  todata - data to send to each rank (packed)
.  ntags - number of tags needed by send/recv callbacks
.  send - callback invoked on sending process when ready to send primary payload
.  recv - callback invoked on receiving process after delivery of rendezvous message
-  ctx - context for callbacks

   Output Arguments:
+  nfrom - number of ranks from which messages will be received
.  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
-  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010
   (the MPI_Ibarrier-based implementation uses the algorithm from this paper)

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
                                       PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                       PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  MPI_Request    *toreqs,*fromreqs;

  PetscFunctionBegin;
  ierr = PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);CHKERRQ(ierr);
  ierr = MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree(toreqs);CHKERRQ(ierr);
  ierr = PetscFree(fromreqs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
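/* Illustrative callback sketch (hypothetical names and context struct): with ntags = 1
 * each callback posts one request for its large payload using the tags the rendezvous
 * allocated.  The send callback receives the index i into toranks as well as the rank:
 *
 *   typedef struct { double **payload; PetscMPIInt *len; } MyCtx;
 *
 *   static PetscErrorCode MySend(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt i,PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
 *   {
 *     MyCtx          *c = (MyCtx*)ctx;
 *     PetscErrorCode ierr;
 *
 *     PetscFunctionBegin;
 *     ierr = MPI_Isend(c->payload[i],c->len[i],MPI_DOUBLE,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
 *     PetscFunctionReturn(0);
 *   }
 *
 * PetscCommBuildTwoSidedF() waits on all resulting requests before returning;
 * PetscCommBuildTwoSidedFReq() instead hands them back through toreqs/fromreqs. */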
/*@C
   PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

   Collective on MPI_Comm

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
.  todata - data to send to each rank (packed)
.  ntags - number of tags needed by send/recv callbacks
.  send - callback invoked on sending process when ready to send primary payload
.  recv - callback invoked on receiving process after delivery of rendezvous message
-  ctx - context for callbacks

   Output Arguments:
+  nfrom - number of ranks from which messages will be received
.  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
.  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
.  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
-  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010
   (the MPI_Ibarrier-based implementation uses the algorithm from this paper)

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
                           PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
  PetscMPIInt            i,size;

  PetscFunctionBegin;
  ierr = PetscSysInitializePackage();CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  for (i=0; i<nto; i++) {
    if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
  }
  ierr = PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr);
  ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
    f = PetscCommBuildTwoSidedFReq_Ibarrier;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
    break;
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
  case PETSC_BUILDTWOSIDED_REDSCATTER:
    f = PetscCommBuildTwoSidedFReq_Reference;
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  ierr = (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
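/* Illustrative caller sketch for the request-returning variant (hypothetical names,
 * continuing the MySend/MyRecv example above): with ntags = 1 there are nto send
 * requests and nfrom receive requests to complete:
 *
 *   MPI_Request *toreqs,*fromreqs;
 *   ierr = PetscCommBuildTwoSidedFReq(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths,1,&toreqs,&fromreqs,MySend,MyRecv,&myctx);CHKERRQ(ierr);
 *   // ... overlap unrelated computation here ...
 *   ierr = MPI_Waitall(nto,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
 *   ierr = MPI_Waitall(nfrom,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
 *   ierr = PetscFree(toreqs);CHKERRQ(ierr);
 *   ierr = PetscFree(fromreqs);CHKERRQ(ierr);
 */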