1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 3 PetscLogEvent PETSC_BuildTwoSided; 4 5 const char *const PetscBuildTwoSidedTypes[] = { 6 "ALLREDUCE", 7 "IBARRIER", 8 "REDSCATTER", 9 "PetscBuildTwoSidedType", 10 "PETSC_BUILDTWOSIDED_", 11 0 12 }; 13 14 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "PetscCommBuildTwoSidedSetType" 18 /*@ 19 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 20 21 Logically Collective 22 23 Input Arguments: 24 + comm - PETSC_COMM_WORLD 25 - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 26 27 Level: developer 28 29 Note: 30 This option is currently global, but could be made per-communicator. 31 32 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 33 @*/ 34 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 35 { 36 PetscFunctionBegin; 37 #if defined(PETSC_USE_DEBUG) 38 { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 39 PetscMPIInt ierr; 40 PetscMPIInt b1[2],b2[2]; 41 b1[0] = -(PetscMPIInt)twosided; 42 b1[1] = (PetscMPIInt)twosided; 43 ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 44 if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 45 } 46 #endif 47 _twosided_type = twosided; 48 PetscFunctionReturn(0); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "PetscCommBuildTwoSidedGetType" 53 /*@ 54 PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 55 56 Logically Collective 57 58 Output Arguments: 59 + comm - communicator on which to query algorithm 60 - twosided - algorithm to use for PetscCommBuildTwoSided() 61 62 Level: developer 63 64 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 65 @*/ 66 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 67 { 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 72 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 73 #if defined(PETSC_HAVE_MPI_IBARRIER) 74 # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS) 75 /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */ 76 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 77 # else 78 _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 79 # endif 80 #else 81 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 82 #endif 83 ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 84 } 85 *twosided = _twosided_type; 86 PetscFunctionReturn(0); 87 } 88 89 #if defined(PETSC_HAVE_MPI_IBARRIER) 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier" 93 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 94 { 95 PetscErrorCode ierr; 96 PetscMPIInt nrecvs,tag,done,i; 97 MPI_Aint lb,unitbytes; 98 char *tdata; 99 MPI_Request *sendreqs,barrier; 100 PetscSegBuffer segrank,segdata; 101 102 PetscFunctionBegin; 103 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 104 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 105 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 106 tdata = (char*)todata; 107 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 108 for (i=0; i<nto; i++) { 109 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 110 } 111 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 112 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 113 114 nrecvs = 0; 115 barrier = MPI_REQUEST_NULL; 116 for (done=0; !done; ) { 117 PetscMPIInt flag; 118 MPI_Status status; 119 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 120 if (flag) { /* incoming message */ 121 PetscMPIInt *recvrank; 122 void *buf; 123 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 124 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 125 *recvrank = status.MPI_SOURCE; 126 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 127 nrecvs++; 128 } 129 if (barrier == MPI_REQUEST_NULL) { 130 PetscMPIInt sent,nsends; 131 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 132 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 133 if (sent) { 134 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 135 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 136 } 137 } else { 138 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 139 } 140 } 141 *nfrom = nrecvs; 142 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 143 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 144 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 145 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 146 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 #endif 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce" 153 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 154 { 155 PetscErrorCode ierr; 156 PetscMPIInt size,*iflags,nrecvs,tag,*franks,i; 157 MPI_Aint lb,unitbytes; 158 char *tdata,*fdata; 159 MPI_Request *reqs,*sendreqs; 160 MPI_Status *statuses; 161 162 PetscFunctionBegin; 163 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 164 ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr); 165 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 166 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); 167 ierr = PetscFree(iflags);CHKERRQ(ierr); 168 169 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 170 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 171 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 172 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 173 tdata = (char*)todata; 174 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 175 sendreqs = reqs + nrecvs; 176 for (i=0; i<nrecvs; i++) { 177 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 178 } 179 for (i=0; i<nto; i++) { 180 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 181 } 182 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 183 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 184 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 185 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 186 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 187 188 *nfrom = nrecvs; 189 *fromranks = franks; 190 *(void**)fromdata = fdata; 191 PetscFunctionReturn(0); 192 } 193 194 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 195 #undef __FUNCT__ 196 #define __FUNCT__ "PetscCommBuildTwoSided_RedScatter" 197 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 198 { 199 PetscErrorCode ierr; 200 PetscMPIInt size,*iflags,nrecvs,tag,*franks,i; 201 MPI_Aint lb,unitbytes; 202 char *tdata,*fdata; 203 MPI_Request *reqs,*sendreqs; 204 MPI_Status *statuses; 205 206 PetscFunctionBegin; 207 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 208 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 209 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 210 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 211 ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 212 ierr = PetscFree(iflags);CHKERRQ(ierr); 213 214 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 215 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 216 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 217 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 218 tdata = (char*)todata; 219 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 220 sendreqs = reqs + nrecvs; 221 for (i=0; i<nrecvs; i++) { 222 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 223 } 224 for (i=0; i<nto; i++) { 225 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 226 } 227 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 228 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 229 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 230 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 231 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 232 233 *nfrom = nrecvs; 234 *fromranks = franks; 235 *(void**)fromdata = fdata; 236 PetscFunctionReturn(0); 237 } 238 #endif 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "PetscCommBuildTwoSided" 242 /*@C 243 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 244 245 Collective on MPI_Comm 246 247 Input Arguments: 248 + comm - communicator 249 . count - number of entries to send/receive (must match on all ranks) 250 . dtype - datatype to send/receive from each rank (must match on all ranks) 251 . nto - number of ranks to send data to 252 . toranks - ranks to send to (array of length nto) 253 - todata - data to send to each rank (packed) 254 255 Output Arguments: 256 + nfrom - number of ranks receiving messages from 257 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 258 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 259 260 Level: developer 261 262 Options Database Keys: 263 . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication 264 265 Notes: 266 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 267 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 268 269 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 270 271 References: 272 The MPI_Ibarrier implementation uses the algorithm in 273 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 274 275 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 276 @*/ 277 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 278 { 279 PetscErrorCode ierr; 280 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 281 282 PetscFunctionBegin; 283 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 284 ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 285 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 286 switch (buildtype) { 287 case PETSC_BUILDTWOSIDED_IBARRIER: 288 #if defined(PETSC_HAVE_MPI_IBARRIER) 289 ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 290 #else 291 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 292 #endif 293 break; 294 case PETSC_BUILDTWOSIDED_ALLREDUCE: 295 ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 296 break; 297 case PETSC_BUILDTWOSIDED_REDSCATTER: 298 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 299 ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 300 #else 301 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 302 #endif 303 break; 304 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 305 } 306 ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "PetscCommBuildTwoSidedFReq_Reference" 312 static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 313 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 314 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 315 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 316 { 317 PetscErrorCode ierr; 318 PetscMPIInt i,*tag; 319 MPI_Aint lb,unitbytes; 320 MPI_Request *sendreq,*recvreq; 321 322 PetscFunctionBegin; 323 ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr); 324 if (ntags > 0) { 325 ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr); 326 } 327 for (i=1; i<ntags; i++) { 328 ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr); 329 } 330 331 /* Perform complete initial rendezvous */ 332 ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 333 334 ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr); 335 ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr); 336 337 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 338 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 339 for (i=0; i<nto; i++) { 340 PetscMPIInt k; 341 for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL; 342 ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr); 343 } 344 for (i=0; i<*nfrom; i++) { 345 void *header = (*(char**)fromdata) + count*unitbytes*i; 346 PetscMPIInt k; 347 for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL; 348 ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr); 349 } 350 ierr = PetscFree(tag);CHKERRQ(ierr); 351 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 352 *toreqs = sendreq; 353 *fromreqs = recvreq; 354 PetscFunctionReturn(0); 355 } 356 357 #if defined(PETSC_HAVE_MPI_IBARRIER) 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "PetscCommBuildTwoSidedFReq_Ibarrier" 361 static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 362 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 363 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 364 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 365 { 366 PetscErrorCode ierr; 367 PetscMPIInt nrecvs,tag,*tags,done,i; 368 MPI_Aint lb,unitbytes; 369 char *tdata; 370 MPI_Request *sendreqs,*usendreqs,*req,barrier; 371 PetscSegBuffer segrank,segdata,segreq; 372 373 PetscFunctionBegin; 374 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 375 ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr); 376 for (i=0; i<ntags; i++) { 377 ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr); 378 } 379 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 380 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 381 tdata = (char*)todata; 382 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 383 ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr); 384 /* Post synchronous sends */ 385 for (i=0; i<nto; i++) { 386 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 387 } 388 /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the 389 * synchronous messages above. */ 390 for (i=0; i<nto; i++) { 391 PetscMPIInt k; 392 for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL; 393 ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr); 394 } 395 396 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 397 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 398 ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr); 399 400 nrecvs = 0; 401 barrier = MPI_REQUEST_NULL; 402 for (done=0; !done; ) { 403 PetscMPIInt flag; 404 MPI_Status status; 405 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 406 if (flag) { /* incoming message */ 407 PetscMPIInt *recvrank,k; 408 void *buf; 409 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 410 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 411 *recvrank = status.MPI_SOURCE; 412 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 413 ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr); 414 for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL; 415 ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr); 416 nrecvs++; 417 } 418 if (barrier == MPI_REQUEST_NULL) { 419 PetscMPIInt sent,nsends; 420 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 421 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 422 if (sent) { 423 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 424 } 425 } else { 426 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 427 } 428 } 429 *nfrom = nrecvs; 430 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 431 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 432 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 433 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 434 *toreqs = usendreqs; 435 ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr); 436 ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr); 437 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 438 ierr = PetscFree(tags);CHKERRQ(ierr); 439 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 440 PetscFunctionReturn(0); 441 } 442 #endif 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "PetscCommBuildTwoSidedF" 446 /*@C 447 PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous 448 449 Collective on MPI_Comm 450 451 Input Arguments: 452 + comm - communicator 453 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 454 . dtype - datatype to send/receive from each rank (must match on all ranks) 455 . nto - number of ranks to send data to 456 . toranks - ranks to send to (array of length nto) 457 . todata - data to send to each rank (packed) 458 . ntags - number of tags needed by send/recv callbacks 459 . send - callback invoked on sending process when ready to send primary payload 460 . recv - callback invoked on receiving process after delivery of rendezvous message 461 - ctx - context for callbacks 462 463 Output Arguments: 464 + nfrom - number of ranks receiving messages from 465 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 466 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 467 468 Level: developer 469 470 Notes: 471 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 472 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data. 473 474 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 475 476 References: 477 The MPI_Ibarrier implementation uses the algorithm in 478 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 479 480 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 481 @*/ 482 PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags, 483 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 484 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 485 { 486 PetscErrorCode ierr; 487 MPI_Request *toreqs,*fromreqs; 488 489 PetscFunctionBegin; 490 ierr = PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);CHKERRQ(ierr); 491 ierr = MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 492 ierr = MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 493 ierr = PetscFree(toreqs);CHKERRQ(ierr); 494 ierr = PetscFree(fromreqs);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "PetscCommBuildTwoSidedFReq" 500 /*@C 501 PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests 502 503 Collective on MPI_Comm 504 505 Input Arguments: 506 + comm - communicator 507 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 508 . dtype - datatype to send/receive from each rank (must match on all ranks) 509 . nto - number of ranks to send data to 510 . toranks - ranks to send to (array of length nto) 511 . todata - data to send to each rank (packed) 512 . ntags - number of tags needed by send/recv callbacks 513 . send - callback invoked on sending process when ready to send primary payload 514 . recv - callback invoked on receiving process after delivery of rendezvous message 515 - ctx - context for callbacks 516 517 Output Arguments: 518 + nfrom - number of ranks receiving messages from 519 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 520 . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 521 . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree()) 522 - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree()) 523 524 Level: developer 525 526 Notes: 527 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 528 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data. 529 530 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 531 532 References: 533 The MPI_Ibarrier implementation uses the algorithm in 534 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 535 536 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 537 @*/ 538 PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 539 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 540 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 541 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 542 { 543 PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*, 544 PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**, 545 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 546 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx); 547 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 548 549 PetscFunctionBegin; 550 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 551 ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 552 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 553 switch (buildtype) { 554 case PETSC_BUILDTWOSIDED_IBARRIER: 555 #if defined(PETSC_HAVE_MPI_IBARRIER) 556 f = PetscCommBuildTwoSidedFReq_Ibarrier; 557 #else 558 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 559 #endif 560 break; 561 case PETSC_BUILDTWOSIDED_ALLREDUCE: 562 case PETSC_BUILDTWOSIDED_REDSCATTER: 563 f = PetscCommBuildTwoSidedFReq_Reference; 564 break; 565 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 566 } 567 ierr = (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);CHKERRQ(ierr); 568 ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 569 PetscFunctionReturn(0); 570 } 571