1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 3 PetscLogEvent PETSC_BuildTwoSided,PETSC_BuildTwoSidedF; 4 5 const char *const PetscBuildTwoSidedTypes[] = { 6 "ALLREDUCE", 7 "IBARRIER", 8 "REDSCATTER", 9 "PetscBuildTwoSidedType", 10 "PETSC_BUILDTWOSIDED_", 11 0 12 }; 13 14 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "PetscCommBuildTwoSidedSetType" 18 /*@ 19 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 20 21 Logically Collective 22 23 Input Arguments: 24 + comm - PETSC_COMM_WORLD 25 - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 26 27 Level: developer 28 29 Note: 30 This option is currently global, but could be made per-communicator. 31 32 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 33 @*/ 34 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 35 { 36 PetscFunctionBegin; 37 #if defined(PETSC_USE_DEBUG) 38 { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 39 PetscMPIInt ierr; 40 PetscMPIInt b1[2],b2[2]; 41 b1[0] = -(PetscMPIInt)twosided; 42 b1[1] = (PetscMPIInt)twosided; 43 ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 44 if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 45 } 46 #endif 47 _twosided_type = twosided; 48 PetscFunctionReturn(0); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "PetscCommBuildTwoSidedGetType" 53 /*@ 54 PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 55 56 Logically Collective 57 58 Output Arguments: 59 + comm - communicator on which to query algorithm 60 - twosided - algorithm to use for PetscCommBuildTwoSided() 61 62 Level: developer 63 64 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 65 @*/ 66 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 67 { 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 72 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 73 #if defined(PETSC_HAVE_MPI_IBARRIER) 74 # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS) 75 /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */ 76 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 77 # else 78 _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 79 # endif 80 #else 81 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 82 #endif 83 ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 84 } 85 *twosided = _twosided_type; 86 PetscFunctionReturn(0); 87 } 88 89 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier" 93 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 94 { 95 PetscErrorCode ierr; 96 PetscMPIInt nrecvs,tag,done,i; 97 MPI_Aint lb,unitbytes; 98 char *tdata; 99 MPI_Request *sendreqs,barrier; 100 PetscSegBuffer segrank,segdata; 101 102 PetscFunctionBegin; 103 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 104 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 105 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 106 tdata = (char*)todata; 107 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 108 for (i=0; i<nto; i++) { 109 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 110 } 111 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 112 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 113 114 nrecvs = 0; 115 barrier = MPI_REQUEST_NULL; 116 for (done=0; !done; ) { 117 PetscMPIInt flag; 118 MPI_Status status; 119 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 120 if (flag) { /* incoming message */ 121 PetscMPIInt *recvrank; 122 void *buf; 123 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 124 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 125 *recvrank = status.MPI_SOURCE; 126 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 127 nrecvs++; 128 } 129 if (barrier == MPI_REQUEST_NULL) { 130 PetscMPIInt sent,nsends; 131 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 132 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 133 if (sent) { 134 #if defined(PETSC_HAVE_MPI_IBARRIER) 135 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 136 #elif defined(PETSC_HAVE_MPIX_IBARRIER) 137 ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); 138 #endif 139 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 140 } 141 } else { 142 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 143 } 144 } 145 *nfrom = nrecvs; 146 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 147 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 148 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 149 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 150 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 151 PetscFunctionReturn(0); 152 } 153 #endif 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce" 157 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 158 { 159 PetscErrorCode ierr; 160 PetscMPIInt size,*iflags,nrecvs,tag,*franks,i; 161 MPI_Aint lb,unitbytes; 162 char *tdata,*fdata; 163 MPI_Request *reqs,*sendreqs; 164 MPI_Status *statuses; 165 166 PetscFunctionBegin; 167 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 168 ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr); 169 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 170 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); 171 ierr = PetscFree(iflags);CHKERRQ(ierr); 172 173 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 174 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 175 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 176 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 177 tdata = (char*)todata; 178 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 179 sendreqs = reqs + nrecvs; 180 for (i=0; i<nrecvs; i++) { 181 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 182 } 183 for (i=0; i<nto; i++) { 184 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 185 } 186 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 187 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 188 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 189 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 190 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 191 192 *nfrom = nrecvs; 193 *fromranks = franks; 194 *(void**)fromdata = fdata; 195 PetscFunctionReturn(0); 196 } 197 198 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 199 #undef __FUNCT__ 200 #define __FUNCT__ "PetscCommBuildTwoSided_RedScatter" 201 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 202 { 203 PetscErrorCode ierr; 204 PetscMPIInt size,*iflags,nrecvs,tag,*franks,i; 205 MPI_Aint lb,unitbytes; 206 char *tdata,*fdata; 207 MPI_Request *reqs,*sendreqs; 208 MPI_Status *statuses; 209 210 PetscFunctionBegin; 211 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 212 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 213 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 214 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 215 ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 216 ierr = PetscFree(iflags);CHKERRQ(ierr); 217 218 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 219 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 220 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 221 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 222 tdata = (char*)todata; 223 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 224 sendreqs = reqs + nrecvs; 225 for (i=0; i<nrecvs; i++) { 226 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 227 } 228 for (i=0; i<nto; i++) { 229 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 230 } 231 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 232 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 233 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 234 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 235 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 236 237 *nfrom = nrecvs; 238 *fromranks = franks; 239 *(void**)fromdata = fdata; 240 PetscFunctionReturn(0); 241 } 242 #endif 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "PetscCommBuildTwoSided" 246 /*@C 247 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 248 249 Collective on MPI_Comm 250 251 Input Arguments: 252 + comm - communicator 253 . count - number of entries to send/receive (must match on all ranks) 254 . dtype - datatype to send/receive from each rank (must match on all ranks) 255 . nto - number of ranks to send data to 256 . toranks - ranks to send to (array of length nto) 257 - todata - data to send to each rank (packed) 258 259 Output Arguments: 260 + nfrom - number of ranks receiving messages from 261 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 262 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 263 264 Level: developer 265 266 Options Database Keys: 267 . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication 268 269 Notes: 270 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 271 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 272 273 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 274 275 References: 276 The MPI_Ibarrier implementation uses the algorithm in 277 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 278 279 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 280 @*/ 281 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 282 { 283 PetscErrorCode ierr; 284 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 285 286 PetscFunctionBegin; 287 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 288 ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 289 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 290 switch (buildtype) { 291 case PETSC_BUILDTWOSIDED_IBARRIER: 292 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 293 ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 294 #else 295 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 296 #endif 297 break; 298 case PETSC_BUILDTWOSIDED_ALLREDUCE: 299 ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 300 break; 301 case PETSC_BUILDTWOSIDED_REDSCATTER: 302 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 303 ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 304 #else 305 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 306 #endif 307 break; 308 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 309 } 310 ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "PetscCommBuildTwoSidedFReq_Reference" 316 static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 317 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 318 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 319 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 320 { 321 PetscErrorCode ierr; 322 PetscMPIInt i,*tag; 323 MPI_Aint lb,unitbytes; 324 MPI_Request *sendreq,*recvreq; 325 326 PetscFunctionBegin; 327 ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr); 328 if (ntags > 0) { 329 ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr); 330 } 331 for (i=1; i<ntags; i++) { 332 ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr); 333 } 334 335 /* Perform complete initial rendezvous */ 336 ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 337 338 ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr); 339 ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr); 340 341 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 342 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 343 for (i=0; i<nto; i++) { 344 PetscMPIInt k; 345 for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL; 346 ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr); 347 } 348 for (i=0; i<*nfrom; i++) { 349 void *header = (*(char**)fromdata) + count*unitbytes*i; 350 PetscMPIInt k; 351 for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL; 352 ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr); 353 } 354 ierr = PetscFree(tag);CHKERRQ(ierr); 355 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 356 *toreqs = sendreq; 357 *fromreqs = recvreq; 358 PetscFunctionReturn(0); 359 } 360 361 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "PetscCommBuildTwoSidedFReq_Ibarrier" 365 static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 366 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 367 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 368 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 369 { 370 PetscErrorCode ierr; 371 PetscMPIInt nrecvs,tag,*tags,done,i; 372 MPI_Aint lb,unitbytes; 373 char *tdata; 374 MPI_Request *sendreqs,*usendreqs,*req,barrier; 375 PetscSegBuffer segrank,segdata,segreq; 376 377 PetscFunctionBegin; 378 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 379 ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr); 380 for (i=0; i<ntags; i++) { 381 ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr); 382 } 383 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 384 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 385 tdata = (char*)todata; 386 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 387 ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr); 388 /* Post synchronous sends */ 389 for (i=0; i<nto; i++) { 390 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 391 } 392 /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the 393 * synchronous messages above. */ 394 for (i=0; i<nto; i++) { 395 PetscMPIInt k; 396 for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL; 397 ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr); 398 } 399 400 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 401 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 402 ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr); 403 404 nrecvs = 0; 405 barrier = MPI_REQUEST_NULL; 406 for (done=0; !done; ) { 407 PetscMPIInt flag; 408 MPI_Status status; 409 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 410 if (flag) { /* incoming message */ 411 PetscMPIInt *recvrank,k; 412 void *buf; 413 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 414 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 415 *recvrank = status.MPI_SOURCE; 416 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 417 ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr); 418 for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL; 419 ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr); 420 nrecvs++; 421 } 422 if (barrier == MPI_REQUEST_NULL) { 423 PetscMPIInt sent,nsends; 424 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 425 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 426 if (sent) { 427 #if defined(PETSC_HAVE_MPI_IBARRIER) 428 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 429 #elif defined(PETSC_HAVE_MPIX_IBARRIER) 430 ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); 431 #endif 432 } 433 } else { 434 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 435 } 436 } 437 *nfrom = nrecvs; 438 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 439 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 440 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 441 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 442 *toreqs = usendreqs; 443 ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr); 444 ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr); 445 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 446 ierr = PetscFree(tags);CHKERRQ(ierr); 447 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 448 PetscFunctionReturn(0); 449 } 450 #endif 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "PetscCommBuildTwoSidedF" 454 /*@C 455 PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous 456 457 Collective on MPI_Comm 458 459 Input Arguments: 460 + comm - communicator 461 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 462 . dtype - datatype to send/receive from each rank (must match on all ranks) 463 . nto - number of ranks to send data to 464 . toranks - ranks to send to (array of length nto) 465 . todata - data to send to each rank (packed) 466 . ntags - number of tags needed by send/recv callbacks 467 . send - callback invoked on sending process when ready to send primary payload 468 . recv - callback invoked on receiving process after delivery of rendezvous message 469 - ctx - context for callbacks 470 471 Output Arguments: 472 + nfrom - number of ranks receiving messages from 473 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 474 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 475 476 Level: developer 477 478 Notes: 479 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 480 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data. 481 482 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 483 484 References: 485 The MPI_Ibarrier implementation uses the algorithm in 486 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 487 488 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 489 @*/ 490 PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags, 491 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 492 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 493 { 494 PetscErrorCode ierr; 495 MPI_Request *toreqs,*fromreqs; 496 497 PetscFunctionBegin; 498 ierr = PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);CHKERRQ(ierr); 499 ierr = MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 500 ierr = MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 501 ierr = PetscFree(toreqs);CHKERRQ(ierr); 502 ierr = PetscFree(fromreqs);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "PetscCommBuildTwoSidedFReq" 508 /*@C 509 PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests 510 511 Collective on MPI_Comm 512 513 Input Arguments: 514 + comm - communicator 515 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 516 . dtype - datatype to send/receive from each rank (must match on all ranks) 517 . nto - number of ranks to send data to 518 . toranks - ranks to send to (array of length nto) 519 . todata - data to send to each rank (packed) 520 . ntags - number of tags needed by send/recv callbacks 521 . send - callback invoked on sending process when ready to send primary payload 522 . recv - callback invoked on receiving process after delivery of rendezvous message 523 - ctx - context for callbacks 524 525 Output Arguments: 526 + nfrom - number of ranks receiving messages from 527 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 528 . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 529 . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree()) 530 - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree()) 531 532 Level: developer 533 534 Notes: 535 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 536 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data. 537 538 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 539 540 References: 541 The MPI_Ibarrier implementation uses the algorithm in 542 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 543 544 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 545 @*/ 546 PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 547 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 548 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 549 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 550 { 551 PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*, 552 PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**, 553 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 554 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx); 555 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 556 PetscMPIInt i,size; 557 558 PetscFunctionBegin; 559 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 560 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 561 for (i=0; i<nto; i++) { 562 if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size); 563 } 564 ierr = PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr); 565 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 566 switch (buildtype) { 567 case PETSC_BUILDTWOSIDED_IBARRIER: 568 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 569 f = PetscCommBuildTwoSidedFReq_Ibarrier; 570 #else 571 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 572 #endif 573 break; 574 case PETSC_BUILDTWOSIDED_ALLREDUCE: 575 case PETSC_BUILDTWOSIDED_REDSCATTER: 576 f = PetscCommBuildTwoSidedFReq_Reference; 577 break; 578 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 579 } 580 ierr = (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);CHKERRQ(ierr); 581 ierr = PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr); 582 PetscFunctionReturn(0); 583 } 584