1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 3 PetscLogEvent PETSC_BuildTwoSided,PETSC_BuildTwoSidedF; 4 5 const char *const PetscBuildTwoSidedTypes[] = { 6 "ALLREDUCE", 7 "IBARRIER", 8 "REDSCATTER", 9 "PetscBuildTwoSidedType", 10 "PETSC_BUILDTWOSIDED_", 11 0 12 }; 13 14 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "PetscCommBuildTwoSidedSetType" 18 /*@ 19 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 20 21 Logically Collective 22 23 Input Arguments: 24 + comm - PETSC_COMM_WORLD 25 - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 26 27 Level: developer 28 29 Note: 30 This option is currently global, but could be made per-communicator. 31 32 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 33 @*/ 34 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 35 { 36 PetscFunctionBegin; 37 #if defined(PETSC_USE_DEBUG) 38 { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 39 PetscMPIInt ierr; 40 PetscMPIInt b1[2],b2[2]; 41 b1[0] = -(PetscMPIInt)twosided; 42 b1[1] = (PetscMPIInt)twosided; 43 ierr = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 44 if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 45 } 46 #endif 47 _twosided_type = twosided; 48 PetscFunctionReturn(0); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "PetscCommBuildTwoSidedGetType" 53 /*@ 54 PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 55 56 Logically Collective 57 58 Output Arguments: 59 + comm - communicator on which to query algorithm 60 - twosided - algorithm to use for PetscCommBuildTwoSided() 61 62 Level: developer 63 64 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 65 @*/ 66 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 67 { 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 72 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 73 #if defined(PETSC_HAVE_MPI_IBARRIER) 74 # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS) 75 /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */ 76 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 77 # else 78 _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 79 # endif 80 #else 81 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 82 #endif 83 ierr = PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 84 } 85 *twosided = _twosided_type; 86 PetscFunctionReturn(0); 87 } 88 89 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier" 93 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 94 { 95 PetscErrorCode ierr; 96 PetscMPIInt nrecvs,tag,done,i; 97 MPI_Aint lb,unitbytes; 98 char *tdata; 99 MPI_Request *sendreqs,barrier; 100 PetscSegBuffer segrank,segdata; 101 PetscBool barrier_started; 102 103 PetscFunctionBegin; 104 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 105 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 106 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 107 tdata = (char*)todata; 108 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 109 for (i=0; i<nto; i++) { 110 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 111 } 112 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 113 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 114 115 nrecvs = 0; 116 barrier = MPI_REQUEST_NULL; 117 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 118 * but we need to work around it. */ 119 barrier_started = PETSC_FALSE; 120 for (done=0; !done; ) { 121 PetscMPIInt flag; 122 MPI_Status status; 123 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 124 if (flag) { /* incoming message */ 125 PetscMPIInt *recvrank; 126 void *buf; 127 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 128 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 129 *recvrank = status.MPI_SOURCE; 130 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 131 nrecvs++; 132 } 133 if (!barrier_started) { 134 PetscMPIInt sent,nsends; 135 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 136 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 137 if (sent) { 138 #if defined(PETSC_HAVE_MPI_IBARRIER) 139 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 140 #elif defined(PETSC_HAVE_MPIX_IBARRIER) 141 ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); 142 #endif 143 barrier_started = PETSC_TRUE; 144 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 145 } 146 } else { 147 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 148 } 149 } 150 *nfrom = nrecvs; 151 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 152 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 153 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 154 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 155 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 156 PetscFunctionReturn(0); 157 } 158 #endif 159 160 #undef __FUNCT__ 161 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce" 162 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 163 { 164 PetscErrorCode ierr; 165 PetscMPIInt size,*iflags,nrecvs,tag,*franks,i; 166 MPI_Aint lb,unitbytes; 167 char *tdata,*fdata; 168 MPI_Request *reqs,*sendreqs; 169 MPI_Status *statuses; 170 171 PetscFunctionBegin; 172 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 173 ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr); 174 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 175 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); 176 ierr = PetscFree(iflags);CHKERRQ(ierr); 177 178 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 179 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 180 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 181 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 182 tdata = (char*)todata; 183 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 184 sendreqs = reqs + nrecvs; 185 for (i=0; i<nrecvs; i++) { 186 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 187 } 188 for (i=0; i<nto; i++) { 189 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 190 } 191 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 192 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 193 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 194 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 195 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 196 197 *nfrom = nrecvs; 198 *fromranks = franks; 199 *(void**)fromdata = fdata; 200 PetscFunctionReturn(0); 201 } 202 203 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 204 #undef __FUNCT__ 205 #define __FUNCT__ "PetscCommBuildTwoSided_RedScatter" 206 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 207 { 208 PetscErrorCode ierr; 209 PetscMPIInt size,*iflags,nrecvs,tag,*franks,i; 210 MPI_Aint lb,unitbytes; 211 char *tdata,*fdata; 212 MPI_Request *reqs,*sendreqs; 213 MPI_Status *statuses; 214 215 PetscFunctionBegin; 216 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 217 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 218 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 219 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 220 ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 221 ierr = PetscFree(iflags);CHKERRQ(ierr); 222 223 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 224 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 225 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 226 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 227 tdata = (char*)todata; 228 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 229 sendreqs = reqs + nrecvs; 230 for (i=0; i<nrecvs; i++) { 231 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 232 } 233 for (i=0; i<nto; i++) { 234 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 235 } 236 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 237 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 238 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 239 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 240 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 241 242 *nfrom = nrecvs; 243 *fromranks = franks; 244 *(void**)fromdata = fdata; 245 PetscFunctionReturn(0); 246 } 247 #endif 248 249 #undef __FUNCT__ 250 #define __FUNCT__ "PetscCommBuildTwoSided" 251 /*@C 252 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 253 254 Collective on MPI_Comm 255 256 Input Arguments: 257 + comm - communicator 258 . count - number of entries to send/receive (must match on all ranks) 259 . dtype - datatype to send/receive from each rank (must match on all ranks) 260 . nto - number of ranks to send data to 261 . toranks - ranks to send to (array of length nto) 262 - todata - data to send to each rank (packed) 263 264 Output Arguments: 265 + nfrom - number of ranks receiving messages from 266 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 267 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 268 269 Level: developer 270 271 Options Database Keys: 272 . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication 273 274 Notes: 275 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 276 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 277 278 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 279 280 References: 281 The MPI_Ibarrier implementation uses the algorithm in 282 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 283 284 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 285 @*/ 286 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 287 { 288 PetscErrorCode ierr; 289 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 290 291 PetscFunctionBegin; 292 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 293 ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 294 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 295 switch (buildtype) { 296 case PETSC_BUILDTWOSIDED_IBARRIER: 297 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 298 ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 299 #else 300 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 301 #endif 302 break; 303 case PETSC_BUILDTWOSIDED_ALLREDUCE: 304 ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 305 break; 306 case PETSC_BUILDTWOSIDED_REDSCATTER: 307 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 308 ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 309 #else 310 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 311 #endif 312 break; 313 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 314 } 315 ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNCT__ 320 #define __FUNCT__ "PetscCommBuildTwoSidedFReq_Reference" 321 static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 322 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 323 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 324 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 325 { 326 PetscErrorCode ierr; 327 PetscMPIInt i,*tag; 328 MPI_Aint lb,unitbytes; 329 MPI_Request *sendreq,*recvreq; 330 331 PetscFunctionBegin; 332 ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr); 333 if (ntags > 0) { 334 ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr); 335 } 336 for (i=1; i<ntags; i++) { 337 ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr); 338 } 339 340 /* Perform complete initial rendezvous */ 341 ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 342 343 ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr); 344 ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr); 345 346 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 347 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 348 for (i=0; i<nto; i++) { 349 PetscMPIInt k; 350 for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL; 351 ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr); 352 } 353 for (i=0; i<*nfrom; i++) { 354 void *header = (*(char**)fromdata) + count*unitbytes*i; 355 PetscMPIInt k; 356 for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL; 357 ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr); 358 } 359 ierr = PetscFree(tag);CHKERRQ(ierr); 360 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 361 *toreqs = sendreq; 362 *fromreqs = recvreq; 363 PetscFunctionReturn(0); 364 } 365 366 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "PetscCommBuildTwoSidedFReq_Ibarrier" 370 static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 371 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 372 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 373 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 374 { 375 PetscErrorCode ierr; 376 PetscMPIInt nrecvs,tag,*tags,done,i; 377 MPI_Aint lb,unitbytes; 378 char *tdata; 379 MPI_Request *sendreqs,*usendreqs,*req,barrier; 380 PetscSegBuffer segrank,segdata,segreq; 381 PetscBool barrier_started; 382 383 PetscFunctionBegin; 384 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 385 ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr); 386 for (i=0; i<ntags; i++) { 387 ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr); 388 } 389 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 390 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 391 tdata = (char*)todata; 392 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 393 ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr); 394 /* Post synchronous sends */ 395 for (i=0; i<nto; i++) { 396 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 397 } 398 /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the 399 * synchronous messages above. */ 400 for (i=0; i<nto; i++) { 401 PetscMPIInt k; 402 for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL; 403 ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr); 404 } 405 406 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 407 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 408 ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr); 409 410 nrecvs = 0; 411 barrier = MPI_REQUEST_NULL; 412 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 413 * but we need to work around it. */ 414 barrier_started = PETSC_FALSE; 415 for (done=0; !done; ) { 416 PetscMPIInt flag; 417 MPI_Status status; 418 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 419 if (flag) { /* incoming message */ 420 PetscMPIInt *recvrank,k; 421 void *buf; 422 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 423 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 424 *recvrank = status.MPI_SOURCE; 425 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 426 ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr); 427 for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL; 428 ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr); 429 nrecvs++; 430 } 431 if (!barrier_started) { 432 PetscMPIInt sent,nsends; 433 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 434 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 435 if (sent) { 436 #if defined(PETSC_HAVE_MPI_IBARRIER) 437 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 438 #elif defined(PETSC_HAVE_MPIX_IBARRIER) 439 ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); 440 #endif 441 barrier_started = PETSC_TRUE; 442 } 443 } else { 444 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 445 } 446 } 447 *nfrom = nrecvs; 448 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 449 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 450 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 451 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 452 *toreqs = usendreqs; 453 ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr); 454 ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr); 455 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 456 ierr = PetscFree(tags);CHKERRQ(ierr); 457 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 #endif 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "PetscCommBuildTwoSidedF" 464 /*@C 465 PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous 466 467 Collective on MPI_Comm 468 469 Input Arguments: 470 + comm - communicator 471 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 472 . dtype - datatype to send/receive from each rank (must match on all ranks) 473 . nto - number of ranks to send data to 474 . toranks - ranks to send to (array of length nto) 475 . todata - data to send to each rank (packed) 476 . ntags - number of tags needed by send/recv callbacks 477 . send - callback invoked on sending process when ready to send primary payload 478 . recv - callback invoked on receiving process after delivery of rendezvous message 479 - ctx - context for callbacks 480 481 Output Arguments: 482 + nfrom - number of ranks receiving messages from 483 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 484 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 485 486 Level: developer 487 488 Notes: 489 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 490 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data. 491 492 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 493 494 References: 495 The MPI_Ibarrier implementation uses the algorithm in 496 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 497 498 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 499 @*/ 500 PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags, 501 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 502 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 503 { 504 PetscErrorCode ierr; 505 MPI_Request *toreqs,*fromreqs; 506 507 PetscFunctionBegin; 508 ierr = PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);CHKERRQ(ierr); 509 ierr = MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 510 ierr = MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 511 ierr = PetscFree(toreqs);CHKERRQ(ierr); 512 ierr = PetscFree(fromreqs);CHKERRQ(ierr); 513 PetscFunctionReturn(0); 514 } 515 516 #undef __FUNCT__ 517 #define __FUNCT__ "PetscCommBuildTwoSidedFReq" 518 /*@C 519 PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests 520 521 Collective on MPI_Comm 522 523 Input Arguments: 524 + comm - communicator 525 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 526 . dtype - datatype to send/receive from each rank (must match on all ranks) 527 . nto - number of ranks to send data to 528 . toranks - ranks to send to (array of length nto) 529 . todata - data to send to each rank (packed) 530 . ntags - number of tags needed by send/recv callbacks 531 . send - callback invoked on sending process when ready to send primary payload 532 . recv - callback invoked on receiving process after delivery of rendezvous message 533 - ctx - context for callbacks 534 535 Output Arguments: 536 + nfrom - number of ranks receiving messages from 537 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 538 . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 539 . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree()) 540 - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree()) 541 542 Level: developer 543 544 Notes: 545 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 546 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data. 547 548 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 549 550 References: 551 The MPI_Ibarrier implementation uses the algorithm in 552 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 553 554 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 555 @*/ 556 PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 557 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 558 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 559 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 560 { 561 PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*, 562 PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**, 563 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 564 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx); 565 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 566 PetscMPIInt i,size; 567 568 PetscFunctionBegin; 569 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 570 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 571 for (i=0; i<nto; i++) { 572 if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size); 573 } 574 ierr = PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr); 575 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 576 switch (buildtype) { 577 case PETSC_BUILDTWOSIDED_IBARRIER: 578 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 579 f = PetscCommBuildTwoSidedFReq_Ibarrier; 580 #else 581 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 582 #endif 583 break; 584 case PETSC_BUILDTWOSIDED_ALLREDUCE: 585 case PETSC_BUILDTWOSIDED_REDSCATTER: 586 f = PetscCommBuildTwoSidedFReq_Reference; 587 break; 588 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 589 } 590 ierr = (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);CHKERRQ(ierr); 591 ierr = PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594