/* Scalable discovery of communication partners ("two-sided from one-sided" rendezvous):
 * each rank knows only whom it sends to; these routines discover whom it receives from,
 * moving a constant-sized payload in the process.  Three algorithms are provided:
 * ALLREDUCE (flag array + MPI_Allreduce), IBARRIER (NBX nonblocking consensus of
 * Hoefler/Siebert/Lumsdaine 2010), and REDSCATTER (MPI_Reduce_scatter_block). */
#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/petscimpl.h>

PetscLogEvent PETSC_BuildTwoSided;
PetscLogEvent PETSC_BuildTwoSidedF;

/* Enum-string table in the standard PETSc layout: value names, then the enum type
 * name, then the option prefix, then a NULL terminator (consumed by PetscOptionsGetEnum()). */
const char *const PetscBuildTwoSidedTypes[] = {
  "ALLREDUCE",
  "IBARRIER",
  "REDSCATTER",
  "PetscBuildTwoSidedType",
  "PETSC_BUILDTWOSIDED_",
  NULL
};

/* Process-global algorithm selection; NOTSET until chosen explicitly via
 * PetscCommBuildTwoSidedSetType() or lazily in PetscCommBuildTwoSidedGetType(). */
static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

/*@
   PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
+  comm - communicator on which the collective consistency check is performed (e.g. PETSC_COMM_WORLD)
-  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

   Level: developer

   Note:
   This option is currently global (stored in a static variable), but could be made per-communicator.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
    PetscMPIInt ierr;
    PetscMPIInt b1[2],b2[2];
    /* Reduce (-v,v) with MPI_MAX: -b2[0] is the minimum value over ranks and b2[1]
     * the maximum, so they agree iff every rank passed the same enum value. */
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    ierr  = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
  }
  _twosided_type = twosided;
  PetscFunctionReturn(0);
}

/*@
   PetscCommBuildTwoSidedGetType - get algorithm to use when building two-sided communication

   Logically Collective

   Input Argument:
.  comm - communicator on which to query algorithm

   Output Argument:
.  twosided - algorithm to use for PetscCommBuildTwoSided()

   Level: developer

   Note:
   On first use this selects a default based on the communicator size (and the
   -build_twosided option) and caches it globally; subsequent calls return the cached value.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
#if defined(PETSC_HAVE_MPI_IBARRIER)
    if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; /* memory-scalable choice for large comms */
#endif
    /* A runtime option overrides the size-based default. */
    ierr = PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
  }
  *twosided = _twosided_type;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

/* NBX (nonblocking consensus) rendezvous: post synchronous sends (MPI_Issend), then
 * loop probing for incoming messages; once all local sends have been matched, start a
 * nonblocking barrier.  When the barrier completes, every rank's sends have been
 * received, so the set of senders is known.  Memory use scales with the number of
 * neighbors, not with the communicator size. */
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  /* Use PETSc's inner communicator with a fresh tag so our probes only match this rendezvous. */
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  tdata = (char*)todata;
  ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  for (i=0; i<nto; i++) {
    /* Synchronous send: the request completes only once the receiver has matched it. */
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  /* Segmented buffers grow incrementally because the number of senders is unknown up front. */
  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
   * but we need to work around it, so track whether the barrier was started with a separate flag
   * instead of testing barrier != MPI_REQUEST_NULL. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) {                 /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
        /* All of our synchronous sends have been received; join the consensus barrier. */
#if defined(PETSC_HAVE_MPI_IBARRIER)
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#elif defined(PETSC_HAVE_MPIX_IBARRIER)
        ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#endif
        barrier_started = PETSC_TRUE;
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      /* Keep probing until the barrier completes: messages can still arrive from ranks
       * that have not yet entered the barrier. */
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  /* Hand ownership of the collected ranks/data to the caller (caller frees with PetscFree()). */
  ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* Rendezvous via a size-of-comm flag array summed with MPI_Allreduce: after the
 * reduction, iflags[rank] is the number of messages this rank will receive.  Memory
 * is O(comm size), but the flag array is cached on the inner communicator so the
 * allocation cost is paid once per communicator. */
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode   ierr;
  PetscMPIInt      size,rank,*iflags,nrecvs,tag,*franks,i,flg;
  MPI_Aint         lb,unitbytes;
  char             *tdata,*fdata;
  MPI_Request      *reqs,*sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
  /* Reuse (or lazily create) the flag array cached on the inner communicator. */
  if (!counter->iflags) {
    ierr   = PetscCalloc1(size,&counter->iflags);CHKERRQ(ierr);
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    ierr   = PetscArrayzero(iflags,size);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr   = MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = iflags[rank]; /* number of ranks that flagged us as a destination */
  ierr   = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  ierr  = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr  = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs; /* receives occupy reqs[0..nrecvs), sends follow */
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
  /* Source ranks come from the receive statuses (first nrecvs entries). */
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata; /* caller frees with PetscFree() */
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
/* Like the Allreduce variant, but MPI_Reduce_scatter_block delivers only this rank's
 * receive count rather than the whole summed flag array. */
static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode   ierr;
  PetscMPIInt      size,*iflags,nrecvs,tag,*franks,i,flg;
  MPI_Aint         lb,unitbytes;
  char             *tdata,*fdata;
  MPI_Request      *reqs,*sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
  /* Reuse (or lazily create) the flag array cached on the inner communicator. */
  if (!counter->iflags) {
    ierr   = PetscCalloc1(size,&counter->iflags);CHKERRQ(ierr);
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    ierr   = PetscArrayzero(iflags,size);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  ierr  = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr  = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs; /* receives occupy reqs[0..nrecvs), sends follow */
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
  /* Source ranks come from the receive statuses (first nrecvs entries). */
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata; /* caller frees with PetscFree() */
  PetscFunctionReturn(0);
}
#endif

/*@C
   PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

   Collective

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
-  todata - data to send to each rank (packed)

   Output Arguments:
+  nfrom - number of ranks receiving messages from
.  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
-  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

   Level: developer

   Options Database Keys:
.  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier.

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.  1. - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
   Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode         ierr;
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscFunctionBegin;
  ierr = PetscSysInitializePackage();CHKERRQ(ierr);
  ierr = PetscLogEventSync(PETSC_BuildTwoSided,comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
  ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
  /* Dispatch to the selected algorithm; configure-time guards report a clear error
   * if the user requested an algorithm this MPI does not provide. */
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
    ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
    break;
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
    ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
    break;
  case PETSC_BUILDTWOSIDED_REDSCATTER:
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
    ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
#endif
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Reference implementation of the callback-based rendezvous: perform a complete
 * PetscCommBuildTwoSided() first, then invoke the user send/recv callbacks for the
 * payload round.  Simple, but the payload round cannot overlap discovery. */
static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*tag;
  MPI_Aint       lb,unitbytes;
  MPI_Request    *sendreq,*recvreq;

  PetscFunctionBegin;
  ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr);
  if (ntags > 0) {
    /* The duplicate both gives us the inner communicator and the first tag. */
    ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr);
  }
  for (i=1; i<ntags; i++) {
    ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr);
  }

  /* Perform complete initial rendezvous */
  ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);

  ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr);
  ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr);

  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    /* Pre-fill with MPI_REQUEST_NULL so a callback posting fewer than ntags requests is safe to wait on. */
    for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr);
  }
  for (i=0; i<*nfrom; i++) {
    void        *header = (*(char**)fromdata) + count*unitbytes*i;
    PetscMPIInt k;
    for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr);
  }
  ierr = PetscFree(tag);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  /* Caller waits on and frees both request arrays. */
  *toreqs   = sendreq;
  *fromreqs = recvreq;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

/* Callback-based rendezvous using the NBX consensus (see PetscCommBuildTwoSided_Ibarrier):
 * the user payload sends are posted immediately after the synchronous headers, so payload
 * transfer overlaps with discovery. */
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,*tags,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,*usendreqs,*req,barrier;
  PetscSegBuffer segrank,segdata,segreq;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  /* tag is reserved for the rendezvous headers; tags[] are handed to the user callbacks. */
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr);
  for (i=0; i<ntags; i++) {
    ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr);
  }
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  tdata = (char*)todata;
  ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  ierr  = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr);
  /* Post synchronous sends */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  /* Post actual payloads.  These are typically larger messages.  Hopefully sending these later does not slow down the
   * synchronous messages above. */
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr);
  }

  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
   * but we need to work around it, so track whether the barrier was started with a separate flag. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) {                 /* incoming message */
      PetscMPIInt *recvrank,k;
      void        *buf;
      ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      /* Immediately post the user's payload receives for this newly-discovered sender. */
      ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr);
      for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
      ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr);
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
#if defined(PETSC_HAVE_MPI_IBARRIER)
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#elif defined(PETSC_HAVE_MPIX_IBARRIER)
        ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr);
#endif
        barrier_started = PETSC_TRUE;
      }
    } else {
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  /* Ownership of the outstanding payload requests passes to the caller. */
  *toreqs = usendreqs;
  ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr);
  ierr = PetscFree(sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(tags);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*@C
   PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

   Collective

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
.  todata - data to send to each rank (packed)
.  ntags - number of tags needed by send/recv callbacks
.  send - callback invoked on sending process when ready to send primary payload
.  recv - callback invoked on receiving process after delivery of rendezvous message
-  ctx - context for callbacks

   Output Arguments:
+  nfrom - number of ranks receiving messages from
.  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
-  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.  1. - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
   Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
                                       PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                       PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  MPI_Request    *toreqs,*fromreqs;

  PetscFunctionBegin;
  /* Blocking convenience wrapper: run the request-returning variant, then wait on and
   * release all payload requests before returning. */
  ierr = PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);CHKERRQ(ierr);
  ierr = MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = PetscFree(toreqs);CHKERRQ(ierr);
  ierr = PetscFree(fromreqs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

   Collective

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
.  todata - data to send to each rank (packed)
.  ntags - number of tags needed by send/recv callbacks
.  send - callback invoked on sending process when ready to send primary payload
.  recv - callback invoked on receiving process after delivery of rendezvous message
-  ctx - context for callbacks

   Output Arguments:
+  nfrom - number of ranks receiving messages from
.  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
.  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
.  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
-  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.  1. - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in
   Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode         ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
                                   PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
                                   PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                   PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
  PetscMPIInt            i,size;

  PetscFunctionBegin;
  ierr = PetscSysInitializePackage();CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* Validate destinations up front so a bad rank fails with a clear message rather than in MPI. */
  for (i=0; i<nto; i++) {
    if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
  }
  ierr = PetscLogEventSync(PETSC_BuildTwoSidedF,comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr);
  ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
    f = PetscCommBuildTwoSidedFReq_Ibarrier;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
    break;
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
  case PETSC_BUILDTWOSIDED_REDSCATTER:
    /* No specialized callback variants for these algorithms; use the reference implementation. */
    f = PetscCommBuildTwoSidedFReq_Reference;
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  ierr = (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}