#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/petscimpl.h>

PetscLogEvent PETSC_BuildTwoSided;
PetscLogEvent PETSC_BuildTwoSidedF;

const char *const PetscBuildTwoSidedTypes[] = {
  "ALLREDUCE",
  "IBARRIER",
  "REDSCATTER",
  "PetscBuildTwoSidedType",
  "PETSC_BUILDTWOSIDED_",
  NULL
};

static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

/*@
   PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
+  comm - communicator on which to set the algorithm (the setting is currently global, so this is typically PETSC_COMM_WORLD)
-  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

   Level: developer

   Note:
   This option is currently global, but could be made per-communicator.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
    PetscMPIInt ierr;
    PetscMPIInt b1[2],b2[2];
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    ierr  = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRMPI(ierr);
    if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
  }
  _twosided_type = twosided;
  PetscFunctionReturn(0);
}

/*@
   PetscCommBuildTwoSidedGetType - get the algorithm to use when building two-sided communication

   Logically Collective

   Input Argument:
.  comm - communicator on which to query the algorithm

   Output Argument:
.  twosided - algorithm to use for PetscCommBuildTwoSided()

   Level: developer

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
    ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
#if defined(PETSC_HAVE_MPI_IBARRIER)
    if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
#endif
    ierr = PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
  }
  *twosided = _twosided_type;
  PetscFunctionReturn(0);
}
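/* A minimal sketch (not part of the library) of how a caller might force a
 * particular discovery algorithm. PetscCommBuildTwoSidedSetType() and
 * PetscCommBuildTwoSidedGetType() are the routines above; everything else is
 * standard PETSc setup. The same choice can be made at runtime with
 * -build_twosided <allreduce|ibarrier|redscatter>.
 *
 *   #include <petscsys.h>
 *   int main(int argc,char **argv)
 *   {
 *     PetscBuildTwoSidedType type;
 *     PetscErrorCode         ierr;
 *
 *     ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
 *     ierr = PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_IBARRIER);CHKERRQ(ierr);
 *     ierr = PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD,&type);CHKERRQ(ierr);
 *     ierr = PetscPrintf(PETSC_COMM_WORLD,"two-sided type: %s\n",PetscBuildTwoSidedTypes[type]);CHKERRQ(ierr);
 *     ierr = PetscFinalize();
 *     return ierr;
 *   }
 */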
#if defined(PETSC_HAVE_MPI_IBARRIER)
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  tdata = (char*)todata;
  ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRMPI(ierr);
  }
  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRMPI(ierr);
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRMPI(ierr);
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
      if (sent) {
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRMPI(ierr);
        barrier_started = PETSC_TRUE;
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRMPI(ierr);
    }
  }
  *nfrom = nrecvs;
  ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
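/* The two variants below discover the receive count without a nonblocking barrier:
 * each rank fills a size-long 0/1 flag array marking its destinations, and an
 * element-wise sum across all ranks leaves in slot r the number of processes that
 * will send to rank r. For example, on 3 ranks where rank 0 sends to {1,2} and
 * rank 2 sends to {1}:
 *
 *   rank 0: iflags = {0,1,1}
 *   rank 1: iflags = {0,0,0}
 *   rank 2: iflags = {0,1,0}
 *   sum            = {0,2,1}   -> rank 1 expects 2 messages, rank 2 expects 1
 *
 * PetscCommBuildTwoSided_Allreduce obtains the full sum on every rank with
 * MPI_Allreduce and reads its own slot; this is simple but costs O(size) memory
 * and communication per rank. */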
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode   ierr;
  PetscMPIInt      size,rank,*iflags,nrecvs,tag,*franks,i,flg;
  MPI_Aint         lb,unitbytes;
  char             *tdata,*fdata;
  MPI_Request      *reqs,*sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
  if (!counter->iflags) {
    ierr   = PetscCalloc1(size,&counter->iflags);CHKERRQ(ierr);
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    ierr   = PetscArrayzero(iflags,size);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr   = MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  nrecvs = iflags[rank];
  ierr   = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  ierr  = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr  = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRMPI(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRMPI(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRMPI(ierr);
  ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata;
  PetscFunctionReturn(0);
}
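/* The variant below computes only the one entry each rank actually needs:
 * MPI_Reduce_scatter_block with a receive count of 1 performs the same
 * element-wise sum as the MPI_Allreduce above but delivers just element r to
 * rank r, so nrecvs arrives directly without materializing the summed flag
 * array on every rank. */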
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode   ierr;
  PetscMPIInt      size,*iflags,nrecvs,tag,*franks,i,flg;
  MPI_Aint         lb,unitbytes;
  char             *tdata,*fdata;
  MPI_Request      *reqs,*sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set");
  if (!counter->iflags) {
    ierr   = PetscCalloc1(size,&counter->iflags);CHKERRQ(ierr);
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    ierr   = PetscArrayzero(iflags,size);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  ierr  = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr  = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRMPI(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRMPI(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRMPI(ierr);
  ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata;
  PetscFunctionReturn(0);
}
#endif

/*@C
   PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

   Collective

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
-  todata - data to send to each rank (packed)

   Output Arguments:
+  nfrom - number of ranks from which messages are received
.  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
-  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

   Level: developer

   Options Database Keys:
.  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier.

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   The MPI_Ibarrier-based implementation uses the NBX algorithm from the reference below.

   References:
.  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode         ierr;
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscFunctionBegin;
  ierr = PetscSysInitializePackage();CHKERRQ(ierr);
  ierr = PetscLogEventSync(PETSC_BuildTwoSided,comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
  ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_IBARRIER)
    ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
    break;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
    ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
    break;
  case PETSC_BUILDTWOSIDED_REDSCATTER:
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
    ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
    break;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
#endif
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
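/* A minimal sketch of typical usage: each rank announces the lengths of the
 * messages it will later send, and learns who will send to it and how long
 * those messages are. The variables nto/toranks/tolens are assumed to have
 * been computed by the caller; this is illustration only, not library code.
 *
 *   PetscMPIInt    nto,*toranks;     // destinations, chosen by the caller
 *   PetscMPIInt    nfrom,*fromranks; // discovered sources
 *   PetscInt      *tolens,*fromlens; // one length per destination/source
 *   PetscErrorCode ierr;
 *
 *   ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nto,toranks,tolens,
 *                                 &nfrom,&fromranks,&fromlens);CHKERRQ(ierr);
 *   // fromlens[i] is the length announced by fromranks[i]; post receives, etc.
 *   ierr = PetscFree(fromranks);CHKERRQ(ierr);
 *   ierr = PetscFree(fromlens);CHKERRQ(ierr);
 */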
static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    i,*tag;
  MPI_Aint       lb,unitbytes;
  MPI_Request    *sendreq,*recvreq;

  PetscFunctionBegin;
  ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr);
  if (ntags > 0) {
    ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr);
  }
  for (i=1; i<ntags; i++) {
    ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr);
  }

  /* Perform complete initial rendezvous */
  ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);

  ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr);
  ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr);

  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr);
  }
  for (i=0; i<*nfrom; i++) {
    void        *header = (*(char**)fromdata) + count*unitbytes*i;
    PetscMPIInt k;
    for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr);
  }
  ierr = PetscFree(tag);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  *toreqs   = sendreq;
  *fromreqs = recvreq;
  PetscFunctionReturn(0);
}
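/* The Ibarrier-based variant below avoids the complete rendezvous done by the
 * reference implementation above: it posts the rendezvous headers with
 * MPI_Issend and hands each discovered message to the recv callback as soon as
 * its header arrives, so the payload receives overlap the discovery loop. */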
#if defined(PETSC_HAVE_MPI_IBARRIER)
static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,*tags,done,i;
  MPI_Aint       lb,unitbytes;
  char           *tdata;
  MPI_Request    *sendreqs,*usendreqs,*req,barrier;
  PetscSegBuffer segrank,segdata,segreq;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
  ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr);
  for (i=0; i<ntags; i++) {
    ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr);
  }
  ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr);
  if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
  tdata = (char*)todata;
  ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  ierr  = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr);
  /* Post synchronous sends */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRMPI(ierr);
  }
  /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
   * synchronous messages above. */
  for (i=0; i<nto; i++) {
    PetscMPIInt k;
    for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
    ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr);
  }

  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done=0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRMPI(ierr);
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank,k;
      void        *buf;
      ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRMPI(ierr);
      ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr);
      for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
      ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr);
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
      if (sent) {
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRMPI(ierr);
        barrier_started = PETSC_TRUE;
      }
    } else {
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRMPI(ierr);
    }
  }
  *nfrom = nrecvs;
  ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  *toreqs = usendreqs;
  ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr);
  ierr = PetscFree(sendreqs);CHKERRQ(ierr);
  ierr = PetscFree(tags);CHKERRQ(ierr);
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
/*@C
   PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

   Collective

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
.  todata - data to send to each rank (packed)
.  ntags - number of tags needed by send/recv callbacks
.  send - callback invoked on sending process when ready to send primary payload
.  recv - callback invoked on receiving process after delivery of rendezvous message
-  ctx - context for callbacks

   Output Arguments:
+  nfrom - number of ranks from which messages are received
.  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
-  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   The MPI_Ibarrier-based implementation uses the NBX algorithm from the reference below.

   References:
.  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
                                       PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                       PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr;
  MPI_Request    *toreqs,*fromreqs;

  PetscFunctionBegin;
  ierr = PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);CHKERRQ(ierr);
  ierr = MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
  ierr = MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
  ierr = PetscFree(toreqs);CHKERRQ(ierr);
  ierr = PetscFree(fromreqs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
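/* A hedged sketch of the callback shape expected by PetscCommBuildTwoSidedF()
 * and PetscCommBuildTwoSidedFReq(). The Ctx type and its fields are
 * hypothetical illustration; only the callback signatures and the meaning of
 * the arguments (taken from the calls in this file) are from the source. Each
 * callback must fill exactly ntags requests (here ntags == 1), and the
 * rendezvous header carries the payload length (count == 1, dtype == MPI_INT).
 *
 *   typedef struct {
 *     PetscScalar **sendbufs; // payload for each destination, indexed like toranks
 *   } Ctx;
 *
 *   static PetscErrorCode MySend(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt i,
 *                                PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
 *   {
 *     Ctx           *c   = (Ctx*)ctx;
 *     PetscMPIInt    len = *(PetscMPIInt*)header; // header carries payload length
 *     PetscErrorCode ierr;
 *
 *     PetscFunctionBegin;
 *     ierr = MPI_Isend(c->sendbufs[i],len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);CHKERRMPI(ierr);
 *     PetscFunctionReturn(0);
 *   }
 *
 *   static PetscErrorCode MyRecv(MPI_Comm comm,const PetscMPIInt tags[],PetscMPIInt rank,
 *                                void *header,MPI_Request req[],void *ctx)
 *   {
 *     PetscMPIInt    len = *(PetscMPIInt*)header;
 *     PetscScalar   *buf;
 *     PetscErrorCode ierr;
 *
 *     PetscFunctionBegin;
 *     ierr = PetscMalloc1(len,&buf);CHKERRQ(ierr); // real code would stash buf in ctx to free later
 *     ierr = MPI_Irecv(buf,len,MPIU_SCALAR,rank,tags[0],comm,&req[0]);CHKERRMPI(ierr);
 *     PetscFunctionReturn(0);
 *   }
 */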
/*@C
   PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

   Collective

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
.  todata - data to send to each rank (packed)
.  ntags - number of tags needed by send/recv callbacks
.  send - callback invoked on sending process when ready to send primary payload
.  recv - callback invoked on receiving process after delivery of rendezvous message
-  ctx - context for callbacks

   Output Arguments:
+  nfrom - number of ranks from which messages are received
.  fromranks - ranks from which messages are received (length nfrom; caller should PetscFree())
.  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
.  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
-  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   The MPI_Ibarrier-based implementation uses the NBX algorithm from the reference below.

   References:
.  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
                                          PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
                                          PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                                          PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
{
  PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
                           PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
  PetscMPIInt            i,size;

  PetscFunctionBegin;
  ierr = PetscSysInitializePackage();CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  for (i=0; i<nto; i++) {
    if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
  }
  ierr = PetscLogEventSync(PETSC_BuildTwoSidedF,comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr);
  ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_IBARRIER)
    f = PetscCommBuildTwoSidedFReq_Ibarrier;
    break;
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
  case PETSC_BUILDTWOSIDED_REDSCATTER:
    f = PetscCommBuildTwoSidedFReq_Reference;
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  ierr = (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
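/* A minimal sketch of driving PetscCommBuildTwoSidedFReq() directly so other
 * work can overlap the payload exchange; the waits and frees mirror what
 * PetscCommBuildTwoSidedF() does internally. MySend/MyRecv/ctx are the
 * hypothetical callbacks sketched earlier, and tohdrs holds one PetscMPIInt
 * header (the payload length) per destination.
 *
 *   MPI_Request   *toreqs,*fromreqs;
 *   PetscMPIInt    nfrom,*fromranks,*fromhdrs;
 *   PetscErrorCode ierr;
 *
 *   ierr = PetscCommBuildTwoSidedFReq(comm,1,MPI_INT,nto,toranks,tohdrs,&nfrom,&fromranks,&fromhdrs,
 *                                     1,&toreqs,&fromreqs,MySend,MyRecv,&ctx);CHKERRQ(ierr);
 *   // ... unrelated computation can proceed here ...
 *   ierr = MPI_Waitall(nto*1,toreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
 *   ierr = MPI_Waitall(nfrom*1,fromreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
 *   ierr = PetscFree(toreqs);CHKERRQ(ierr);
 *   ierr = PetscFree(fromreqs);CHKERRQ(ierr);
 *   ierr = PetscFree(fromranks);CHKERRQ(ierr);
 *   ierr = PetscFree(fromhdrs);CHKERRQ(ierr);
 */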