1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <petsc/private/petscimpl.h> 3 4 PetscLogEvent PETSC_BuildTwoSided; 5 PetscLogEvent PETSC_BuildTwoSidedF; 6 7 const char *const PetscBuildTwoSidedTypes[] = { 8 "ALLREDUCE", 9 "IBARRIER", 10 "REDSCATTER", 11 "PetscBuildTwoSidedType", 12 "PETSC_BUILDTWOSIDED_", 13 NULL 14 }; 15 16 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 17 18 /*@ 19 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 20 21 Logically Collective 22 23 Input Arguments: 24 + comm - PETSC_COMM_WORLD 25 - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 26 27 Level: developer 28 29 Note: 30 This option is currently global, but could be made per-communicator. 31 32 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 33 @*/ 34 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 35 { 36 PetscFunctionBegin; 37 #if defined(PETSC_USE_DEBUG) 38 { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 39 PetscMPIInt ierr; 40 PetscMPIInt b1[2],b2[2]; 41 b1[0] = -(PetscMPIInt)twosided; 42 b1[1] = (PetscMPIInt)twosided; 43 ierr = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 44 if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 45 } 46 #endif 47 _twosided_type = twosided; 48 PetscFunctionReturn(0); 49 } 50 51 /*@ 52 PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 53 54 Logically Collective 55 56 Output Arguments: 57 + comm - communicator on which to query algorithm 58 - twosided - algorithm to use for PetscCommBuildTwoSided() 59 60 Level: developer 61 62 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 63 @*/ 64 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 65 { 66 PetscErrorCode ierr; 67 PetscMPIInt size; 68 69 PetscFunctionBegin; 70 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 71 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 72 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 73 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */ 74 #if defined(PETSC_HAVE_MPI_IBARRIER) 75 if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 76 #endif 77 ierr = PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 78 } 79 *twosided = _twosided_type; 80 PetscFunctionReturn(0); 81 } 82 83 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 84 85 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 86 { 87 PetscErrorCode ierr; 88 PetscMPIInt nrecvs,tag,done,i; 89 MPI_Aint lb,unitbytes; 90 char *tdata; 91 MPI_Request *sendreqs,barrier; 92 PetscSegBuffer segrank,segdata; 93 PetscBool barrier_started; 94 95 PetscFunctionBegin; 96 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 97 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 98 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 99 tdata = (char*)todata; 100 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 101 for (i=0; i<nto; i++) { 102 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 103 } 104 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 105 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 106 107 nrecvs = 0; 108 barrier = MPI_REQUEST_NULL; 109 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 110 * but we need to work around it. */ 111 barrier_started = PETSC_FALSE; 112 for (done=0; !done; ) { 113 PetscMPIInt flag; 114 MPI_Status status; 115 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 116 if (flag) { /* incoming message */ 117 PetscMPIInt *recvrank; 118 void *buf; 119 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 120 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 121 *recvrank = status.MPI_SOURCE; 122 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 123 nrecvs++; 124 } 125 if (!barrier_started) { 126 PetscMPIInt sent,nsends; 127 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 128 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 129 if (sent) { 130 #if defined(PETSC_HAVE_MPI_IBARRIER) 131 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 132 #elif defined(PETSC_HAVE_MPIX_IBARRIER) 133 ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); 134 #endif 135 barrier_started = PETSC_TRUE; 136 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 137 } 138 } else { 139 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 140 } 141 } 142 *nfrom = nrecvs; 143 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 144 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 145 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 146 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 147 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 #endif 151 152 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 153 { 154 PetscErrorCode ierr; 155 PetscMPIInt size,rank,*iflags,nrecvs,tag,*franks,i,flg; 156 MPI_Aint lb,unitbytes; 157 char *tdata,*fdata; 158 MPI_Request *reqs,*sendreqs; 159 MPI_Status *statuses; 160 PetscCommCounter *counter; 161 162 PetscFunctionBegin; 163 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 164 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 165 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 166 ierr = MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr); 167 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set"); 168 if (!counter->iflags) { 169 ierr = PetscCalloc1(size,&counter->iflags);CHKERRQ(ierr); 170 iflags = counter->iflags; 171 } else { 172 iflags = counter->iflags; 173 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 174 } 175 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 176 ierr = MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 177 nrecvs = iflags[rank]; 178 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 179 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 180 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 181 tdata = (char*)todata; 182 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 183 sendreqs = reqs + nrecvs; 184 for (i=0; i<nrecvs; i++) { 185 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 186 } 187 for (i=0; i<nto; i++) { 188 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 189 } 190 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 191 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 192 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 193 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 194 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 195 196 *nfrom = nrecvs; 197 *fromranks = franks; 198 *(void**)fromdata = fdata; 199 PetscFunctionReturn(0); 200 } 201 202 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 203 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 204 { 205 PetscErrorCode ierr; 206 PetscMPIInt size,*iflags,nrecvs,tag,*franks,i,flg; 207 MPI_Aint lb,unitbytes; 208 char *tdata,*fdata; 209 MPI_Request *reqs,*sendreqs; 210 MPI_Status *statuses; 211 PetscCommCounter *counter; 212 213 PetscFunctionBegin; 214 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 215 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 216 ierr = MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);CHKERRQ(ierr); 217 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set"); 218 if (!counter->iflags) { 219 ierr = PetscCalloc1(size,&counter->iflags);CHKERRQ(ierr); 220 iflags = counter->iflags; 221 } else { 222 iflags = counter->iflags; 223 ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 224 } 225 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 226 ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 227 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 228 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 229 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 230 tdata = (char*)todata; 231 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 232 sendreqs = reqs + nrecvs; 233 for (i=0; i<nrecvs; i++) { 234 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 235 } 236 for (i=0; i<nto; i++) { 237 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 238 } 239 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 240 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 241 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 242 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 243 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 244 245 *nfrom = nrecvs; 246 *fromranks = franks; 247 *(void**)fromdata = fdata; 248 PetscFunctionReturn(0); 249 } 250 #endif 251 252 /*@C 253 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 254 255 Collective 256 257 Input Arguments: 258 + comm - communicator 259 . count - number of entries to send/receive (must match on all ranks) 260 . dtype - datatype to send/receive from each rank (must match on all ranks) 261 . nto - number of ranks to send data to 262 . toranks - ranks to send to (array of length nto) 263 - todata - data to send to each rank (packed) 264 265 Output Arguments: 266 + nfrom - number of ranks receiving messages from 267 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 268 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 269 270 Level: developer 271 272 Options Database Keys: 273 . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier. 274 275 Notes: 276 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 277 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 278 279 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 280 281 References: 282 . 1. - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 283 Scalable communication protocols for dynamic sparse data exchange, 2010. 284 285 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 286 @*/ 287 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 288 { 289 PetscErrorCode ierr; 290 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 291 292 PetscFunctionBegin; 293 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 294 ierr = PetscLogEventSync(PETSC_BuildTwoSided,comm);CHKERRQ(ierr); 295 ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 296 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 297 switch (buildtype) { 298 case PETSC_BUILDTWOSIDED_IBARRIER: 299 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 300 ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 301 #else 302 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 303 #endif 304 break; 305 case PETSC_BUILDTWOSIDED_ALLREDUCE: 306 ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 307 break; 308 case PETSC_BUILDTWOSIDED_REDSCATTER: 309 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 310 ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 311 #else 312 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 313 #endif 314 break; 315 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 316 } 317 ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 322 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 323 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 324 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 325 { 326 PetscErrorCode ierr; 327 PetscMPIInt i,*tag; 328 MPI_Aint lb,unitbytes; 329 MPI_Request *sendreq,*recvreq; 330 331 PetscFunctionBegin; 332 ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr); 333 if (ntags > 0) { 334 ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr); 335 } 336 for (i=1; i<ntags; i++) { 337 ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr); 338 } 339 340 /* Perform complete initial rendezvous */ 341 ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 342 343 ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr); 344 ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr); 345 346 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 347 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 348 for (i=0; i<nto; i++) { 349 PetscMPIInt k; 350 for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL; 351 ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr); 352 } 353 for (i=0; i<*nfrom; i++) { 354 void *header = (*(char**)fromdata) + count*unitbytes*i; 355 PetscMPIInt k; 356 for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL; 357 ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr); 358 } 359 ierr = PetscFree(tag);CHKERRQ(ierr); 360 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 361 *toreqs = sendreq; 362 *fromreqs = recvreq; 363 PetscFunctionReturn(0); 364 } 365 366 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 367 368 static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 369 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 370 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 371 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 372 { 373 PetscErrorCode ierr; 374 PetscMPIInt nrecvs,tag,*tags,done,i; 375 MPI_Aint lb,unitbytes; 376 char *tdata; 377 MPI_Request *sendreqs,*usendreqs,*req,barrier; 378 PetscSegBuffer segrank,segdata,segreq; 379 PetscBool barrier_started; 380 381 PetscFunctionBegin; 382 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 383 ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr); 384 for (i=0; i<ntags; i++) { 385 ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr); 386 } 387 ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRQ(ierr); 388 if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb); 389 tdata = (char*)todata; 390 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 391 ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr); 392 /* Post synchronous sends */ 393 for (i=0; i<nto; i++) { 394 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 395 } 396 /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the 397 * synchronous messages above. */ 398 for (i=0; i<nto; i++) { 399 PetscMPIInt k; 400 for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL; 401 ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr); 402 } 403 404 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 405 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 406 ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr); 407 408 nrecvs = 0; 409 barrier = MPI_REQUEST_NULL; 410 /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 411 * but we need to work around it. */ 412 barrier_started = PETSC_FALSE; 413 for (done=0; !done; ) { 414 PetscMPIInt flag; 415 MPI_Status status; 416 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 417 if (flag) { /* incoming message */ 418 PetscMPIInt *recvrank,k; 419 void *buf; 420 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 421 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 422 *recvrank = status.MPI_SOURCE; 423 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 424 ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr); 425 for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL; 426 ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr); 427 nrecvs++; 428 } 429 if (!barrier_started) { 430 PetscMPIInt sent,nsends; 431 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 432 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 433 if (sent) { 434 #if defined(PETSC_HAVE_MPI_IBARRIER) 435 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 436 #elif defined(PETSC_HAVE_MPIX_IBARRIER) 437 ierr = MPIX_Ibarrier(comm,&barrier);CHKERRQ(ierr); 438 #endif 439 barrier_started = PETSC_TRUE; 440 } 441 } else { 442 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 443 } 444 } 445 *nfrom = nrecvs; 446 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 447 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 448 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 449 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 450 *toreqs = usendreqs; 451 ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr); 452 ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr); 453 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 454 ierr = PetscFree(tags);CHKERRQ(ierr); 455 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 456 PetscFunctionReturn(0); 457 } 458 #endif 459 460 /*@C 461 PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous 462 463 Collective 464 465 Input Arguments: 466 + comm - communicator 467 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 468 . dtype - datatype to send/receive from each rank (must match on all ranks) 469 . nto - number of ranks to send data to 470 . toranks - ranks to send to (array of length nto) 471 . todata - data to send to each rank (packed) 472 . ntags - number of tags needed by send/recv callbacks 473 . send - callback invoked on sending process when ready to send primary payload 474 . recv - callback invoked on receiving process after delivery of rendezvous message 475 - ctx - context for callbacks 476 477 Output Arguments: 478 + nfrom - number of ranks receiving messages from 479 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 480 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 481 482 Level: developer 483 484 Notes: 485 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 486 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data. 487 488 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 489 490 References: 491 . 1. - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 492 Scalable communication protocols for dynamic sparse data exchange, 2010. 493 494 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 495 @*/ 496 PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags, 497 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 498 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 499 { 500 PetscErrorCode ierr; 501 MPI_Request *toreqs,*fromreqs; 502 503 PetscFunctionBegin; 504 ierr = PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);CHKERRQ(ierr); 505 ierr = MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 506 ierr = MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 507 ierr = PetscFree(toreqs);CHKERRQ(ierr); 508 ierr = PetscFree(fromreqs);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 /*@C 513 PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests 514 515 Collective 516 517 Input Arguments: 518 + comm - communicator 519 . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 520 . dtype - datatype to send/receive from each rank (must match on all ranks) 521 . nto - number of ranks to send data to 522 . toranks - ranks to send to (array of length nto) 523 . todata - data to send to each rank (packed) 524 . ntags - number of tags needed by send/recv callbacks 525 . send - callback invoked on sending process when ready to send primary payload 526 . recv - callback invoked on receiving process after delivery of rendezvous message 527 - ctx - context for callbacks 528 529 Output Arguments: 530 + nfrom - number of ranks receiving messages from 531 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 532 . fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 533 . toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree()) 534 - fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree()) 535 536 Level: developer 537 538 Notes: 539 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 540 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data. 541 542 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 543 544 References: 545 . 1. - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 546 Scalable communication protocols for dynamic sparse data exchange, 2010. 547 548 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 549 @*/ 550 PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 551 PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 552 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 553 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 554 { 555 PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*, 556 PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**, 557 PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 558 PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx); 559 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 560 PetscMPIInt i,size; 561 562 PetscFunctionBegin; 563 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 564 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 565 for (i=0; i<nto; i++) { 566 if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size); 567 } 568 ierr = PetscLogEventSync(PETSC_BuildTwoSidedF,comm);CHKERRQ(ierr); 569 ierr = PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr); 570 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 571 switch (buildtype) { 572 case PETSC_BUILDTWOSIDED_IBARRIER: 573 #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER) 574 f = PetscCommBuildTwoSidedFReq_Ibarrier; 575 #else 576 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 577 #endif 578 break; 579 case PETSC_BUILDTWOSIDED_ALLREDUCE: 580 case PETSC_BUILDTWOSIDED_REDSCATTER: 581 f = PetscCommBuildTwoSidedFReq_Reference; 582 break; 583 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 584 } 585 ierr = (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);CHKERRQ(ierr); 586 ierr = PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589