1f6ced4a3SJed Brown #include <petscsys.h> /*I "petscsys.h" I*/ 295c0884eSLisandro Dalcin #include <petsc/private/petscimpl.h> 3f6ced4a3SJed Brown 495c0884eSLisandro Dalcin PetscLogEvent PETSC_BuildTwoSided; 595c0884eSLisandro Dalcin PetscLogEvent PETSC_BuildTwoSidedF; 63b3561c8SJed Brown 76145cd65SJed Brown const char *const PetscBuildTwoSidedTypes[] = { 8f6ced4a3SJed Brown "ALLREDUCE", 96145cd65SJed Brown "IBARRIER", 101654bf6bSJed Brown "REDSCATTER", 116145cd65SJed Brown "PetscBuildTwoSidedType", 126145cd65SJed Brown "PETSC_BUILDTWOSIDED_", 1302c9f0b5SLisandro Dalcin NULL 14f6ced4a3SJed Brown }; 15f6ced4a3SJed Brown 166145cd65SJed Brown static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 17f6ced4a3SJed Brown 186145cd65SJed Brown /*@ 196145cd65SJed Brown PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 206145cd65SJed Brown 216145cd65SJed Brown Logically Collective 226145cd65SJed Brown 234165533cSJose E. Roman Input Parameters: 246145cd65SJed Brown + comm - PETSC_COMM_WORLD 256145cd65SJed Brown - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 266145cd65SJed Brown 276145cd65SJed Brown Level: developer 286145cd65SJed Brown 296145cd65SJed Brown Note: 306145cd65SJed Brown This option is currently global, but could be made per-communicator. 
316145cd65SJed Brown 326145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 336145cd65SJed Brown @*/ 346145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 356145cd65SJed Brown { 366145cd65SJed Brown PetscFunctionBegin; 3776bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 386145cd65SJed Brown PetscMPIInt ierr; 396145cd65SJed Brown PetscMPIInt b1[2],b2[2]; 406145cd65SJed Brown b1[0] = -(PetscMPIInt)twosided; 416145cd65SJed Brown b1[1] = (PetscMPIInt)twosided; 42820f2d46SBarry Smith ierr = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRMPI(ierr); 432c71b3e2SJacob Faibussowitsch PetscCheckFalse(-b2[0] != b2[1],comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 446145cd65SJed Brown } 456145cd65SJed Brown _twosided_type = twosided; 466145cd65SJed Brown PetscFunctionReturn(0); 476145cd65SJed Brown } 486145cd65SJed Brown 496145cd65SJed Brown /*@ 506145cd65SJed Brown PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 516145cd65SJed Brown 526145cd65SJed Brown Logically Collective 536145cd65SJed Brown 544165533cSJose E. 
Roman Output Parameters: 556145cd65SJed Brown + comm - communicator on which to query algorithm 566145cd65SJed Brown - twosided - algorithm to use for PetscCommBuildTwoSided() 576145cd65SJed Brown 586145cd65SJed Brown Level: developer 596145cd65SJed Brown 606145cd65SJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 616145cd65SJed Brown @*/ 626145cd65SJed Brown PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 63f6ced4a3SJed Brown { 64f6ced4a3SJed Brown PetscErrorCode ierr; 6505342407SJunchao Zhang PetscMPIInt size; 66f6ced4a3SJed Brown 67f6ced4a3SJed Brown PetscFunctionBegin; 686145cd65SJed Brown *twosided = PETSC_BUILDTWOSIDED_NOTSET; 696145cd65SJed Brown if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 70ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 7105342407SJunchao Zhang _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */ 72*8ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 7305342407SJunchao Zhang if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 746145cd65SJed Brown #endif 75c5929fdfSBarry Smith ierr = PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 76f6ced4a3SJed Brown } 77f6ced4a3SJed Brown *twosided = _twosided_type; 78f6ced4a3SJed Brown PetscFunctionReturn(0); 79f6ced4a3SJed Brown } 80f6ced4a3SJed Brown 81*8ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 82cf4b5b4fSJed Brown static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 83f6ced4a3SJed Brown { 84f6ced4a3SJed Brown PetscErrorCode ierr; 85c3a59b84SJed Brown PetscMPIInt nrecvs,tag,done,i; 86c3a59b84SJed Brown 
MPI_Aint lb,unitbytes; 87f6ced4a3SJed Brown char *tdata; 88f6ced4a3SJed Brown MPI_Request *sendreqs,barrier; 890f453b92SJed Brown PetscSegBuffer segrank,segdata; 9046e50bb6SJed Brown PetscBool barrier_started; 91f6ced4a3SJed Brown 92f6ced4a3SJed Brown PetscFunctionBegin; 93a502f807SJed Brown ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 94ffc4695bSBarry Smith ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr); 952c71b3e2SJacob Faibussowitsch PetscCheckFalse(lb != 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb); 96f6ced4a3SJed Brown tdata = (char*)todata; 97785e854fSJed Brown ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 98f6ced4a3SJed Brown for (i=0; i<nto; i++) { 99ffc4695bSBarry Smith ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRMPI(ierr); 100f6ced4a3SJed Brown } 1010f453b92SJed Brown ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 1020f453b92SJed Brown ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 103f6ced4a3SJed Brown 104f6ced4a3SJed Brown nrecvs = 0; 105f6ced4a3SJed Brown barrier = MPI_REQUEST_NULL; 10646e50bb6SJed Brown /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 10746e50bb6SJed Brown * but we need to work around it. 
*/ 10846e50bb6SJed Brown barrier_started = PETSC_FALSE; 109f6ced4a3SJed Brown for (done=0; !done;) { 110f6ced4a3SJed Brown PetscMPIInt flag; 111f6ced4a3SJed Brown MPI_Status status; 112ffc4695bSBarry Smith ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRMPI(ierr); 113f6ced4a3SJed Brown if (flag) { /* incoming message */ 114f6ced4a3SJed Brown PetscMPIInt *recvrank; 115f6ced4a3SJed Brown void *buf; 116137cf7b6SJed Brown ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 117137cf7b6SJed Brown ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 118f6ced4a3SJed Brown *recvrank = status.MPI_SOURCE; 11955b25c41SPierre Jolivet ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRMPI(ierr); 120f6ced4a3SJed Brown nrecvs++; 121f6ced4a3SJed Brown } 12246e50bb6SJed Brown if (!barrier_started) { 1234dc2109aSBarry Smith PetscMPIInt sent,nsends; 1244dc2109aSBarry Smith ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 125ffc4695bSBarry Smith ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 126f6ced4a3SJed Brown if (sent) { 127ffc4695bSBarry Smith ierr = MPI_Ibarrier(comm,&barrier);CHKERRMPI(ierr); 12846e50bb6SJed Brown barrier_started = PETSC_TRUE; 129f6ced4a3SJed Brown ierr = PetscFree(sendreqs);CHKERRQ(ierr); 130f6ced4a3SJed Brown } 131f6ced4a3SJed Brown } else { 132ffc4695bSBarry Smith ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRMPI(ierr); 133f6ced4a3SJed Brown } 134f6ced4a3SJed Brown } 135f6ced4a3SJed Brown *nfrom = nrecvs; 136137cf7b6SJed Brown ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 1370f453b92SJed Brown ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 138137cf7b6SJed Brown ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 1390f453b92SJed Brown ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 140a502f807SJed Brown ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 141f6ced4a3SJed Brown PetscFunctionReturn(0); 
142f6ced4a3SJed Brown } 143f6ced4a3SJed Brown #endif 144f6ced4a3SJed Brown 145cf4b5b4fSJed Brown static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 146f6ced4a3SJed Brown { 147f6ced4a3SJed Brown PetscErrorCode ierr; 14805342407SJunchao Zhang PetscMPIInt size,rank,*iflags,nrecvs,tag,*franks,i,flg; 149c3a59b84SJed Brown MPI_Aint lb,unitbytes; 150f6ced4a3SJed Brown char *tdata,*fdata; 151f6ced4a3SJed Brown MPI_Request *reqs,*sendreqs; 152f6ced4a3SJed Brown MPI_Status *statuses; 15305342407SJunchao Zhang PetscCommCounter *counter; 154f6ced4a3SJed Brown 155f6ced4a3SJed Brown PetscFunctionBegin; 156ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 157ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 158a502f807SJed Brown ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 159ffc4695bSBarry Smith ierr = MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr); 1602c71b3e2SJacob Faibussowitsch PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set"); 16105342407SJunchao Zhang if (!counter->iflags) { 16205342407SJunchao Zhang ierr = PetscCalloc1(size,&counter->iflags);CHKERRQ(ierr); 16305342407SJunchao Zhang iflags = counter->iflags; 16405342407SJunchao Zhang } else { 16505342407SJunchao Zhang iflags = counter->iflags; 16605342407SJunchao Zhang ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 16705342407SJunchao Zhang } 16805342407SJunchao Zhang for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 169820f2d46SBarry Smith ierr = MPIU_Allreduce(MPI_IN_PLACE,iflags,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr); 17005342407SJunchao Zhang nrecvs = iflags[rank]; 17155b25c41SPierre Jolivet ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr); 1722c71b3e2SJacob 
Faibussowitsch PetscCheckFalse(lb != 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb); 173f6ced4a3SJed Brown ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 174f6ced4a3SJed Brown tdata = (char*)todata; 175dcca6d9dSJed Brown ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 17606926cafSJed Brown sendreqs = reqs + nrecvs; 177f6ced4a3SJed Brown for (i=0; i<nrecvs; i++) { 178ffc4695bSBarry Smith ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRMPI(ierr); 179f6ced4a3SJed Brown } 180f6ced4a3SJed Brown for (i=0; i<nto; i++) { 181ffc4695bSBarry Smith ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRMPI(ierr); 182f6ced4a3SJed Brown } 183ffc4695bSBarry Smith ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRMPI(ierr); 184785e854fSJed Brown ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 185a297a907SKarl Rupp for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 18606926cafSJed Brown ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 187a502f807SJed Brown ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 188f6ced4a3SJed Brown 189f6ced4a3SJed Brown *nfrom = nrecvs; 190f6ced4a3SJed Brown *fromranks = franks; 191f6ced4a3SJed Brown *(void**)fromdata = fdata; 192f6ced4a3SJed Brown PetscFunctionReturn(0); 193f6ced4a3SJed Brown } 194f6ced4a3SJed Brown 1951654bf6bSJed Brown #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 196cf4b5b4fSJed Brown static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 1971654bf6bSJed Brown { 1981654bf6bSJed Brown PetscErrorCode ierr; 19905342407SJunchao Zhang PetscMPIInt size,*iflags,nrecvs,tag,*franks,i,flg; 200c3a59b84SJed Brown MPI_Aint lb,unitbytes; 2011654bf6bSJed Brown char *tdata,*fdata; 2021654bf6bSJed Brown 
MPI_Request *reqs,*sendreqs; 2031654bf6bSJed Brown MPI_Status *statuses; 20405342407SJunchao Zhang PetscCommCounter *counter; 2051654bf6bSJed Brown 2061654bf6bSJed Brown PetscFunctionBegin; 207ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 20805342407SJunchao Zhang ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 209ffc4695bSBarry Smith ierr = MPI_Comm_get_attr(comm,Petsc_Counter_keyval,&counter,&flg);CHKERRMPI(ierr); 2102c71b3e2SJacob Faibussowitsch PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inner PETSc communicator does not have its tag/name counter attribute set"); 21105342407SJunchao Zhang if (!counter->iflags) { 21205342407SJunchao Zhang ierr = PetscCalloc1(size,&counter->iflags);CHKERRQ(ierr); 21305342407SJunchao Zhang iflags = counter->iflags; 21405342407SJunchao Zhang } else { 21505342407SJunchao Zhang iflags = counter->iflags; 216580bdb30SBarry Smith ierr = PetscArrayzero(iflags,size);CHKERRQ(ierr); 21705342407SJunchao Zhang } 2181654bf6bSJed Brown for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 21955b25c41SPierre Jolivet ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr); 22055b25c41SPierre Jolivet ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr); 2212c71b3e2SJacob Faibussowitsch PetscCheckFalse(lb != 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb); 2221654bf6bSJed Brown ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 2231654bf6bSJed Brown tdata = (char*)todata; 2241654bf6bSJed Brown ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 2251654bf6bSJed Brown sendreqs = reqs + nrecvs; 2261654bf6bSJed Brown for (i=0; i<nrecvs; i++) { 227ffc4695bSBarry Smith ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRMPI(ierr); 2281654bf6bSJed Brown } 2291654bf6bSJed Brown for (i=0; i<nto; i++) { 230ffc4695bSBarry Smith ierr = 
MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRMPI(ierr); 2311654bf6bSJed Brown } 232ffc4695bSBarry Smith ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRMPI(ierr); 2331654bf6bSJed Brown ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 2341654bf6bSJed Brown for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 2351654bf6bSJed Brown ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 236a502f807SJed Brown ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 2371654bf6bSJed Brown 2381654bf6bSJed Brown *nfrom = nrecvs; 2391654bf6bSJed Brown *fromranks = franks; 2401654bf6bSJed Brown *(void**)fromdata = fdata; 2411654bf6bSJed Brown PetscFunctionReturn(0); 2421654bf6bSJed Brown } 2431654bf6bSJed Brown #endif 2441654bf6bSJed Brown 245f6ced4a3SJed Brown /*@C 246f6ced4a3SJed Brown PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 247f6ced4a3SJed Brown 248d083f849SBarry Smith Collective 249f6ced4a3SJed Brown 2504165533cSJose E. Roman Input Parameters: 251f6ced4a3SJed Brown + comm - communicator 252f6ced4a3SJed Brown . count - number of entries to send/receive (must match on all ranks) 253f6ced4a3SJed Brown . dtype - datatype to send/receive from each rank (must match on all ranks) 254f6ced4a3SJed Brown . nto - number of ranks to send data to 255f6ced4a3SJed Brown . toranks - ranks to send to (array of length nto) 256f6ced4a3SJed Brown - todata - data to send to each rank (packed) 257f6ced4a3SJed Brown 2584165533cSJose E. Roman Output Parameters: 259f6ced4a3SJed Brown + nfrom - number of ranks receiving messages from 260f6ced4a3SJed Brown . 
fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 261f6ced4a3SJed Brown - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 262f6ced4a3SJed Brown 263f6ced4a3SJed Brown Level: developer 264f6ced4a3SJed Brown 2651654bf6bSJed Brown Options Database Keys: 26605342407SJunchao Zhang . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks, otherwise ibarrier. 2671654bf6bSJed Brown 268f6ced4a3SJed Brown Notes: 269f6ced4a3SJed Brown This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 270f6ced4a3SJed Brown PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 271f6ced4a3SJed Brown 272f6ced4a3SJed Brown Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 273f6ced4a3SJed Brown 274f6ced4a3SJed Brown References: 275606c0280SSatish Balay . * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 2764f02bc6aSBarry Smith Scalable communication protocols for dynamic sparse data exchange, 2010. 
277f6ced4a3SJed Brown 278f6ced4a3SJed Brown .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 279f6ced4a3SJed Brown @*/ 280cf4b5b4fSJed Brown PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 281f6ced4a3SJed Brown { 282f6ced4a3SJed Brown PetscErrorCode ierr; 2836145cd65SJed Brown PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 284f6ced4a3SJed Brown 285f6ced4a3SJed Brown PetscFunctionBegin; 2863b3561c8SJed Brown ierr = PetscSysInitializePackage();CHKERRQ(ierr); 28762872c28SLisandro Dalcin ierr = PetscLogEventSync(PETSC_BuildTwoSided,comm);CHKERRQ(ierr); 2883b3561c8SJed Brown ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 2896145cd65SJed Brown ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 290f6ced4a3SJed Brown switch (buildtype) { 2916145cd65SJed Brown case PETSC_BUILDTWOSIDED_IBARRIER: 292*8ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 2936145cd65SJed Brown ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 294b458e8f1SJose E. Roman break; 2956145cd65SJed Brown #else 2966145cd65SJed Brown SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 297f6ced4a3SJed Brown #endif 2986145cd65SJed Brown case PETSC_BUILDTWOSIDED_ALLREDUCE: 2996145cd65SJed Brown ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 300f6ced4a3SJed Brown break; 3011654bf6bSJed Brown case PETSC_BUILDTWOSIDED_REDSCATTER: 3021654bf6bSJed Brown #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 3031654bf6bSJed Brown ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 304b458e8f1SJose E. 
Roman break; 3051654bf6bSJed Brown #else 3061654bf6bSJed Brown SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 3071654bf6bSJed Brown #endif 308f6ced4a3SJed Brown default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 309f6ced4a3SJed Brown } 3103b3561c8SJed Brown ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 311f6ced4a3SJed Brown PetscFunctionReturn(0); 312f6ced4a3SJed Brown } 313d815da10SJed Brown 3146afbe7ddSJed Brown static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 3156afbe7ddSJed Brown PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 316d815da10SJed Brown PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 317d815da10SJed Brown PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 318d815da10SJed Brown { 319d815da10SJed Brown PetscErrorCode ierr; 320d815da10SJed Brown PetscMPIInt i,*tag; 321d815da10SJed Brown MPI_Aint lb,unitbytes; 322d815da10SJed Brown MPI_Request *sendreq,*recvreq; 323d815da10SJed Brown 324d815da10SJed Brown PetscFunctionBegin; 325d815da10SJed Brown ierr = PetscMalloc1(ntags,&tag);CHKERRQ(ierr); 326d815da10SJed Brown if (ntags > 0) { 327d815da10SJed Brown ierr = PetscCommDuplicate(comm,&comm,&tag[0]);CHKERRQ(ierr); 328d815da10SJed Brown } 329d815da10SJed Brown for (i=1; i<ntags; i++) { 330d815da10SJed Brown ierr = PetscCommGetNewTag(comm,&tag[i]);CHKERRQ(ierr); 331d815da10SJed Brown } 332d815da10SJed Brown 333d815da10SJed Brown /* Perform complete initial rendezvous */ 334d815da10SJed Brown ierr = PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 335d815da10SJed Brown 3366afbe7ddSJed 
Brown ierr = PetscMalloc1(nto*ntags,&sendreq);CHKERRQ(ierr); 3376afbe7ddSJed Brown ierr = PetscMalloc1(*nfrom*ntags,&recvreq);CHKERRQ(ierr); 338d815da10SJed Brown 339ffc4695bSBarry Smith ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr); 3402c71b3e2SJacob Faibussowitsch PetscCheckFalse(lb != 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb); 341d815da10SJed Brown for (i=0; i<nto; i++) { 34289157a57SJed Brown PetscMPIInt k; 34389157a57SJed Brown for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL; 344d815da10SJed Brown ierr = (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);CHKERRQ(ierr); 345d815da10SJed Brown } 346d815da10SJed Brown for (i=0; i<*nfrom; i++) { 347d815da10SJed Brown void *header = (*(char**)fromdata) + count*unitbytes*i; 34889157a57SJed Brown PetscMPIInt k; 34989157a57SJed Brown for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL; 350d815da10SJed Brown ierr = (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);CHKERRQ(ierr); 351d815da10SJed Brown } 352d815da10SJed Brown ierr = PetscFree(tag);CHKERRQ(ierr); 353d815da10SJed Brown ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3546afbe7ddSJed Brown *toreqs = sendreq; 3556afbe7ddSJed Brown *fromreqs = recvreq; 356d815da10SJed Brown PetscFunctionReturn(0); 357d815da10SJed Brown } 358d815da10SJed Brown 359*8ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 360afb254cdSJed Brown 3616afbe7ddSJed Brown static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 3626afbe7ddSJed Brown PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 363afb254cdSJed Brown PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 364afb254cdSJed Brown PetscErrorCode 
(*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 365afb254cdSJed Brown { 366afb254cdSJed Brown PetscErrorCode ierr; 367afb254cdSJed Brown PetscMPIInt nrecvs,tag,*tags,done,i; 368afb254cdSJed Brown MPI_Aint lb,unitbytes; 369afb254cdSJed Brown char *tdata; 370afb254cdSJed Brown MPI_Request *sendreqs,*usendreqs,*req,barrier; 371afb254cdSJed Brown PetscSegBuffer segrank,segdata,segreq; 37246e50bb6SJed Brown PetscBool barrier_started; 373afb254cdSJed Brown 374afb254cdSJed Brown PetscFunctionBegin; 375afb254cdSJed Brown ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 376afb254cdSJed Brown ierr = PetscMalloc1(ntags,&tags);CHKERRQ(ierr); 377afb254cdSJed Brown for (i=0; i<ntags; i++) { 378afb254cdSJed Brown ierr = PetscCommGetNewTag(comm,&tags[i]);CHKERRQ(ierr); 379afb254cdSJed Brown } 380ffc4695bSBarry Smith ierr = MPI_Type_get_extent(dtype,&lb,&unitbytes);CHKERRMPI(ierr); 3812c71b3e2SJacob Faibussowitsch PetscCheckFalse(lb != 0,comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld",(long)lb); 382afb254cdSJed Brown tdata = (char*)todata; 3836afbe7ddSJed Brown ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 3846afbe7ddSJed Brown ierr = PetscMalloc1(nto*ntags,&usendreqs);CHKERRQ(ierr); 385afb254cdSJed Brown /* Post synchronous sends */ 386afb254cdSJed Brown for (i=0; i<nto; i++) { 387ffc4695bSBarry Smith ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRMPI(ierr); 388afb254cdSJed Brown } 389afb254cdSJed Brown /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the 390afb254cdSJed Brown * synchronous messages above. 
*/ 391afb254cdSJed Brown for (i=0; i<nto; i++) { 39289157a57SJed Brown PetscMPIInt k; 39389157a57SJed Brown for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL; 394afb254cdSJed Brown ierr = (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);CHKERRQ(ierr); 395afb254cdSJed Brown } 396afb254cdSJed Brown 397afb254cdSJed Brown ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 398afb254cdSJed Brown ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 399afb254cdSJed Brown ierr = PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);CHKERRQ(ierr); 400afb254cdSJed Brown 401afb254cdSJed Brown nrecvs = 0; 402afb254cdSJed Brown barrier = MPI_REQUEST_NULL; 40346e50bb6SJed Brown /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation, 40446e50bb6SJed Brown * but we need to work around it. */ 40546e50bb6SJed Brown barrier_started = PETSC_FALSE; 406afb254cdSJed Brown for (done=0; !done;) { 407afb254cdSJed Brown PetscMPIInt flag; 408afb254cdSJed Brown MPI_Status status; 409ffc4695bSBarry Smith ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRMPI(ierr); 410afb254cdSJed Brown if (flag) { /* incoming message */ 41189157a57SJed Brown PetscMPIInt *recvrank,k; 412afb254cdSJed Brown void *buf; 413afb254cdSJed Brown ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 414afb254cdSJed Brown ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 415afb254cdSJed Brown *recvrank = status.MPI_SOURCE; 416ffc4695bSBarry Smith ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRMPI(ierr); 417afb254cdSJed Brown ierr = PetscSegBufferGet(segreq,ntags,&req);CHKERRQ(ierr); 41889157a57SJed Brown for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL; 419afb254cdSJed Brown ierr = (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);CHKERRQ(ierr); 420afb254cdSJed Brown nrecvs++; 421afb254cdSJed Brown } 42246e50bb6SJed 
Brown if (!barrier_started) { 423afb254cdSJed Brown PetscMPIInt sent,nsends; 424afb254cdSJed Brown ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 425ffc4695bSBarry Smith ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 426afb254cdSJed Brown if (sent) { 427ffc4695bSBarry Smith ierr = MPI_Ibarrier(comm,&barrier);CHKERRMPI(ierr); 42846e50bb6SJed Brown barrier_started = PETSC_TRUE; 429afb254cdSJed Brown } 430afb254cdSJed Brown } else { 431ffc4695bSBarry Smith ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRMPI(ierr); 432afb254cdSJed Brown } 433afb254cdSJed Brown } 434afb254cdSJed Brown *nfrom = nrecvs; 435afb254cdSJed Brown ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 436afb254cdSJed Brown ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 437afb254cdSJed Brown ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 438afb254cdSJed Brown ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 4396afbe7ddSJed Brown *toreqs = usendreqs; 4406afbe7ddSJed Brown ierr = PetscSegBufferExtractAlloc(segreq,fromreqs);CHKERRQ(ierr); 441afb254cdSJed Brown ierr = PetscSegBufferDestroy(&segreq);CHKERRQ(ierr); 4426afbe7ddSJed Brown ierr = PetscFree(sendreqs);CHKERRQ(ierr); 443afb254cdSJed Brown ierr = PetscFree(tags);CHKERRQ(ierr); 444afb254cdSJed Brown ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 445afb254cdSJed Brown PetscFunctionReturn(0); 446afb254cdSJed Brown } 447afb254cdSJed Brown #endif 448afb254cdSJed Brown 449d815da10SJed Brown /*@C 450d815da10SJed Brown PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous 451d815da10SJed Brown 452d083f849SBarry Smith Collective 453d815da10SJed Brown 4544165533cSJose E. Roman Input Parameters: 455d815da10SJed Brown + comm - communicator 456d815da10SJed Brown . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 457d815da10SJed Brown . 
dtype - datatype to send/receive from each rank (must match on all ranks) 458d815da10SJed Brown . nto - number of ranks to send data to 459d815da10SJed Brown . toranks - ranks to send to (array of length nto) 460d815da10SJed Brown . todata - data to send to each rank (packed) 461d815da10SJed Brown . ntags - number of tags needed by send/recv callbacks 462d815da10SJed Brown . send - callback invoked on sending process when ready to send primary payload 463d815da10SJed Brown . recv - callback invoked on receiving process after delivery of rendezvous message 464d815da10SJed Brown - ctx - context for callbacks 465d815da10SJed Brown 4664165533cSJose E. Roman Output Parameters: 467d815da10SJed Brown + nfrom - number of ranks receiving messages from 468d815da10SJed Brown . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 469d815da10SJed Brown - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 470d815da10SJed Brown 471d815da10SJed Brown Level: developer 472d815da10SJed Brown 473d815da10SJed Brown Notes: 474d815da10SJed Brown This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 475d815da10SJed Brown PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data. 476d815da10SJed Brown 477d815da10SJed Brown Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 478d815da10SJed Brown 479d815da10SJed Brown References: 480606c0280SSatish Balay . * - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 4814f02bc6aSBarry Smith Scalable communication protocols for dynamic sparse data exchange, 2010. 
482d815da10SJed Brown 4836afbe7ddSJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 484d815da10SJed Brown @*/ 485d815da10SJed Brown PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags, 486d815da10SJed Brown PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 487d815da10SJed Brown PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 488d815da10SJed Brown { 4896afbe7ddSJed Brown PetscErrorCode ierr; 4906afbe7ddSJed Brown MPI_Request *toreqs,*fromreqs; 4916afbe7ddSJed Brown 4926afbe7ddSJed Brown PetscFunctionBegin; 4936afbe7ddSJed Brown ierr = PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);CHKERRQ(ierr); 494ffc4695bSBarry Smith ierr = MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 495ffc4695bSBarry Smith ierr = MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 4966afbe7ddSJed Brown ierr = PetscFree(toreqs);CHKERRQ(ierr); 4976afbe7ddSJed Brown ierr = PetscFree(fromreqs);CHKERRQ(ierr); 4986afbe7ddSJed Brown PetscFunctionReturn(0); 4996afbe7ddSJed Brown } 5006afbe7ddSJed Brown 5016afbe7ddSJed Brown /*@C 5026afbe7ddSJed Brown PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests 5036afbe7ddSJed Brown 504d083f849SBarry Smith Collective 5056afbe7ddSJed Brown 5064165533cSJose E. Roman Input Parameters: 5076afbe7ddSJed Brown + comm - communicator 5086afbe7ddSJed Brown . count - number of entries to send/receive in initial rendezvous (must match on all ranks) 5096afbe7ddSJed Brown . 
dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
.  todata - data to send to each rank (packed)
.  ntags - number of tags needed by send/recv callbacks
.  send - callback invoked on sending process when ready to send primary payload
.  recv - callback invoked on receiving process after delivery of rendezvous message
-  ctx - context for callbacks

   Output Parameters:
+  nfrom - number of ranks receiving messages from
.  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
.  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
.  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
-  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
.
* - Hoefler, Siebert and Lumsdaine, The MPI_Ibarrier implementation uses the algorithm in 5354f02bc6aSBarry Smith Scalable communication protocols for dynamic sparse data exchange, 2010. 5366afbe7ddSJed Brown 5376afbe7ddSJed Brown .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 5386afbe7ddSJed Brown @*/ 5396afbe7ddSJed Brown PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata, 5406afbe7ddSJed Brown PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs, 5416afbe7ddSJed Brown PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 5426afbe7ddSJed Brown PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx) 5436afbe7ddSJed Brown { 5446afbe7ddSJed Brown PetscErrorCode ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*, 5456afbe7ddSJed Brown PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**, 546d815da10SJed Brown PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*), 547d815da10SJed Brown PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx); 548d815da10SJed Brown PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 5493461502dSJed Brown PetscMPIInt i,size; 550d815da10SJed Brown 551d815da10SJed Brown PetscFunctionBegin; 552d815da10SJed Brown ierr = PetscSysInitializePackage();CHKERRQ(ierr); 553ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 5543461502dSJed Brown for (i=0; i<nto; i++) { 5552c71b3e2SJacob Faibussowitsch PetscCheckFalse(toranks[i] < 0 || size <= toranks[i],comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size 
%d",i,toranks[i],size); 5563461502dSJed Brown } 55762872c28SLisandro Dalcin ierr = PetscLogEventSync(PETSC_BuildTwoSidedF,comm);CHKERRQ(ierr); 55827a32ea5SJed Brown ierr = PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr); 559d815da10SJed Brown ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 560d815da10SJed Brown switch (buildtype) { 561d815da10SJed Brown case PETSC_BUILDTWOSIDED_IBARRIER: 562*8ee3e7ecSJunchao Zhang #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES) 5636afbe7ddSJed Brown f = PetscCommBuildTwoSidedFReq_Ibarrier; 564b458e8f1SJose E. Roman break; 565afb254cdSJed Brown #else 566afb254cdSJed Brown SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 567afb254cdSJed Brown #endif 568d815da10SJed Brown case PETSC_BUILDTWOSIDED_ALLREDUCE: 569d815da10SJed Brown case PETSC_BUILDTWOSIDED_REDSCATTER: 5706afbe7ddSJed Brown f = PetscCommBuildTwoSidedFReq_Reference; 571d815da10SJed Brown break; 572d815da10SJed Brown default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 573d815da10SJed Brown } 5746afbe7ddSJed Brown ierr = (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);CHKERRQ(ierr); 57527a32ea5SJed Brown ierr = PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);CHKERRQ(ierr); 576d815da10SJed Brown PetscFunctionReturn(0); 577d815da10SJed Brown } 578