#include <petscsys.h> /*I "petscsys.h" I*/

/* Option strings for PetscBuildTwoSidedType.  The leading entries name the
   selectable enum values; the trailing entries are the enum type name and the
   value prefix, in the layout PetscOptionsGetEnum() expects, followed by a
   null terminator.  PETSC_BUILDTWOSIDED_NOTSET has no string here; presumably
   it is a negative "unset" sentinel declared in petscsys.h -- confirm there. */
const char *const PetscBuildTwoSidedTypes[] = {
  "ALLREDUCE",
  "IBARRIER",
  "PetscBuildTwoSidedType",
  "PETSC_BUILDTWOSIDED_",
  0
};

/* Process-global algorithm choice; lazily defaulted on the first call to
   PetscCommBuildTwoSidedGetType().  NOTE(review): a file-scope identifier
   starting with an underscore is technically reserved in C. */
static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSidedSetType"
/*@
   PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
+  comm - communicator on which to set the algorithm (the setting is currently global, so pass PETSC_COMM_WORLD)
-  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

   Level: developer

   Note:
   This option is currently global, but could be made per-communicator.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
{
  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
    PetscMPIInt ierr;
    PetscMPIInt b1[2],b2[2];
    /* Reduce (-v,v) with MPI_MAX: b2[0] = -min(v), b2[1] = max(v), so
       -b2[0] == b2[1] exactly when every rank passed the same value. */
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
  }
#endif
  _twosided_type = twosided;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSidedGetType"
/*@
   PetscCommBuildTwoSidedGetType - get algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
.  comm - communicator on which to query the algorithm (the setting is currently global)

   Output Arguments:
.  twosided - algorithm to use for PetscCommBuildTwoSided()

   Level: developer

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
    /* First query: pick a compile-time default, then let the options database
       (-build_twosided) override it. */
#if defined(PETSC_HAVE_MPI_IBARRIER)
#  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
    /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
#  else
    _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
#  endif
#else
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
#endif
    ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
  }
  *twosided = _twosided_type;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_IBARRIER)

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
/* Dynamic sparse data exchange via the NBX algorithm (Hoefler, Siebert and
   Lumsdaine 2010, cited in the PetscCommBuildTwoSided manpage): synchronous
   sends plus a nonblocking barrier discover the receivers without any rank
   knowing the global communication pattern. */
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,unitbytes,done;
  PetscInt       i;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;

  PetscFunctionBegin;
  ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
  ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr  = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
  /* Synchronous sends: per the MPI standard, an Issend completes only after
     the matching receive has started, which is what makes the barrier-based
     termination detection below correct. */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  /* The number of senders is unknown in advance, so accumulate source ranks
     and payloads in growable segmented buffers. */
  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) {                 /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      nrecvs++;
    }
    if (barrier == MPI_REQUEST_NULL) {
      /* Once all of our (synchronous) sends have completed, every message we
         sent has been matched, so it is safe to enter the nonblocking
         barrier; the send requests are no longer needed after that. */
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      /* Keep probing while waiting for the barrier to complete: other ranks
         may still be sending to us until they too have entered it. */
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  /* Hand the accumulated ranks/data to the caller as PetscFree()-able arrays. */
  *nfrom = nrecvs;
  ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
/* Rendezvous variant: first learn how many messages this rank will receive
   (via PetscGatherNumberOfMessages), then exchange all payloads with ordinary
   nonblocking point-to-point communication. */
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
  PetscInt       i;
  char           *tdata,*fdata;
  MPI_Request    *reqs,*sendreqs;
  MPI_Status     *statuses;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* iflags[r] = 1 iff this rank sends to rank r; the gather turns these flags
     into the number of messages each rank will receive (nrecvs). */
  ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr);
  ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
  ierr = PetscFree(iflags);CHKERRQ(ierr);

  ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
  ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
  ierr  = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  /* Single request array: receives occupy [0,nrecvs), sends [nrecvs,nrecvs+nto). */
  ierr     = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
  /* statuses[0..nrecvs-1] correspond to the MPI_ANY_SOURCE receives above;
     they tell us which ranks actually sent to us. */
  ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata;    /* fromdata is a (void**) passed as void* */
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSided"
/*@C
   PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

   Collective on MPI_Comm

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
-  todata - data to send to each rank (packed)

   Output Arguments:
+  nfrom - number of ranks receiving messages from
.  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
-  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
   The MPI_Ibarrier implementation uses the algorithm in
   Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode         ierr;
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscFunctionBegin;
  /* Resolve the (possibly lazily-defaulted) algorithm and dispatch to it. */
  ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_IBARRIER)
    ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
#else
    /* GetType never selects IBARRIER without MPI_Ibarrier support, but the
       user may have forced it via PetscCommBuildTwoSidedSetType(). */
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
    break;
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
    ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  PetscFunctionReturn(0);
}