1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 3 PetscLogEvent PETSC_BuildTwoSided; 4 5 const char *const PetscBuildTwoSidedTypes[] = { 6 "ALLREDUCE", 7 "IBARRIER", 8 "REDSCATTER", 9 "PetscBuildTwoSidedType", 10 "PETSC_BUILDTWOSIDED_", 11 0 12 }; 13 14 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "PetscCommBuildTwoSidedSetType" 18 /*@ 19 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 20 21 Logically Collective 22 23 Input Arguments: 24 + comm - PETSC_COMM_WORLD 25 - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 26 27 Level: developer 28 29 Note: 30 This option is currently global, but could be made per-communicator. 31 32 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 33 @*/ 34 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 35 { 36 PetscFunctionBegin; 37 #if defined(PETSC_USE_DEBUG) 38 { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 39 PetscMPIInt ierr; 40 PetscMPIInt b1[2],b2[2]; 41 b1[0] = -(PetscMPIInt)twosided; 42 b1[1] = (PetscMPIInt)twosided; 43 ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 44 if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 45 } 46 #endif 47 _twosided_type = twosided; 48 PetscFunctionReturn(0); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "PetscCommBuildTwoSidedGetType" 53 /*@ 54 PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 55 56 Logically Collective 57 58 Output Arguments: 59 + comm - communicator on which to query algorithm 60 - twosided - algorithm to use for PetscCommBuildTwoSided() 61 62 Level: developer 63 64 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 65 @*/ 66 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 67 { 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 72 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 73 #if defined(PETSC_HAVE_MPI_IBARRIER) 74 # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS) 75 /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */ 76 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 77 # else 78 _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 79 # endif 80 #else 81 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 82 #endif 83 ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 84 } 85 *twosided = _twosided_type; 86 PetscFunctionReturn(0); 87 } 88 89 #if defined(PETSC_HAVE_MPI_IBARRIER) 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier" 93 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 94 { 95 PetscErrorCode ierr; 96 PetscMPIInt nrecvs,tag,unitbytes,done; 97 PetscInt i; 98 char *tdata; 99 MPI_Request *sendreqs,barrier; 100 PetscSegBuffer segrank,segdata; 101 102 PetscFunctionBegin; 103 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 104 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 105 tdata = (char*)todata; 106 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 107 for (i=0; i<nto; i++) { 108 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 109 } 110 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 111 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 112 113 nrecvs = 0; 114 barrier = MPI_REQUEST_NULL; 115 for (done=0; !done; ) { 116 PetscMPIInt flag; 117 MPI_Status status; 118 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 119 if (flag) { /* incoming message */ 120 PetscMPIInt *recvrank; 121 void *buf; 122 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 123 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 124 *recvrank = status.MPI_SOURCE; 125 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 126 nrecvs++; 127 } 128 if (barrier == MPI_REQUEST_NULL) { 129 PetscMPIInt sent,nsends; 130 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 131 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 132 if (sent) { 133 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 134 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 135 } 136 } else { 137 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 138 } 139 } 140 *nfrom = nrecvs; 141 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 142 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 143 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 144 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 145 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 #endif 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce" 152 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 153 { 154 PetscErrorCode ierr; 155 PetscMPIInt size,*iflags,nrecvs,tag,unitbytes,*franks; 156 PetscInt i; 157 char *tdata,*fdata; 158 MPI_Request *reqs,*sendreqs; 159 MPI_Status *statuses; 160 161 PetscFunctionBegin; 162 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 163 ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr); 164 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 165 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); 166 ierr = PetscFree(iflags);CHKERRQ(ierr); 167 168 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 169 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 170 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 171 tdata = (char*)todata; 172 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 173 sendreqs = reqs + nrecvs; 174 for (i=0; i<nrecvs; i++) { 175 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 176 } 177 for (i=0; i<nto; i++) { 178 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 179 } 180 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 181 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 182 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 183 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 184 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 185 186 *nfrom = nrecvs; 187 *fromranks = franks; 188 *(void**)fromdata = fdata; 189 PetscFunctionReturn(0); 190 } 191 192 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 193 #undef __FUNCT__ 194 #define __FUNCT__ "PetscCommBuildTwoSided_RedScatter" 195 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 196 { 197 PetscErrorCode ierr; 198 PetscMPIInt size,*iflags,nrecvs,tag,unitbytes,*franks; 199 PetscInt i; 200 char *tdata,*fdata; 201 MPI_Request *reqs,*sendreqs; 202 MPI_Status *statuses; 203 204 PetscFunctionBegin; 205 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 206 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 207 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 208 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 209 ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 210 ierr = PetscFree(iflags);CHKERRQ(ierr); 211 212 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 213 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 214 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 215 tdata = (char*)todata; 216 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 217 sendreqs = reqs + nrecvs; 218 for (i=0; i<nrecvs; i++) { 219 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 220 } 221 for (i=0; i<nto; i++) { 222 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 223 } 224 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 225 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 226 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 227 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 228 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 229 230 *nfrom = nrecvs; 231 *fromranks = franks; 232 *(void**)fromdata = fdata; 233 PetscFunctionReturn(0); 234 } 235 #endif 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "PetscCommBuildTwoSided" 239 /*@C 240 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 241 242 Collective on MPI_Comm 243 244 Input Arguments: 245 + comm - communicator 246 . count - number of entries to send/receive (must match on all ranks) 247 . dtype - datatype to send/receive from each rank (must match on all ranks) 248 . nto - number of ranks to send data to 249 . toranks - ranks to send to (array of length nto) 250 - todata - data to send to each rank (packed) 251 252 Output Arguments: 253 + nfrom - number of ranks receiving messages from 254 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 255 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 256 257 Level: developer 258 259 Options Database Keys: 260 . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication 261 262 Notes: 263 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 264 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 265 266 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 267 268 References: 269 The MPI_Ibarrier implementation uses the algorithm in 270 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 271 272 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 273 @*/ 274 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 275 { 276 PetscErrorCode ierr; 277 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 278 279 PetscFunctionBegin; 280 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 281 ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 282 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 283 switch (buildtype) { 284 case PETSC_BUILDTWOSIDED_IBARRIER: 285 #if defined(PETSC_HAVE_MPI_IBARRIER) 286 ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 287 #else 288 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 289 #endif 290 break; 291 case PETSC_BUILDTWOSIDED_ALLREDUCE: 292 ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 293 break; 294 case PETSC_BUILDTWOSIDED_REDSCATTER: 295 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 296 ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 297 #else 298 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 299 #endif 300 break; 301 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 302 } 303 ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306