1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 3 PetscLogEvent PETSC_BuildTwoSided; 4 5 const char *const PetscBuildTwoSidedTypes[] = { 6 "ALLREDUCE", 7 "IBARRIER", 8 "REDSCATTER", 9 "PetscBuildTwoSidedType", 10 "PETSC_BUILDTWOSIDED_", 11 0 12 }; 13 14 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 15 16 #undef __FUNCT__ 17 #define __FUNCT__ "PetscCommBuildTwoSidedSetType" 18 /*@ 19 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 20 21 Logically Collective 22 23 Input Arguments: 24 + comm - PETSC_COMM_WORLD 25 - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 26 27 Level: developer 28 29 Note: 30 This option is currently global, but could be made per-communicator. 31 32 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 33 @*/ 34 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 35 { 36 PetscFunctionBegin; 37 #if defined(PETSC_USE_DEBUG) 38 { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 39 PetscMPIInt ierr; 40 PetscMPIInt b1[2],b2[2]; 41 b1[0] = -(PetscMPIInt)twosided; 42 b1[1] = (PetscMPIInt)twosided; 43 ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 44 if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 45 } 46 #endif 47 _twosided_type = twosided; 48 PetscFunctionReturn(0); 49 } 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "PetscCommBuildTwoSidedGetType" 53 /*@ 54 PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 55 56 Logically Collective 57 58 Output Arguments: 59 + comm - communicator on which to query algorithm 60 - twosided - algorithm to use for PetscCommBuildTwoSided() 61 62 Level: developer 63 64 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 65 @*/ 66 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 67 { 68 PetscErrorCode ierr; 69 70 PetscFunctionBegin; 71 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 72 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 73 #if defined(PETSC_HAVE_MPI_IBARRIER) 74 # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS) 75 /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */ 76 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 77 # else 78 _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 79 # endif 80 #else 81 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 82 #endif 83 ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 84 } 85 *twosided = _twosided_type; 86 PetscFunctionReturn(0); 87 } 88 89 #if defined(PETSC_HAVE_MPI_IBARRIER) 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier" 93 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 94 { 95 PetscErrorCode ierr; 96 PetscMPIInt nrecvs,tag,unitbytes,done,i; 97 char *tdata; 98 MPI_Request *sendreqs,barrier; 99 PetscSegBuffer segrank,segdata; 100 101 PetscFunctionBegin; 102 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 103 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 104 tdata = (char*)todata; 105 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 106 for (i=0; i<nto; i++) { 107 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 108 } 109 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 110 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 111 112 nrecvs = 0; 113 barrier = MPI_REQUEST_NULL; 114 for (done=0; !done; ) { 115 PetscMPIInt flag; 116 MPI_Status status; 117 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 118 if (flag) { /* incoming message */ 119 PetscMPIInt *recvrank; 120 void *buf; 121 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 122 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 123 *recvrank = status.MPI_SOURCE; 124 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 125 nrecvs++; 126 } 127 if (barrier == MPI_REQUEST_NULL) { 128 PetscMPIInt sent,nsends; 129 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 130 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 131 if (sent) { 132 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 133 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 134 } 135 } else { 136 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 137 } 138 } 139 *nfrom = nrecvs; 140 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 141 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 142 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 143 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 144 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 #endif 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce" 151 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 152 { 153 PetscErrorCode ierr; 154 PetscMPIInt size,*iflags,nrecvs,tag,unitbytes,*franks,i; 155 char *tdata,*fdata; 156 MPI_Request *reqs,*sendreqs; 157 MPI_Status *statuses; 158 159 PetscFunctionBegin; 160 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 161 ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr); 162 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 163 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); 164 ierr = PetscFree(iflags);CHKERRQ(ierr); 165 166 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 167 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 168 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 169 tdata = (char*)todata; 170 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 171 sendreqs = reqs + nrecvs; 172 for (i=0; i<nrecvs; i++) { 173 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 174 } 175 for (i=0; i<nto; i++) { 176 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 177 } 178 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 179 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 180 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 181 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 182 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 183 184 *nfrom = nrecvs; 185 *fromranks = franks; 186 *(void**)fromdata = fdata; 187 PetscFunctionReturn(0); 188 } 189 190 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 191 #undef __FUNCT__ 192 #define __FUNCT__ "PetscCommBuildTwoSided_RedScatter" 193 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 194 { 195 PetscErrorCode ierr; 196 PetscMPIInt size,*iflags,nrecvs,tag,unitbytes,*franks,i; 197 char *tdata,*fdata; 198 MPI_Request *reqs,*sendreqs; 199 MPI_Status *statuses; 200 201 PetscFunctionBegin; 202 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 203 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 204 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 205 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 206 ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 207 ierr = PetscFree(iflags);CHKERRQ(ierr); 208 209 ierr = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr); 210 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 211 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 212 tdata = (char*)todata; 213 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 214 sendreqs = reqs + nrecvs; 215 for (i=0; i<nrecvs; i++) { 216 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 217 } 218 for (i=0; i<nto; i++) { 219 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 220 } 221 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 222 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 223 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 224 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 225 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 226 227 *nfrom = nrecvs; 228 *fromranks = franks; 229 *(void**)fromdata = fdata; 230 PetscFunctionReturn(0); 231 } 232 #endif 233 234 #undef __FUNCT__ 235 #define __FUNCT__ "PetscCommBuildTwoSided" 236 /*@C 237 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 238 239 Collective on MPI_Comm 240 241 Input Arguments: 242 + comm - communicator 243 . count - number of entries to send/receive (must match on all ranks) 244 . dtype - datatype to send/receive from each rank (must match on all ranks) 245 . nto - number of ranks to send data to 246 . toranks - ranks to send to (array of length nto) 247 - todata - data to send to each rank (packed) 248 249 Output Arguments: 250 + nfrom - number of ranks receiving messages from 251 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 252 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 253 254 Level: developer 255 256 Options Database Keys: 257 . -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication 258 259 Notes: 260 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 261 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 262 263 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 264 265 References: 266 The MPI_Ibarrier implementation uses the algorithm in 267 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 268 269 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 270 @*/ 271 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 272 { 273 PetscErrorCode ierr; 274 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 275 276 PetscFunctionBegin; 277 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 278 ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 279 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 280 switch (buildtype) { 281 case PETSC_BUILDTWOSIDED_IBARRIER: 282 #if defined(PETSC_HAVE_MPI_IBARRIER) 283 ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 284 #else 285 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 286 #endif 287 break; 288 case PETSC_BUILDTWOSIDED_ALLREDUCE: 289 ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 290 break; 291 case PETSC_BUILDTWOSIDED_REDSCATTER: 292 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK) 293 ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 294 #else 295 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)"); 296 #endif 297 break; 298 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 299 } 300 ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 301 PetscFunctionReturn(0); 302 } 303