#include <petscsys.h> /*I "petscsys.h" I*/

/* Names of the PetscBuildTwoSidedType enum values, in enum order, followed by
   the enum type name, the common prefix of the values, and a NULL terminator --
   the layout expected by PetscOptionsGetEnum() below. */
const char *const PetscBuildTwoSidedTypes[] = {
  "ALLREDUCE",
  "IBARRIER",
  "PetscBuildTwoSidedType",
  "PETSC_BUILDTWOSIDED_",
  0
};

/* Algorithm selection is currently global rather than per-communicator; it
   stays NOTSET until resolved lazily by PetscCommBuildTwoSidedGetType(). */
static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSidedSetType"
/*@
   PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
+  comm - communicator on which the algorithm will be used (the setting is currently global, so comm is only used for collective error checking)
-  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

   Level: developer

   Note:
   This option is currently global, but could be made per-communicator.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
{
  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
    PetscMPIInt ierr;
    PetscMPIInt b1[2],b2[2];
    /* Reduce (-v,v) with MPI_MAX: all ranks passed the same v iff -max(-v) == max(v) */
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    ierr  = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
  }
#endif
  _twosided_type = twosided;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSidedGetType"
/*@
   PetscCommBuildTwoSidedGetType - get algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
.  comm - communicator on which to query algorithm

   Output Arguments:
.  twosided - algorithm to use for PetscCommBuildTwoSided()

   Level: developer

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
    /* Choose a default lazily: prefer the scalable Ibarrier algorithm when
       MPI-3 MPI_Ibarrier is available, except on MPICH ch3:sock builds known
       not to make progress on nonblocking collectives. */
#if defined(PETSC_HAVE_MPI_IBARRIER)
# if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
    /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
# else
    _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
# endif
#else
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
#endif
    /* Let the command line override the compiled-in default */
    ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
  }
  *twosided = _twosided_type;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MPI_IBARRIER)

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
/* Nonblocking-consensus implementation (see the Hoefler et al. reference in
   PetscCommBuildTwoSided() below): post synchronous sends, probe/receive until
   all local sends have completed, then enter a nonblocking barrier while
   continuing to probe until every rank has reached the barrier. */
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,unitbytes,done;
  PetscInt       i;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  PetscSegBuffer segrank,segdata;

  PetscFunctionBegin;
  ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
  ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
  tdata = (char*)todata;
  /* Issend (synchronous send) is essential: completion of a send proves the
     matching receive has started, so "all my sends completed" is meaningful. */
  ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  /* Number of incoming messages is unknown in advance; accumulate ranks and
     payloads in growable segmented buffers. */
  ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status  status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) {                 /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      ierr      = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
      ierr      = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      nrecvs++;
    }
    if (barrier == MPI_REQUEST_NULL) {
      /* Not yet in the barrier: enter it once all of our sends have completed */
      PetscMPIInt sent,nsends;
      ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      /* In the barrier: keep probing until every rank has entered it, at which
         point no more messages can be in flight */
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  /* Hand contiguous copies of the accumulated ranks/data to the caller */
  ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
  ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
  ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
/* Reference implementation: count the number of incoming messages collectively
   (via PetscGatherNumberOfMessages), then exchange with plain Irecv/Isend. */
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
  PetscInt       i;
  char           *tdata,*fdata;
  MPI_Request    *reqs,*sendreqs;
  MPI_Status     *statuses;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* Mark which ranks we send to, then reduce to learn how many send to us */
  ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr);
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
  ierr = PetscFree(iflags);CHKERRQ(ierr);

  ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
  ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
  ierr  = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  /* Single allocation: reqs[0..nrecvs-1] are the receives, the following nto
     entries are the sends; statuses[] is indexed the same way, so the first
     nrecvs statuses identify the source ranks after the Waitall. */
  ierr     = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);

  *nfrom            = nrecvs;
  *fromranks        = franks;
  *(void**)fromdata = fdata;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSided"
/*@C
   PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

   Collective on MPI_Comm

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
-  todata - data to send to each rank (packed)

   Output Arguments:
+  nfrom - number of ranks receiving messages from
.  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
-  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
   The MPI_Ibarrier implementation uses the algorithm in
   Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode         ierr;
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscFunctionBegin;
  /* Resolve the (possibly lazily defaulted) algorithm, then dispatch */
  ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_IBARRIER)
    ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
#else
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
    break;
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
    ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  PetscFunctionReturn(0);
}