1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 3 PetscLogEvent PETSC_BuildTwoSided; 4 5 const char *const PetscBuildTwoSidedTypes[] = { 6 "ALLREDUCE", 7 "IBARRIER", 8 "PetscBuildTwoSidedType", 9 "PETSC_BUILDTWOSIDED_", 10 0 11 }; 12 13 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "PetscCommBuildTwoSidedSetType" 17 /*@ 18 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 19 20 Logically Collective 21 22 Input Arguments: 23 + comm - PETSC_COMM_WORLD 24 - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 25 26 Level: developer 27 28 Note: 29 This option is currently global, but could be made per-communicator. 30 31 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 32 @*/ 33 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 34 { 35 PetscFunctionBegin; 36 #if defined(PETSC_USE_DEBUG) 37 { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 38 PetscMPIInt ierr; 39 PetscMPIInt b1[2],b2[2]; 40 b1[0] = -(PetscMPIInt)twosided; 41 b1[1] = (PetscMPIInt)twosided; 42 ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 43 if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 44 } 45 #endif 46 _twosided_type = twosided; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "PetscCommBuildTwoSidedGetType" 52 /*@ 53 PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 54 55 Logically Collective 56 57 Output Arguments: 58 + comm - communicator on which to query algorithm 59 - twosided - algorithm to use for PetscCommBuildTwoSided() 60 61 Level: developer 62 63 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 64 @*/ 65 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 66 { 67 PetscErrorCode ierr; 68 69 PetscFunctionBegin; 70 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 71 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 72 #if defined(PETSC_HAVE_MPI_IBARRIER) 73 # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS) 74 /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */ 75 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 76 # else 77 _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 78 # endif 79 #else 80 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 81 #endif 82 ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 83 } 84 *twosided = _twosided_type; 85 PetscFunctionReturn(0); 86 } 87 88 #if defined(PETSC_HAVE_MPI_IBARRIER) 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier" 92 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 93 { 94 PetscErrorCode ierr; 95 PetscMPIInt nrecvs,tag,unitbytes,done; 96 PetscInt i; 97 char *tdata; 98 MPI_Request *sendreqs,barrier; 99 PetscSegBuffer segrank,segdata; 100 101 PetscFunctionBegin; 102 ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 103 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 104 tdata = (char*)todata; 105 ierr = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr); 106 for (i=0; i<nto; i++) { 107 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 108 } 109 ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 110 ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 111 112 nrecvs = 0; 113 barrier = MPI_REQUEST_NULL; 114 for (done=0; !done; ) { 115 PetscMPIInt flag; 116 MPI_Status status; 117 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 118 if (flag) { /* incoming message */ 119 PetscMPIInt *recvrank; 120 void *buf; 121 ierr = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr); 122 ierr = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr); 123 *recvrank = status.MPI_SOURCE; 124 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 125 nrecvs++; 126 } 127 if (barrier == MPI_REQUEST_NULL) { 128 PetscMPIInt sent,nsends; 129 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 130 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 131 if (sent) { 132 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 133 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 134 } 135 } else { 136 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 137 } 138 } 139 *nfrom = nrecvs; 140 ierr = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr); 141 ierr = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr); 142 ierr = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr); 143 ierr = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 #endif 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce" 150 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 151 { 152 PetscErrorCode ierr; 153 PetscMPIInt size,*iflags,nrecvs,tag,unitbytes,*franks; 154 PetscInt i; 155 char *tdata,*fdata; 156 MPI_Request *reqs,*sendreqs; 157 MPI_Status *statuses; 158 159 PetscFunctionBegin; 160 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 161 ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr); 162 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 163 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); 164 ierr = PetscFree(iflags);CHKERRQ(ierr); 165 166 ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 167 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 168 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 169 tdata = (char*)todata; 170 ierr = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr); 171 sendreqs = reqs + nrecvs; 172 for (i=0; i<nrecvs; i++) { 173 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 174 } 175 for (i=0; i<nto; i++) { 176 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 177 } 178 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 179 ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr); 180 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 181 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 182 183 *nfrom = nrecvs; 184 *fromranks = franks; 185 *(void**)fromdata = fdata; 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "PetscCommBuildTwoSided" 191 /*@C 192 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 193 194 Collective on MPI_Comm 195 196 Input Arguments: 197 + comm - communicator 198 . count - number of entries to send/receive (must match on all ranks) 199 . dtype - datatype to send/receive from each rank (must match on all ranks) 200 . nto - number of ranks to send data to 201 . toranks - ranks to send to (array of length nto) 202 - todata - data to send to each rank (packed) 203 204 Output Arguments: 205 + nfrom - number of ranks receiving messages from 206 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 207 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 208 209 Level: developer 210 211 Notes: 212 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 213 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 214 215 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 216 217 References: 218 The MPI_Ibarrier implementation uses the algorithm in 219 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 220 221 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 222 @*/ 223 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 224 { 225 PetscErrorCode ierr; 226 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 227 228 PetscFunctionBegin; 229 ierr = PetscSysInitializePackage();CHKERRQ(ierr); 230 ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 231 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 232 switch (buildtype) { 233 case PETSC_BUILDTWOSIDED_IBARRIER: 234 #if defined(PETSC_HAVE_MPI_IBARRIER) 235 ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 236 #else 237 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 238 #endif 239 break; 240 case PETSC_BUILDTWOSIDED_ALLREDUCE: 241 ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 242 break; 243 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 244 } 245 ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr); 246 PetscFunctionReturn(0); 247 } 248