1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 3 const char *const PetscBuildTwoSidedTypes[] = { 4 "ALLREDUCE", 5 "IBARRIER", 6 "PetscBuildTwoSidedType", 7 "PETSC_BUILDTWOSIDED_", 8 0 9 }; 10 11 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 12 13 #undef __FUNCT__ 14 #define __FUNCT__ "PetscCommBuildTwoSidedSetType" 15 /*@ 16 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 17 18 Logically Collective 19 20 Input Arguments: 21 + comm - PETSC_COMM_WORLD 22 - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 23 24 Level: developer 25 26 Note: 27 This option is currently global, but could be made per-communicator. 28 29 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 30 @*/ 31 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 32 { 33 PetscFunctionBegin; 34 #if defined(PETSC_USE_DEBUG) 35 { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 36 PetscMPIInt ierr; 37 PetscMPIInt b1[2],b2[2]; 38 b1[0] = -(PetscMPIInt)twosided; 39 b1[1] = (PetscMPIInt)twosided; 40 ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 41 if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 42 } 43 #endif 44 _twosided_type = twosided; 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "PetscCommBuildTwoSidedGetType" 50 /*@ 51 PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 52 53 Logically Collective 54 55 Output Arguments: 56 + comm - communicator on which to query algorithm 57 - twosided - algorithm to use for PetscCommBuildTwoSided() 58 59 Level: developer 60 61 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 62 @*/ 63 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 64 { 65 PetscErrorCode ierr; 66 67 PetscFunctionBegin; 68 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 69 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 70 #if defined(PETSC_HAVE_MPI_IBARRIER) 71 # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS) 72 /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */ 73 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 74 # else 75 _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 76 # endif 77 #else 78 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 79 #endif 80 ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 81 } 82 *twosided = _twosided_type; 83 PetscFunctionReturn(0); 84 } 85 86 #if defined(PETSC_HAVE_MPI_IBARRIER) 87 /* Segmented (extendable) array implementation */ 88 typedef struct _SegArray *SegArray; 89 struct _SegArray { 90 PetscInt unitbytes; 91 PetscInt alloc; 92 PetscInt used; 93 PetscInt tailused; 94 SegArray tail; 95 union { /* Dummy types to ensure alignment */ 96 PetscReal dummy_real; 97 PetscInt dummy_int; 98 char array[1]; 99 } u; 100 }; 101 102 #undef __FUNCT__ 103 #define __FUNCT__ "SegArrayCreate" 104 static PetscErrorCode SegArrayCreate(PetscInt unitbytes,PetscInt expected,SegArray *seg) 105 { 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 ierr = PetscMalloc(offsetof(struct _SegArray,u)+expected*unitbytes,seg);CHKERRQ(ierr); 110 ierr = PetscMemzero(*seg,offsetof(struct _SegArray,u));CHKERRQ(ierr); 111 112 (*seg)->unitbytes = unitbytes; 113 (*seg)->alloc = expected; 114 PetscFunctionReturn(0); 115 } 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "SegArrayAlloc_Private" 119 static PetscErrorCode SegArrayAlloc_Private(SegArray *seg,PetscInt count) 120 { 121 PetscErrorCode ierr; 122 SegArray newseg,s; 123 PetscInt alloc; 124 125 PetscFunctionBegin; 126 s = *seg; 127 /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */ 128 alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused)); 129 ierr = PetscMalloc(offsetof(struct _SegArray,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr); 130 ierr = PetscMemzero(newseg,offsetof(struct _SegArray,u));CHKERRQ(ierr); 131 132 newseg->unitbytes = s->unitbytes; 133 newseg->tailused = s->used + s->tailused; 134 newseg->tail = s; 135 newseg->alloc = alloc; 136 *seg = newseg; 137 PetscFunctionReturn(0); 138 } 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "SegArrayGet" 142 static PetscErrorCode SegArrayGet(SegArray *seg,PetscInt count,void *array) 143 { 144 PetscErrorCode ierr; 145 SegArray s; 146 147 PetscFunctionBegin; 148 s = *seg; 149 if (PetscUnlikely(s->used + count > s->alloc)) {ierr = SegArrayAlloc_Private(seg,count);CHKERRQ(ierr);} 150 s = *seg; 151 *(char**)array = &s->u.array[s->used*s->unitbytes]; 152 s->used += count; 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "SegArrayDestroy" 158 static PetscErrorCode SegArrayDestroy(SegArray *seg) 159 { 160 PetscErrorCode ierr; 161 SegArray s; 162 163 PetscFunctionBegin; 164 for (s=*seg; s;) { 165 SegArray tail = s->tail; 166 ierr = PetscFree(s);CHKERRQ(ierr); 167 s = tail; 168 } 169 *seg = NULL; 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "SegArrayExtract" 175 /* Extracts contiguous data and resets segarray */ 176 static PetscErrorCode SegArrayExtract(SegArray *seg,void *contiguous) 177 { 178 PetscErrorCode ierr; 179 PetscInt unitbytes; 180 SegArray s,t; 181 char *contig,*ptr; 182 183 PetscFunctionBegin; 184 s = *seg; 185 186 unitbytes = s->unitbytes; 187 188 ierr = PetscMalloc((s->used+s->tailused)*unitbytes,&contig);CHKERRQ(ierr); 189 ptr = contig + s->tailused*unitbytes; 190 ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr); 191 for (t=s->tail; t;) { 192 SegArray tail = t->tail; 193 ptr -= t->used*unitbytes; 194 ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr); 195 ierr = PetscFree(t);CHKERRQ(ierr); 196 t = tail; 197 } 198 if (ptr != contig) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Tail count does not match"); 199 s->tailused = 0; 200 s->tail = NULL; 201 *(char**)contiguous = contig; 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier" 207 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 208 { 209 PetscErrorCode ierr; 210 PetscMPIInt nrecvs,tag,unitbytes,done; 211 PetscInt i; 212 char *tdata; 213 MPI_Request *sendreqs,barrier; 214 SegArray segrank,segdata; 215 216 PetscFunctionBegin; 217 ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 218 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 219 tdata = (char*)todata; 220 ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr); 221 for (i=0; i<nto; i++) { 222 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 223 } 224 ierr = SegArrayCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 225 ierr = SegArrayCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 226 227 nrecvs = 0; 228 barrier = MPI_REQUEST_NULL; 229 for (done=0; !done; ) { 230 PetscMPIInt flag; 231 MPI_Status status; 232 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 233 if (flag) { /* incoming message */ 234 PetscMPIInt *recvrank; 235 void *buf; 236 ierr = SegArrayGet(&segrank,1,&recvrank);CHKERRQ(ierr); 237 ierr = SegArrayGet(&segdata,count,&buf);CHKERRQ(ierr); 238 *recvrank = status.MPI_SOURCE; 239 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 240 nrecvs++; 241 } 242 if (barrier == MPI_REQUEST_NULL) { 243 PetscMPIInt sent,nsends; 244 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 245 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 246 if (sent) { 247 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 248 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 249 } 250 } else { 251 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 252 } 253 } 254 *nfrom = nrecvs; 255 ierr = SegArrayExtract(&segrank,fromranks);CHKERRQ(ierr); 256 ierr = SegArrayDestroy(&segrank);CHKERRQ(ierr); 257 ierr = SegArrayExtract(&segdata,fromdata);CHKERRQ(ierr); 258 ierr = SegArrayDestroy(&segdata);CHKERRQ(ierr); 259 PetscFunctionReturn(0); 260 } 261 #endif 262 263 #undef __FUNCT__ 264 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce" 265 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 266 { 267 PetscErrorCode ierr; 268 PetscMPIInt size,*iflags,nrecvs,tag,unitbytes,*franks; 269 PetscInt i; 270 char *tdata,*fdata; 271 MPI_Request *reqs,*sendreqs; 272 MPI_Status *statuses; 273 274 PetscFunctionBegin; 275 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 276 ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr); 277 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 278 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 279 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); 280 ierr = PetscFree(iflags);CHKERRQ(ierr); 281 282 ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 283 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 284 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 285 tdata = (char*)todata; 286 ierr = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr); 287 sendreqs = reqs + nrecvs; 288 for (i=0; i<nrecvs; i++) { 289 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 290 } 291 for (i=0; i<nto; i++) { 292 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 293 } 294 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 295 ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr); 296 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 297 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 298 299 *nfrom = nrecvs; 300 *fromranks = franks; 301 *(void**)fromdata = fdata; 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "PetscCommBuildTwoSided" 307 /*@C 308 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 309 310 Collective on MPI_Comm 311 312 Input Arguments: 313 + comm - communicator 314 . count - number of entries to send/receive (must match on all ranks) 315 . dtype - datatype to send/receive from each rank (must match on all ranks) 316 . nto - number of ranks to send data to 317 . toranks - ranks to send to (array of length nto) 318 - todata - data to send to each rank (packed) 319 320 Output Arguments: 321 + nfrom - number of ranks receiving messages from 322 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 323 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 324 325 Level: developer 326 327 Notes: 328 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 329 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 330 331 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 332 333 References: 334 The MPI_Ibarrier implementation uses the algorithm in 335 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 336 337 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 338 @*/ 339 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 340 { 341 PetscErrorCode ierr; 342 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 343 344 PetscFunctionBegin; 345 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 346 switch (buildtype) { 347 case PETSC_BUILDTWOSIDED_IBARRIER: 348 #if defined(PETSC_HAVE_MPI_IBARRIER) 349 ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 350 #else 351 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 352 #endif 353 break; 354 case PETSC_BUILDTWOSIDED_ALLREDUCE: 355 ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 356 break; 357 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 358 } 359 PetscFunctionReturn(0); 360 } 361