1 #include <petscsys.h> /*I "petscsys.h" I*/ 2 #include <stddef.h> 3 4 const char *const PetscBuildTwoSidedTypes[] = { 5 "ALLREDUCE", 6 "IBARRIER", 7 "PetscBuildTwoSidedType", 8 "PETSC_BUILDTWOSIDED_", 9 0 10 }; 11 12 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET; 13 14 #undef __FUNCT__ 15 #define __FUNCT__ "PetscCommBuildTwoSidedSetType" 16 /*@ 17 PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication 18 19 Logically Collective 20 21 Input Arguments: 22 + comm - PETSC_COMM_WORLD 23 - twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided() 24 25 Level: developer 26 27 Note: 28 This option is currently global, but could be made per-communicator. 29 30 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType() 31 @*/ 32 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided) 33 { 34 PetscFunctionBegin; 35 #if defined(PETSC_USE_DEBUG) 36 { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */ 37 PetscMPIInt ierr; 38 PetscMPIInt b1[2],b2[2]; 39 b1[0] = -(PetscMPIInt)twosided; 40 b1[1] = (PetscMPIInt)twosided; 41 ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 42 if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes"); 43 } 44 #endif 45 _twosided_type = twosided; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "PetscCommBuildTwoSidedGetType" 51 /*@ 52 PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication 53 54 Logically Collective 55 56 Output Arguments: 57 + comm - communicator on which to query algorithm 58 - twosided - algorithm to use for PetscCommBuildTwoSided() 59 60 Level: developer 61 62 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType() 63 @*/ 64 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided) 65 { 66 PetscErrorCode ierr; 67 68 PetscFunctionBegin; 69 *twosided = PETSC_BUILDTWOSIDED_NOTSET; 70 if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) { 71 #if defined(PETSC_HAVE_MPI_IBARRIER) 72 # if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS) 73 /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */ 74 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 75 # else 76 _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER; 77 # endif 78 #else 79 _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; 80 #endif 81 ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr); 82 } 83 *twosided = _twosided_type; 84 PetscFunctionReturn(0); 85 } 86 87 #if defined(PETSC_HAVE_MPI_IBARRIER) 88 /* Segmented (extendable) array implementation */ 89 typedef struct _SegArray *SegArray; 90 struct _SegArray { 91 PetscInt unitbytes; 92 PetscInt alloc; 93 PetscInt used; 94 PetscInt tailused; 95 SegArray tail; 96 union { /* Dummy types to ensure alignment */ 97 PetscReal dummy_real; 98 PetscInt dummy_int; 99 char array[1]; 100 } u; 101 }; 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "SegArrayCreate" 105 static PetscErrorCode SegArrayCreate(PetscInt unitbytes,PetscInt expected,SegArray *seg) 106 { 107 PetscErrorCode ierr; 108 109 PetscFunctionBegin; 110 ierr = PetscMalloc(offsetof(struct _SegArray,u)+expected*unitbytes,seg);CHKERRQ(ierr); 111 ierr = PetscMemzero(*seg,offsetof(struct _SegArray,u));CHKERRQ(ierr); 112 113 (*seg)->unitbytes = unitbytes; 114 (*seg)->alloc = expected; 115 PetscFunctionReturn(0); 116 } 117 118 #undef __FUNCT__ 119 #define __FUNCT__ "SegArrayAlloc_Private" 120 static PetscErrorCode SegArrayAlloc_Private(SegArray *seg,PetscInt count) 121 { 122 PetscErrorCode ierr; 123 SegArray newseg,s; 124 PetscInt alloc; 125 126 PetscFunctionBegin; 127 s = *seg; 128 /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */ 129 alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused)); 130 ierr = PetscMalloc(offsetof(struct _SegArray,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr); 131 ierr = PetscMemzero(newseg,offsetof(struct _SegArray,u));CHKERRQ(ierr); 132 133 newseg->unitbytes = s->unitbytes; 134 newseg->tailused = s->used + s->tailused; 135 newseg->tail = s; 136 newseg->alloc = alloc; 137 *seg = newseg; 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "SegArrayGet" 143 static PetscErrorCode SegArrayGet(SegArray *seg,PetscInt count,void *array) 144 { 145 PetscErrorCode ierr; 146 SegArray s; 147 148 PetscFunctionBegin; 149 s = *seg; 150 if (PetscUnlikely(s->used + count > s->alloc)) {ierr = SegArrayAlloc_Private(seg,count);CHKERRQ(ierr);} 151 s = *seg; 152 *(char**)array = &s->u.array[s->used*s->unitbytes]; 153 s->used += count; 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "SegArrayDestroy" 159 static PetscErrorCode SegArrayDestroy(SegArray *seg) 160 { 161 PetscErrorCode ierr; 162 SegArray s; 163 164 PetscFunctionBegin; 165 for (s=*seg; s;) { 166 SegArray tail = s->tail; 167 ierr = PetscFree(s);CHKERRQ(ierr); 168 s = tail; 169 } 170 *seg = NULL; 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "SegArrayExtract" 176 /* Extracts contiguous data and resets segarray */ 177 static PetscErrorCode SegArrayExtract(SegArray *seg,void *contiguous) 178 { 179 PetscErrorCode ierr; 180 PetscInt unitbytes; 181 SegArray s,t; 182 char *contig,*ptr; 183 184 PetscFunctionBegin; 185 s = *seg; 186 187 unitbytes = s->unitbytes; 188 189 ierr = PetscMalloc((s->used+s->tailused)*unitbytes,&contig);CHKERRQ(ierr); 190 ptr = contig + s->tailused*unitbytes; 191 ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr); 192 for (t=s->tail; t;) { 193 SegArray tail = t->tail; 194 ptr -= t->used*unitbytes; 195 ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr); 196 ierr = PetscFree(t);CHKERRQ(ierr); 197 t = tail; 198 } 199 if (ptr != contig) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Tail count does not match"); 200 s->tailused = 0; 201 s->tail = NULL; 202 *(char**)contiguous = contig; 203 PetscFunctionReturn(0); 204 } 205 206 #undef __FUNCT__ 207 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier" 208 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 209 { 210 PetscErrorCode ierr; 211 PetscMPIInt nrecvs,tag,unitbytes,done; 212 PetscInt i; 213 char *tdata; 214 MPI_Request *sendreqs,barrier; 215 SegArray segrank,segdata; 216 217 PetscFunctionBegin; 218 ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 219 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 220 tdata = (char*)todata; 221 ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr); 222 for (i=0; i<nto; i++) { 223 ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 224 } 225 ierr = SegArrayCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr); 226 ierr = SegArrayCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr); 227 228 nrecvs = 0; 229 barrier = MPI_REQUEST_NULL; 230 for (done=0; !done; ) { 231 PetscMPIInt flag; 232 MPI_Status status; 233 ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr); 234 if (flag) { /* incoming message */ 235 PetscMPIInt *recvrank; 236 void *buf; 237 ierr = SegArrayGet(&segrank,1,&recvrank);CHKERRQ(ierr); 238 ierr = SegArrayGet(&segdata,count,&buf);CHKERRQ(ierr); 239 *recvrank = status.MPI_SOURCE; 240 ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 241 nrecvs++; 242 } 243 if (barrier == MPI_REQUEST_NULL) { 244 PetscMPIInt sent,nsends; 245 ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr); 246 ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 247 if (sent) { 248 ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr); 249 ierr = PetscFree(sendreqs);CHKERRQ(ierr); 250 } 251 } else { 252 ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr); 253 } 254 } 255 *nfrom = nrecvs; 256 ierr = SegArrayExtract(&segrank,fromranks);CHKERRQ(ierr); 257 ierr = SegArrayDestroy(&segrank);CHKERRQ(ierr); 258 ierr = SegArrayExtract(&segdata,fromdata);CHKERRQ(ierr); 259 ierr = SegArrayDestroy(&segdata);CHKERRQ(ierr); 260 PetscFunctionReturn(0); 261 } 262 #endif 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce" 266 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 267 { 268 PetscErrorCode ierr; 269 PetscMPIInt size,*iflags,nrecvs,tag,unitbytes,*franks; 270 PetscInt i; 271 char *tdata,*fdata; 272 MPI_Request *reqs,*sendreqs; 273 MPI_Status *statuses; 274 275 PetscFunctionBegin; 276 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 277 ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr); 278 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 279 for (i=0; i<nto; i++) iflags[toranks[i]] = 1; 280 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr); 281 ierr = PetscFree(iflags);CHKERRQ(ierr); 282 283 ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr); 284 ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr); 285 ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr); 286 tdata = (char*)todata; 287 ierr = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr); 288 sendreqs = reqs + nrecvs; 289 for (i=0; i<nrecvs; i++) { 290 ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr); 291 } 292 for (i=0; i<nto; i++) { 293 ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr); 294 } 295 ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr); 296 ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr); 297 for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE; 298 ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr); 299 300 *nfrom = nrecvs; 301 *fromranks = franks; 302 *(void**)fromdata = fdata; 303 PetscFunctionReturn(0); 304 } 305 306 #undef __FUNCT__ 307 #define __FUNCT__ "PetscCommBuildTwoSided" 308 /*@C 309 PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths) 310 311 Collective on MPI_Comm 312 313 Input Arguments: 314 + comm - communicator 315 . count - number of entries to send/receive (must match on all ranks) 316 . dtype - datatype to send/receive from each rank (must match on all ranks) 317 . nto - number of ranks to send data to 318 . toranks - ranks to send to (array of length nto) 319 - todata - data to send to each rank (packed) 320 321 Output Arguments: 322 + nfrom - number of ranks receiving messages from 323 . fromranks - ranks receiving messages from (length nfrom; caller should PetscFree()) 324 - fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree()) 325 326 Level: developer 327 328 Notes: 329 This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and 330 PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data. 331 332 Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not. 333 334 References: 335 The MPI_Ibarrier implementation uses the algorithm in 336 Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010. 337 338 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths() 339 @*/ 340 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata) 341 { 342 PetscErrorCode ierr; 343 PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET; 344 345 PetscFunctionBegin; 346 ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr); 347 switch (buildtype) { 348 case PETSC_BUILDTWOSIDED_IBARRIER: 349 #if defined(PETSC_HAVE_MPI_IBARRIER) 350 ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 351 #else 352 SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)"); 353 #endif 354 break; 355 case PETSC_BUILDTWOSIDED_ALLREDUCE: 356 ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr); 357 break; 358 default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication"); 359 } 360 PetscFunctionReturn(0); 361 } 362