xref: /petsc/src/sys/utils/mpits.c (revision bbd56ea5790821d2a217d362e8e9710702952333)
1 #include <petscsys.h>        /*I  "petscsys.h"  I*/
2 #include <stddef.h>
3 
/* String table for PetscBuildTwoSidedType, consumed by PetscOptionsGetEnum().
   Per PETSc convention the value names come first, followed by the enum type
   name, the common prefix of the enum constants, and a null terminator. */
const char *const PetscBuildTwoSidedTypes[] = {
  "ALLREDUCE",
  "IBARRIER",
  "PetscBuildTwoSidedType",
  "PETSC_BUILDTWOSIDED_",
  0
};
11 
12 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
13 
#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSidedSetType"
/*@
   PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
+  comm - communicator on which the setting is logically collective (the setting itself is currently global, so this is typically PETSC_COMM_WORLD)
-  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

   Level: developer

   Note:
   This option is currently global, but could be made per-communicator.

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
{
  PetscFunctionBegin;
#if defined(PETSC_USE_DEBUG)
  {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
    PetscMPIInt ierr;
    PetscMPIInt b1[2],b2[2];
    /* Reduce (-value,+value) with MPI_MAX: the results are negatives of each
       other iff every rank passed the same enum value. */
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    ierr = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
  }
#endif
  _twosided_type = twosided;
  PetscFunctionReturn(0);
}
48 
#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSidedGetType"
/*@
   PetscCommBuildTwoSidedGetType - get algorithm to use when building two-sided communication

   Logically Collective

   Input Arguments:
.  comm - communicator on which to query algorithm

   Output Arguments:
.  twosided - algorithm to use for PetscCommBuildTwoSided()

   Level: developer

.seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Defensive: give *twosided a defined value even if an early error return occurs below */
  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
    /* Lazily choose a default: prefer MPI_Ibarrier (MPI-3) when available,
       unless building against an MPICH ch3:sock with the known nonblocking-
       collective progress bug, in which case fall back to allreduce. */
#if defined(PETSC_HAVE_MPI_IBARRIER)
#  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
    /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
#  else
    _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
#  endif
#else
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
#endif
    /* Allow the user to override the compiled-in default from the options database */
    ierr = PetscOptionsGetEnum(PETSC_NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,PETSC_NULL);CHKERRQ(ierr);
  }
  *twosided = _twosided_type;
  PetscFunctionReturn(0);
}
86 
87 #if defined(PETSC_HAVE_MPI_IBARRIER)
/* Segmented (extendable) array implementation: a singly linked list of
   progressively larger segments; only the head segment accepts new entries. */
typedef struct _SegArray *SegArray;
struct _SegArray {
  PetscInt unitbytes;           /* size in bytes of one logical entry */
  PetscInt alloc;               /* capacity (in entries) of this segment */
  PetscInt used;                /* entries consumed in this segment */
  PetscInt tailused;            /* total entries stored in all tail segments */
  SegArray tail;                /* previous (full) segment, or NULL for the first */
  union {                       /* Dummy types to ensure alignment */
    PetscReal dummy_real;
    PetscInt dummy_int;
    char array[1];              /* over-allocated in-place storage for entries */
  } u;
};
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "SegArrayCreate"
105 static PetscErrorCode SegArrayCreate(PetscInt unitbytes,PetscInt expected,SegArray *seg)
106 {
107   PetscErrorCode ierr;
108 
109   PetscFunctionBegin;
110   ierr = PetscMalloc(offsetof(struct _SegArray,u)+expected*unitbytes,seg);CHKERRQ(ierr);
111   ierr = PetscMemzero(*seg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
112   (*seg)->unitbytes = unitbytes;
113   (*seg)->alloc = expected;
114   PetscFunctionReturn(0);
115 }
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "SegArrayAlloc_Private"
119 static PetscErrorCode SegArrayAlloc_Private(SegArray *seg,PetscInt count)
120 {
121   PetscErrorCode ierr;
122   SegArray newseg,s;
123   PetscInt alloc;
124 
125   PetscFunctionBegin;
126   s = *seg;
127   /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */
128   alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused));
129   ierr = PetscMalloc(offsetof(struct _SegArray,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr);
130   ierr = PetscMemzero(newseg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
131   newseg->unitbytes = s->unitbytes;
132   newseg->tailused = s->used + s->tailused;
133   newseg->tail = s;
134   newseg->alloc = alloc;
135   *seg = newseg;
136   PetscFunctionReturn(0);
137 }
138 
139 #undef __FUNCT__
140 #define __FUNCT__ "SegArrayGet"
141 static PetscErrorCode SegArrayGet(SegArray *seg,PetscInt count,void *array)
142 {
143   PetscErrorCode ierr;
144   SegArray s;
145 
146   PetscFunctionBegin;
147   s = *seg;
148   if (PetscUnlikely(s->used + count > s->alloc)) {ierr = SegArrayAlloc_Private(seg,count);CHKERRQ(ierr);}
149   s = *seg;
150   *(char**)array = &s->u.array[s->used*s->unitbytes];
151   s->used += count;
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "SegArrayDestroy"
157 static PetscErrorCode SegArrayDestroy(SegArray *seg)
158 {
159   PetscErrorCode ierr;
160   SegArray s;
161 
162   PetscFunctionBegin;
163   for (s=*seg; s;) {
164     SegArray tail = s->tail;
165     ierr = PetscFree(s);CHKERRQ(ierr);
166     s = tail;
167   }
168   *seg = PETSC_NULL;
169   PetscFunctionReturn(0);
170 }
171 
#undef __FUNCT__
#define __FUNCT__ "SegArrayExtract"
/* Extracts contiguous data and resets segarray.  The returned buffer is
   allocated here and ownership transfers to the caller (PetscFree). */
static PetscErrorCode SegArrayExtract(SegArray *seg,void *contiguous)
{
  PetscErrorCode ierr;
  PetscInt unitbytes;
  SegArray s,t;
  char *contig,*ptr;

  PetscFunctionBegin;
  s = *seg;
  unitbytes = s->unitbytes;
  /* One allocation sized for the head segment plus everything in the tails */
  ierr = PetscMalloc((s->used+s->tailused)*unitbytes,&contig);CHKERRQ(ierr);
  /* Head (newest) segment goes at the END of the buffer ... */
  ptr = contig + s->tailused*unitbytes;
  ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr);
  /* ... then walk the tail list newest-to-oldest, copying backward and freeing */
  for (t=s->tail; t;) {
    SegArray tail = t->tail;
    ptr -= t->used*unitbytes;
    ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr);
    ierr = PetscFree(t);CHKERRQ(ierr);
    t = tail;
  }
  /* Every tail entry must have been accounted for exactly once */
  if (ptr != contig) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Tail count does not match");
  /* NOTE(review): only the tail bookkeeping is reset; s->used stays nonzero, so
     the head segment is NOT emptied.  Safe for the current caller, which
     destroys the segarray immediately after — confirm before reusing. */
  s->tailused = 0;
  s->tail = PETSC_NULL;
  *(char**)contiguous = contig;
  PetscFunctionReturn(0);
}
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
204 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
205 {
206   PetscErrorCode ierr;
207   PetscMPIInt    nrecvs,tag,unitbytes,done;
208   PetscInt       i;
209   char           *tdata;
210   MPI_Request    *sendreqs,barrier;
211   SegArray       segrank,segdata;
212 
213   PetscFunctionBegin;
214   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
215   ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
216   tdata = (char*)todata;
217   ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
218   for (i=0; i<nto; i++) {
219     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
220   }
221   ierr = SegArrayCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
222   ierr = SegArrayCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
223 
224   nrecvs = 0;
225   barrier = MPI_REQUEST_NULL;
226   for (done=0; !done;) {
227     PetscMPIInt flag;
228     MPI_Status status;
229     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
230     if (flag) {                 /* incoming message */
231       PetscMPIInt *recvrank;
232       void *buf;
233       ierr = SegArrayGet(&segrank,1,&recvrank);CHKERRQ(ierr);
234       ierr = SegArrayGet(&segdata,count,&buf);CHKERRQ(ierr);
235       *recvrank = status.MPI_SOURCE;
236       ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
237       nrecvs++;
238     }
239     if (barrier == MPI_REQUEST_NULL) {
240       PetscMPIInt sent,nsends;
241       ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
242       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
243       if (sent) {
244         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
245         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
246       }
247     } else {
248       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
249     }
250   }
251   *nfrom = nrecvs;
252   ierr = SegArrayExtract(&segrank,fromranks);CHKERRQ(ierr);
253   ierr = SegArrayDestroy(&segrank);CHKERRQ(ierr);
254   ierr = SegArrayExtract(&segdata,fromdata);CHKERRQ(ierr);
255   ierr = SegArrayDestroy(&segdata);CHKERRQ(ierr);
256   PetscFunctionReturn(0);
257 }
258 #endif
259 
#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
/* Two-sided construction via a collective message count: each rank marks its
   destinations in a size-length flag array, PetscGatherNumberOfMessages()
   reduces those flags to the number of incoming messages, then payloads move
   with nonblocking point-to-point sends/recvs.  Uses O(size) temporary memory
   per rank, unlike the Ibarrier variant. */
static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
  PetscInt       i;
  char           *tdata,*fdata;
  MPI_Request    *reqs,*sendreqs;
  MPI_Status     *statuses;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* One-hot flag per destination rank */
  ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr);
  ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
  for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
  ierr = PetscGatherNumberOfMessages(comm,iflags,PETSC_NULL,&nrecvs);CHKERRQ(ierr);
  ierr = PetscFree(iflags);CHKERRQ(ierr);

  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
  /* fdata ownership transfers to the caller via *fromdata below */
  ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
  tdata = (char*)todata;
  /* reqs[0..nrecvs-1] are recvs, reqs[nrecvs..nrecvs+nto-1] are sends; the
     ordering matters because statuses[0..nrecvs-1] are read below */
  ierr = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr);
  sendreqs = reqs + nrecvs;
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
  }
  for (i=0; i<nto; i++) {
    ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
  /* Source ranks are only known after the wildcard recvs complete; the first
     nrecvs statuses correspond to the recv requests posted above */
  ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    franks[i] = statuses[i].MPI_SOURCE;
  }
  ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);

  *nfrom = nrecvs;
  *fromranks = franks;
  *(void**)fromdata = fdata;
  PetscFunctionReturn(0);
}
303 
#undef __FUNCT__
#define __FUNCT__ "PetscCommBuildTwoSided"
/*@C
   PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

   Collective on MPI_Comm

   Input Arguments:
+  comm - communicator
.  count - number of entries to send/receive (must match on all ranks)
.  dtype - datatype to send/receive from each rank (must match on all ranks)
.  nto - number of ranks to send data to
.  toranks - ranks to send to (array of length nto)
-  todata - data to send to each rank (packed)

   Output Arguments:
+  nfrom - number of ranks receiving messages from
.  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
-  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

   Level: developer

   Notes:
   This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

   Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

   References:
   The MPI_Ibarrier implementation uses the algorithm in
   Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.

.seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscFunctionBegin;
  /* Resolve the (possibly lazily defaulted) algorithm, then dispatch */
  ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_IBARRIER)
    ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
#else
    /* GetType never selects IBARRIER without the capability, but a user can
       request it explicitly via SetType or the options database */
    SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
    break;
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
    ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
    break;
  default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
  }
  PetscFunctionReturn(0);
}
360