xref: /petsc/src/sys/utils/mpits.c (revision 2fa5cd679192b9b390e47ae2d0650965e6b1d9fa)
1 #include <petscsys.h>        /*I  "petscsys.h"  I*/
2 #include <stddef.h>
3 
4 const char *const PetscBuildTwoSidedTypes[] = {
5   "ALLREDUCE",
6   "IBARRIER",
7   "PetscBuildTwoSidedType",
8   "PETSC_BUILDTWOSIDED_",
9   0
10 };
11 
12 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "PetscCommBuildTwoSidedSetType"
16 /*@
17    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
18 
19    Logically Collective
20 
21    Input Arguments:
22 +  comm - PETSC_COMM_WORLD
23 -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
24 
25    Level: developer
26 
27    Note:
28    This option is currently global, but could be made per-communicator.
29 
30 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
31 @*/
32 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
33 {
34   PetscFunctionBegin;
35 #if defined(PETSC_USE_DEBUG)
36   {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
37     PetscMPIInt ierr;
38     PetscMPIInt b1[2],b2[2];
39     b1[0] = -(PetscMPIInt)twosided;
40     b1[1] = (PetscMPIInt)twosided;
41     ierr  = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
42     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
43   }
44 #endif
45   _twosided_type = twosided;
46   PetscFunctionReturn(0);
47 }
48 
49 #undef __FUNCT__
50 #define __FUNCT__ "PetscCommBuildTwoSidedGetType"
51 /*@
52    PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication
53 
54    Logically Collective
55 
56    Output Arguments:
57 +  comm - communicator on which to query algorithm
58 -  twosided - algorithm to use for PetscCommBuildTwoSided()
59 
60    Level: developer
61 
62 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
63 @*/
64 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
65 {
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
70   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
71 #if defined(PETSC_HAVE_MPI_IBARRIER)
72 #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
73     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
74     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
75 #  else
76     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
77 #  endif
78 #else
79     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
80 #endif
81     ierr = PetscOptionsGetEnum(PETSC_NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,PETSC_NULL);CHKERRQ(ierr);
82   }
83   *twosided = _twosided_type;
84   PetscFunctionReturn(0);
85 }
86 
87 #if defined(PETSC_HAVE_MPI_IBARRIER)
88 /* Segmented (extendable) array implementation */
89 typedef struct _SegArray *SegArray;
90 struct _SegArray {
91   PetscInt unitbytes;
92   PetscInt alloc;
93   PetscInt used;
94   PetscInt tailused;
95   SegArray tail;
96   union {                       /* Dummy types to ensure alignment */
97     PetscReal dummy_real;
98     PetscInt  dummy_int;
99     char      array[1];
100   } u;
101 };
102 
103 #undef __FUNCT__
104 #define __FUNCT__ "SegArrayCreate"
105 static PetscErrorCode SegArrayCreate(PetscInt unitbytes,PetscInt expected,SegArray *seg)
106 {
107   PetscErrorCode ierr;
108 
109   PetscFunctionBegin;
110   ierr = PetscMalloc(offsetof(struct _SegArray,u)+expected*unitbytes,seg);CHKERRQ(ierr);
111   ierr = PetscMemzero(*seg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
112 
113   (*seg)->unitbytes = unitbytes;
114   (*seg)->alloc     = expected;
115   PetscFunctionReturn(0);
116 }
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "SegArrayAlloc_Private"
120 static PetscErrorCode SegArrayAlloc_Private(SegArray *seg,PetscInt count)
121 {
122   PetscErrorCode ierr;
123   SegArray       newseg,s;
124   PetscInt       alloc;
125 
126   PetscFunctionBegin;
127   s = *seg;
128   /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */
129   alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused));
130   ierr  = PetscMalloc(offsetof(struct _SegArray,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr);
131   ierr  = PetscMemzero(newseg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
132 
133   newseg->unitbytes = s->unitbytes;
134   newseg->tailused  = s->used + s->tailused;
135   newseg->tail      = s;
136   newseg->alloc     = alloc;
137   *seg              = newseg;
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "SegArrayGet"
143 static PetscErrorCode SegArrayGet(SegArray *seg,PetscInt count,void *array)
144 {
145   PetscErrorCode ierr;
146   SegArray       s;
147 
148   PetscFunctionBegin;
149   s = *seg;
150   if (PetscUnlikely(s->used + count > s->alloc)) {ierr = SegArrayAlloc_Private(seg,count);CHKERRQ(ierr);}
151   s = *seg;
152   *(char**)array = &s->u.array[s->used*s->unitbytes];
153   s->used += count;
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "SegArrayDestroy"
159 static PetscErrorCode SegArrayDestroy(SegArray *seg)
160 {
161   PetscErrorCode ierr;
162   SegArray       s;
163 
164   PetscFunctionBegin;
165   for (s=*seg; s;) {
166     SegArray tail = s->tail;
167     ierr = PetscFree(s);CHKERRQ(ierr);
168     s = tail;
169   }
170   *seg = PETSC_NULL;
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "SegArrayExtract"
176 /* Extracts contiguous data and resets segarray */
177 static PetscErrorCode SegArrayExtract(SegArray *seg,void *contiguous)
178 {
179   PetscErrorCode ierr;
180   PetscInt       unitbytes;
181   SegArray       s,t;
182   char           *contig,*ptr;
183 
184   PetscFunctionBegin;
185   s = *seg;
186 
187   unitbytes = s->unitbytes;
188 
189   ierr = PetscMalloc((s->used+s->tailused)*unitbytes,&contig);CHKERRQ(ierr);
190   ptr  = contig + s->tailused*unitbytes;
191   ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr);
192   for (t=s->tail; t;) {
193     SegArray tail = t->tail;
194     ptr -= t->used*unitbytes;
195     ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr);
196     ierr = PetscFree(t);CHKERRQ(ierr);
197     t    = tail;
198   }
199   if (ptr != contig) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Tail count does not match");
200   s->tailused         = 0;
201   s->tail             = PETSC_NULL;
202   *(char**)contiguous = contig;
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNCT__
207 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
208 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
209 {
210   PetscErrorCode ierr;
211   PetscMPIInt    nrecvs,tag,unitbytes,done;
212   PetscInt       i;
213   char           *tdata;
214   MPI_Request    *sendreqs,barrier;
215   SegArray       segrank,segdata;
216 
217   PetscFunctionBegin;
218   ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
219   ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
220   tdata = (char*)todata;
221   ierr  = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
222   for (i=0; i<nto; i++) {
223     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
224   }
225   ierr = SegArrayCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
226   ierr = SegArrayCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
227 
228   nrecvs  = 0;
229   barrier = MPI_REQUEST_NULL;
230   for (done=0; !done; ) {
231     PetscMPIInt flag;
232     MPI_Status  status;
233     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
234     if (flag) {                 /* incoming message */
235       PetscMPIInt *recvrank;
236       void        *buf;
237       ierr      = SegArrayGet(&segrank,1,&recvrank);CHKERRQ(ierr);
238       ierr      = SegArrayGet(&segdata,count,&buf);CHKERRQ(ierr);
239       *recvrank = status.MPI_SOURCE;
240       ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
241       nrecvs++;
242     }
243     if (barrier == MPI_REQUEST_NULL) {
244       PetscMPIInt sent,nsends;
245       ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
246       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
247       if (sent) {
248         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
249         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
250       }
251     } else {
252       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
253     }
254   }
255   *nfrom = nrecvs;
256   ierr   = SegArrayExtract(&segrank,fromranks);CHKERRQ(ierr);
257   ierr   = SegArrayDestroy(&segrank);CHKERRQ(ierr);
258   ierr   = SegArrayExtract(&segdata,fromdata);CHKERRQ(ierr);
259   ierr   = SegArrayDestroy(&segdata);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 #endif
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
266 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
267 {
268   PetscErrorCode ierr;
269   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
270   PetscInt       i;
271   char           *tdata,*fdata;
272   MPI_Request    *reqs,*sendreqs;
273   MPI_Status     *statuses;
274 
275   PetscFunctionBegin;
276   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
277   ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr);
278   ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
279   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
280   ierr = PetscGatherNumberOfMessages(comm,iflags,PETSC_NULL,&nrecvs);CHKERRQ(ierr);
281   ierr = PetscFree(iflags);CHKERRQ(ierr);
282 
283   ierr     = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
284   ierr     = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
285   ierr     = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
286   tdata    = (char*)todata;
287   ierr     = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr);
288   sendreqs = reqs + nrecvs;
289   for (i=0; i<nrecvs; i++) {
290     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
291   }
292   for (i=0; i<nto; i++) {
293     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
294   }
295   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
296   ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr);
297   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
298   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
299 
300   *nfrom            = nrecvs;
301   *fromranks        = franks;
302   *(void**)fromdata = fdata;
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "PetscCommBuildTwoSided"
308 /*@C
309    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
310 
311    Collective on MPI_Comm
312 
313    Input Arguments:
314 +  comm - communicator
315 .  count - number of entries to send/receive (must match on all ranks)
316 .  dtype - datatype to send/receive from each rank (must match on all ranks)
317 .  nto - number of ranks to send data to
318 .  toranks - ranks to send to (array of length nto)
319 -  todata - data to send to each rank (packed)
320 
321    Output Arguments:
322 +  nfrom - number of ranks receiving messages from
323 .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
324 -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
325 
326    Level: developer
327 
328    Notes:
329    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
330    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
331 
332    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
333 
334    References:
335    The MPI_Ibarrier implementation uses the algorithm in
336    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
337 
338 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
339 @*/
340 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
341 {
342   PetscErrorCode         ierr;
343   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
344 
345   PetscFunctionBegin;
346   ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
347   switch (buildtype) {
348   case PETSC_BUILDTWOSIDED_IBARRIER:
349 #if defined(PETSC_HAVE_MPI_IBARRIER)
350     ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
351 #else
352     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
353 #endif
354     break;
355   case PETSC_BUILDTWOSIDED_ALLREDUCE:
356     ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
357     break;
358   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
359   }
360   PetscFunctionReturn(0);
361 }
362