xref: /petsc/src/sys/utils/mpits.c (revision 9895aa37ac365bac650f6bd8bf977519f7222510)
1 #include <petscsys.h>        /*I  "petscsys.h"  I*/
2 
3 const char *const PetscBuildTwoSidedTypes[] = {
4   "ALLREDUCE",
5   "IBARRIER",
6   "PetscBuildTwoSidedType",
7   "PETSC_BUILDTWOSIDED_",
8   0
9 };
10 
11 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "PetscCommBuildTwoSidedSetType"
15 /*@
16    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
17 
18    Logically Collective
19 
20    Input Arguments:
21 +  comm - PETSC_COMM_WORLD
22 -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
23 
24    Level: developer
25 
26    Note:
27    This option is currently global, but could be made per-communicator.
28 
29 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
30 @*/
31 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
32 {
33   PetscFunctionBegin;
34 #if defined(PETSC_USE_DEBUG)
35   {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
36     PetscMPIInt ierr;
37     PetscMPIInt b1[2],b2[2];
38     b1[0] = -(PetscMPIInt)twosided;
39     b1[1] = (PetscMPIInt)twosided;
40     ierr  = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
41     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
42   }
43 #endif
44   _twosided_type = twosided;
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "PetscCommBuildTwoSidedGetType"
50 /*@
51    PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication
52 
53    Logically Collective
54 
55    Output Arguments:
56 +  comm - communicator on which to query algorithm
57 -  twosided - algorithm to use for PetscCommBuildTwoSided()
58 
59    Level: developer
60 
61 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
62 @*/
63 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
69   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
70 #if defined(PETSC_HAVE_MPI_IBARRIER)
71 #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
72     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
73     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
74 #  else
75     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
76 #  endif
77 #else
78     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
79 #endif
80     ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
81   }
82   *twosided = _twosided_type;
83   PetscFunctionReturn(0);
84 }
85 
86 #if defined(PETSC_HAVE_MPI_IBARRIER)
87 /* Segmented (extendable) array implementation */
88 typedef struct _SegArray *SegArray;
89 struct _SegArray {
90   PetscInt unitbytes;
91   PetscInt alloc;
92   PetscInt used;
93   PetscInt tailused;
94   SegArray tail;
95   union {                       /* Dummy types to ensure alignment */
96     PetscReal dummy_real;
97     PetscInt  dummy_int;
98     char      array[1];
99   } u;
100 };
101 
102 #undef __FUNCT__
103 #define __FUNCT__ "SegArrayCreate"
104 static PetscErrorCode SegArrayCreate(PetscInt unitbytes,PetscInt expected,SegArray *seg)
105 {
106   PetscErrorCode ierr;
107 
108   PetscFunctionBegin;
109   ierr = PetscMalloc(offsetof(struct _SegArray,u)+expected*unitbytes,seg);CHKERRQ(ierr);
110   ierr = PetscMemzero(*seg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
111 
112   (*seg)->unitbytes = unitbytes;
113   (*seg)->alloc     = expected;
114   PetscFunctionReturn(0);
115 }
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "SegArrayAlloc_Private"
119 static PetscErrorCode SegArrayAlloc_Private(SegArray *seg,PetscInt count)
120 {
121   PetscErrorCode ierr;
122   SegArray       newseg,s;
123   PetscInt       alloc;
124 
125   PetscFunctionBegin;
126   s = *seg;
127   /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */
128   alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused));
129   ierr  = PetscMalloc(offsetof(struct _SegArray,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr);
130   ierr  = PetscMemzero(newseg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
131 
132   newseg->unitbytes = s->unitbytes;
133   newseg->tailused  = s->used + s->tailused;
134   newseg->tail      = s;
135   newseg->alloc     = alloc;
136   *seg              = newseg;
137   PetscFunctionReturn(0);
138 }
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "SegArrayGet"
142 static PetscErrorCode SegArrayGet(SegArray *seg,PetscInt count,void *array)
143 {
144   PetscErrorCode ierr;
145   SegArray       s;
146 
147   PetscFunctionBegin;
148   s = *seg;
149   if (PetscUnlikely(s->used + count > s->alloc)) {ierr = SegArrayAlloc_Private(seg,count);CHKERRQ(ierr);}
150   s = *seg;
151   *(char**)array = &s->u.array[s->used*s->unitbytes];
152   s->used += count;
153   PetscFunctionReturn(0);
154 }
155 
156 #undef __FUNCT__
157 #define __FUNCT__ "SegArrayDestroy"
158 static PetscErrorCode SegArrayDestroy(SegArray *seg)
159 {
160   PetscErrorCode ierr;
161   SegArray       s;
162 
163   PetscFunctionBegin;
164   for (s=*seg; s;) {
165     SegArray tail = s->tail;
166     ierr = PetscFree(s);CHKERRQ(ierr);
167     s = tail;
168   }
169   *seg = NULL;
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "SegArrayExtract"
175 /* Extracts contiguous data and resets segarray */
176 static PetscErrorCode SegArrayExtract(SegArray *seg,void *contiguous)
177 {
178   PetscErrorCode ierr;
179   PetscInt       unitbytes;
180   SegArray       s,t;
181   char           *contig,*ptr;
182 
183   PetscFunctionBegin;
184   s = *seg;
185 
186   unitbytes = s->unitbytes;
187 
188   ierr = PetscMalloc((s->used+s->tailused)*unitbytes,&contig);CHKERRQ(ierr);
189   ptr  = contig + s->tailused*unitbytes;
190   ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr);
191   for (t=s->tail; t;) {
192     SegArray tail = t->tail;
193     ptr -= t->used*unitbytes;
194     ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr);
195     ierr = PetscFree(t);CHKERRQ(ierr);
196     t    = tail;
197   }
198   if (ptr != contig) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Tail count does not match");
199   s->tailused         = 0;
200   s->tail             = NULL;
201   *(char**)contiguous = contig;
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
207 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
208 {
209   PetscErrorCode ierr;
210   PetscMPIInt    nrecvs,tag,unitbytes,done;
211   PetscInt       i;
212   char           *tdata;
213   MPI_Request    *sendreqs,barrier;
214   SegArray       segrank,segdata;
215 
216   PetscFunctionBegin;
217   ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
218   ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
219   tdata = (char*)todata;
220   ierr  = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
221   for (i=0; i<nto; i++) {
222     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
223   }
224   ierr = SegArrayCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
225   ierr = SegArrayCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
226 
227   nrecvs  = 0;
228   barrier = MPI_REQUEST_NULL;
229   for (done=0; !done; ) {
230     PetscMPIInt flag;
231     MPI_Status  status;
232     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
233     if (flag) {                 /* incoming message */
234       PetscMPIInt *recvrank;
235       void        *buf;
236       ierr      = SegArrayGet(&segrank,1,&recvrank);CHKERRQ(ierr);
237       ierr      = SegArrayGet(&segdata,count,&buf);CHKERRQ(ierr);
238       *recvrank = status.MPI_SOURCE;
239       ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
240       nrecvs++;
241     }
242     if (barrier == MPI_REQUEST_NULL) {
243       PetscMPIInt sent,nsends;
244       ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
245       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
246       if (sent) {
247         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
248         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
249       }
250     } else {
251       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
252     }
253   }
254   *nfrom = nrecvs;
255   ierr   = SegArrayExtract(&segrank,fromranks);CHKERRQ(ierr);
256   ierr   = SegArrayDestroy(&segrank);CHKERRQ(ierr);
257   ierr   = SegArrayExtract(&segdata,fromdata);CHKERRQ(ierr);
258   ierr   = SegArrayDestroy(&segdata);CHKERRQ(ierr);
259   PetscFunctionReturn(0);
260 }
261 #endif
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
265 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
266 {
267   PetscErrorCode ierr;
268   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
269   PetscInt       i;
270   char           *tdata,*fdata;
271   MPI_Request    *reqs,*sendreqs;
272   MPI_Status     *statuses;
273 
274   PetscFunctionBegin;
275   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
276   ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr);
277   ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
278   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
279   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
280   ierr = PetscFree(iflags);CHKERRQ(ierr);
281 
282   ierr     = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
283   ierr     = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
284   ierr     = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
285   tdata    = (char*)todata;
286   ierr     = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr);
287   sendreqs = reqs + nrecvs;
288   for (i=0; i<nrecvs; i++) {
289     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
290   }
291   for (i=0; i<nto; i++) {
292     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
293   }
294   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
295   ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr);
296   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
297   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
298 
299   *nfrom            = nrecvs;
300   *fromranks        = franks;
301   *(void**)fromdata = fdata;
302   PetscFunctionReturn(0);
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "PetscCommBuildTwoSided"
307 /*@C
308    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
309 
310    Collective on MPI_Comm
311 
312    Input Arguments:
313 +  comm - communicator
314 .  count - number of entries to send/receive (must match on all ranks)
315 .  dtype - datatype to send/receive from each rank (must match on all ranks)
316 .  nto - number of ranks to send data to
317 .  toranks - ranks to send to (array of length nto)
318 -  todata - data to send to each rank (packed)
319 
320    Output Arguments:
321 +  nfrom - number of ranks receiving messages from
322 .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
323 -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
324 
325    Level: developer
326 
327    Notes:
328    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
329    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
330 
331    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
332 
333    References:
334    The MPI_Ibarrier implementation uses the algorithm in
335    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
336 
337 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
338 @*/
339 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
340 {
341   PetscErrorCode         ierr;
342   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
343 
344   PetscFunctionBegin;
345   ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
346   switch (buildtype) {
347   case PETSC_BUILDTWOSIDED_IBARRIER:
348 #if defined(PETSC_HAVE_MPI_IBARRIER)
349     ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
350 #else
351     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
352 #endif
353     break;
354   case PETSC_BUILDTWOSIDED_ALLREDUCE:
355     ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
356     break;
357   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
358   }
359   PetscFunctionReturn(0);
360 }
361