xref: /petsc/src/sys/utils/mpits.c (revision da80777bb9afa786145217fc25b54e33c3ebf541)
1 #include <petscsys.h>        /*I  "petscsys.h"  I*/
2 #include <stddef.h>
3 
4 typedef enum {BUILDTWOSIDED_NOTSET = -1,
5 #if defined(PETSC_HAVE_MPI_IBARRIER)
6               BUILDTWOSIDED_IBARRIER,
7 #endif
8               BUILDTWOSIDED_ALLREDUCE
9              } BuildTwoSidedType;
10 
11 static const char *const BuildTwoSidedTypes[] = {
12 #if defined(PETSC_HAVE_MPI_IBARRIER)
13   "IBARRIER",
14 #endif
15   "ALLREDUCE",
16   "BuildTwoSidedType",
17   "BUILDTWOSIDED_",
18   0
19 };
20 
21 static BuildTwoSidedType _twosided_type = BUILDTWOSIDED_NOTSET;
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "PetscBuildTwoSidedGetType"
25 static PetscErrorCode PetscBuildTwoSidedGetType(BuildTwoSidedType *twosided)
26 {
27   PetscErrorCode ierr;
28 
29   PetscFunctionBegin;
30   if (_twosided_type == BUILDTWOSIDED_NOTSET) {
31 #if defined(PETSC_HAVE_MPI_IBARRIER)
32     _twosided_type = BUILDTWOSIDED_IBARRIER;
33 #else
34     _twosided_type = BUILDTWOSIDED_ALLREDUCE;
35 #endif
36     ierr = PetscOptionsGetEnum(PETSC_NULL,"-build_twosided",BuildTwoSidedTypes,(PetscEnum*)&_twosided_type,PETSC_NULL);CHKERRQ(ierr);
37   }
38   *twosided = _twosided_type;
39   PetscFunctionReturn(0);
40 }
41 
42 #if defined(PETSC_HAVE_MPI_IBARRIER)
43 /* Segmented (extendable) array implementation */
44 typedef struct _SegArray *SegArray;
45 struct _SegArray {
46   PetscInt unitbytes;
47   PetscInt alloc;
48   PetscInt used;
49   PetscInt tailused;
50   SegArray tail;
51   union {                       /* Dummy types to ensure alignment */
52     PetscReal dummy_real;
53     PetscInt dummy_int;
54     char array[1];
55   } u;
56 };
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "SegArrayCreate"
60 static PetscErrorCode SegArrayCreate(PetscInt unitbytes,PetscInt expected,SegArray *seg)
61 {
62   PetscErrorCode ierr;
63 
64   PetscFunctionBegin;
65   ierr = PetscMalloc(offsetof(struct _SegArray,u)+expected*unitbytes,seg);CHKERRQ(ierr);
66   ierr = PetscMemzero(*seg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
67   (*seg)->unitbytes = unitbytes;
68   (*seg)->alloc = expected;
69   PetscFunctionReturn(0);
70 }
71 
72 #undef __FUNCT__
73 #define __FUNCT__ "SegArrayAlloc_Private"
74 static PetscErrorCode SegArrayAlloc_Private(SegArray *seg,PetscInt count)
75 {
76   PetscErrorCode ierr;
77   SegArray newseg,s;
78   PetscInt alloc;
79 
80   PetscFunctionBegin;
81   s = *seg;
82   /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */
83   alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused));
84   ierr = PetscMalloc(offsetof(struct _SegArray,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr);
85   ierr = PetscMemzero(newseg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
86   newseg->unitbytes = s->unitbytes;
87   newseg->tailused = s->used + s->tailused;
88   newseg->tail = s;
89   newseg->alloc = alloc;
90   *seg = newseg;
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "SegArrayGet"
96 static PetscErrorCode SegArrayGet(SegArray *seg,PetscInt count,void *array)
97 {
98   PetscErrorCode ierr;
99   SegArray s;
100 
101   PetscFunctionBegin;
102   s = *seg;
103   if (PetscUnlikely(s->used + count > s->alloc)) {ierr = SegArrayAlloc_Private(seg,count);CHKERRQ(ierr);}
104   s = *seg;
105   *(char**)array = &s->u.array[s->used*s->unitbytes];
106   s->used += count;
107   PetscFunctionReturn(0);
108 }
109 
110 #undef __FUNCT__
111 #define __FUNCT__ "SegArrayDestroy"
112 static PetscErrorCode SegArrayDestroy(SegArray *seg)
113 {
114   PetscErrorCode ierr;
115   SegArray s;
116 
117   PetscFunctionBegin;
118   for (s=*seg; s;) {
119     SegArray tail = s->tail;
120     ierr = PetscFree(s);CHKERRQ(ierr);
121     s = tail;
122   }
123   *seg = PETSC_NULL;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "SegArrayExtract"
129 /* Extracts contiguous data and resets segarray */
130 static PetscErrorCode SegArrayExtract(SegArray *seg,void *contiguous)
131 {
132   PetscErrorCode ierr;
133   PetscInt unitbytes;
134   SegArray s,t;
135   char *contig,*ptr;
136 
137   PetscFunctionBegin;
138   s = *seg;
139   unitbytes = s->unitbytes;
140   ierr = PetscMalloc((s->used+s->tailused)*unitbytes,&contig);CHKERRQ(ierr);
141   ptr = contig + s->tailused*unitbytes;
142   ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr);
143   for (t=s->tail; t;) {
144     SegArray tail = t->tail;
145     ptr -= t->used*unitbytes;
146     ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr);
147     ierr = PetscFree(t);CHKERRQ(ierr);
148     t = tail;
149   }
150   if (ptr != contig) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Tail count does not match");
151   s->tailused = 0;
152   s->tail = PETSC_NULL;
153   *(char**)contiguous = contig;
154   PetscFunctionReturn(0);
155 }
156 
157 #undef __FUNCT__
158 #define __FUNCT__ "BuildTwoSided_Ibarrier"
159 static PetscErrorCode BuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
160 {
161   PetscErrorCode ierr;
162   PetscMPIInt    nrecvs,tag,unitbytes,done;
163   PetscInt       i;
164   char           *tdata;
165   MPI_Request    *sendreqs,barrier;
166   SegArray       segrank,segdata;
167 
168   PetscFunctionBegin;
169   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
170   ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
171   tdata = (char*)todata;
172   ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
173   for (i=0; i<nto; i++) {
174     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
175   }
176   ierr = SegArrayCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
177   ierr = SegArrayCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
178 
179   nrecvs = 0;
180   barrier = MPI_REQUEST_NULL;
181   for (done=0; !done;) {
182     PetscMPIInt flag;
183     MPI_Status status;
184     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
185     if (flag) {                 /* incoming message */
186       PetscMPIInt *recvrank;
187       void *buf;
188       ierr = SegArrayGet(&segrank,1,&recvrank);CHKERRQ(ierr);
189       ierr = SegArrayGet(&segdata,count,&buf);CHKERRQ(ierr);
190       *recvrank = status.MPI_SOURCE;
191       ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
192       nrecvs++;
193     }
194     if (barrier == MPI_REQUEST_NULL) {
195       PetscMPIInt sent,nsends = PetscMPIIntCast(nto);
196       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
197       if (sent) {
198         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
199         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
200       }
201     } else {
202       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
203     }
204   }
205   *nfrom = nrecvs;
206   ierr = SegArrayExtract(&segrank,fromranks);CHKERRQ(ierr);
207   ierr = SegArrayDestroy(&segrank);CHKERRQ(ierr);
208   ierr = SegArrayExtract(&segdata,fromdata);CHKERRQ(ierr);
209   ierr = SegArrayDestroy(&segdata);CHKERRQ(ierr);
210   PetscFunctionReturn(0);
211 }
212 #endif
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "BuildTwoSided_Allreduce"
216 static PetscErrorCode BuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
217 {
218   PetscErrorCode ierr;
219   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
220   PetscInt       i;
221   char           *tdata,*fdata;
222   MPI_Request    *reqs,*sendreqs;
223   MPI_Status     *statuses;
224 
225   PetscFunctionBegin;
226   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
227   ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr);
228   ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
229   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
230   ierr = PetscGatherNumberOfMessages(comm,iflags,PETSC_NULL,&nrecvs);CHKERRQ(ierr);
231   ierr = PetscFree(iflags);CHKERRQ(ierr);
232 
233   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
234   ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
235   ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
236   tdata = (char*)todata;
237   ierr = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr);
238   sendreqs = reqs + nrecvs;
239   for (i=0; i<nrecvs; i++) {
240     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
241   }
242   for (i=0; i<nto; i++) {
243     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
244   }
245   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
246   ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr);
247   for (i=0; i<nrecvs; i++) {
248     franks[i] = statuses[i].MPI_SOURCE;
249   }
250   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
251 
252   *nfrom = nrecvs;
253   *fromranks = franks;
254   *(void**)fromdata = fdata;
255   PetscFunctionReturn(0);
256 }
257 
258 #undef __FUNCT__
259 #define __FUNCT__ "PetscCommBuildTwoSided"
260 /*@C
261    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
262 
263    Collective on MPI_Comm
264 
265    Input Arguments:
266 +  comm - communicator
267 .  count - number of entries to send/receive (must match on all ranks)
268 .  dtype - datatype to send/receive from each rank (must match on all ranks)
269 .  nto - number of ranks to send data to
270 .  toranks - ranks to send to (array of length nto)
271 -  todata - data to send to each rank (packed)
272 
273    Output Arguments:
274 +  nfrom - number of ranks receiving messages from
275 .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
276 -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
277 
278    Level: developer
279 
280    Notes:
281    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
282    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
283 
284    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
285 
286    References:
287    The MPI_Ibarrier implementation uses the algorithm in
288    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
289 
290 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
291 @*/
292 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
293 {
294   PetscErrorCode ierr;
295   BuildTwoSidedType buildtype = BUILDTWOSIDED_NOTSET;
296 
297   PetscFunctionBegin;
298   ierr = PetscBuildTwoSidedGetType(&buildtype);CHKERRQ(ierr);
299   switch (buildtype) {
300 #if defined(PETSC_HAVE_MPI_IBARRIER)
301   case BUILDTWOSIDED_IBARRIER:
302     ierr = BuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
303     break;
304 #endif
305   case BUILDTWOSIDED_ALLREDUCE:
306     ierr = BuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
307     break;
308   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
309   }
310   PetscFunctionReturn(0);
311 }
312