xref: /petsc/src/sys/utils/mpits.c (revision db4deed73f06ae556a0f2e02a0fd6e016e156849)
1 #include <petscsys.h>        /*I  "petscsys.h"  I*/
2 #include <stddef.h>
3 
/* Algorithm used to discover the two-sided communication pattern;
   BUILDTWOSIDED_NOTSET means the runtime option has not been consulted yet. */
typedef enum {BUILDTWOSIDED_NOTSET = -1,
#if defined(PETSC_HAVE_MPI_IBARRIER)
              BUILDTWOSIDED_IBARRIER,
#endif
              BUILDTWOSIDED_ALLREDUCE} BuildTwoSidedType;

/* Names for PetscOptionsGetEnum(); entry order must match BuildTwoSidedType
   above.  The trailing "BuildTwoSidedType"/"BUILDTWOSIDED_" strings are the
   type name and enum prefix required by the PETSc enum-option convention,
   followed by a null terminator. */
static const char *const BuildTwoSidedTypes[] = {
#if defined(PETSC_HAVE_MPI_IBARRIER)
  "IBARRIER",
#endif
  "ALLREDUCE",
  "BuildTwoSidedType",
  "BUILDTWOSIDED_",
  0
};

/* Cached algorithm choice, resolved lazily by PetscBuildTwoSidedGetType() */
static BuildTwoSidedType _twosided_type = BUILDTWOSIDED_NOTSET;
21 
22 #undef __FUNCT__
23 #define __FUNCT__ "PetscBuildTwoSidedGetType"
24 static PetscErrorCode PetscBuildTwoSidedGetType(BuildTwoSidedType *twosided)
25 {
26   PetscErrorCode ierr;
27 
28   PetscFunctionBegin;
29   if (_twosided_type == BUILDTWOSIDED_NOTSET) {
30 #if defined(PETSC_HAVE_MPI_IBARRIER)
31     _twosided_type = BUILDTWOSIDED_IBARRIER;
32 #else
33     _twosided_type = BUILDTWOSIDED_ALLREDUCE;
34 #endif
35     ierr = PetscOptionsGetEnum(PETSC_NULL,"-build_twosided",BuildTwoSidedTypes,(PetscEnum*)&_twosided_type,PETSC_NULL);CHKERRQ(ierr);
36   }
37   *twosided = _twosided_type;
38   PetscFunctionReturn(0);
39 }
40 
#if defined(PETSC_HAVE_MPI_IBARRIER)
/* Segmented (extendable) array implementation */
typedef struct _SegArray *SegArray;
struct _SegArray {
  PetscInt unitbytes;           /* size of one logical entry in bytes */
  PetscInt alloc;               /* capacity of this segment, in entries */
  PetscInt used;                /* entries consumed in this segment */
  PetscInt tailused;            /* total entries stored in all older (tail) segments */
  SegArray tail;                /* linked list of filled segments, newest first */
  union {                       /* Dummy types to ensure alignment */
    PetscReal dummy_real;
    PetscInt dummy_int;
    char array[1];              /* actual storage; over-allocated past the struct */
  } u;
};
56 
57 #undef __FUNCT__
58 #define __FUNCT__ "SegArrayCreate"
59 static PetscErrorCode SegArrayCreate(PetscInt unitbytes,PetscInt expected,SegArray *seg)
60 {
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = PetscMalloc(offsetof(struct _SegArray,u)+expected*unitbytes,seg);CHKERRQ(ierr);
65   ierr = PetscMemzero(*seg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
66   (*seg)->unitbytes = unitbytes;
67   (*seg)->alloc = expected;
68   PetscFunctionReturn(0);
69 }
70 
71 #undef __FUNCT__
72 #define __FUNCT__ "SegArrayAlloc_Private"
73 static PetscErrorCode SegArrayAlloc_Private(SegArray *seg,PetscInt count)
74 {
75   PetscErrorCode ierr;
76   SegArray newseg,s;
77   PetscInt alloc;
78 
79   PetscFunctionBegin;
80   s = *seg;
81   /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */
82   alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused));
83   ierr = PetscMalloc(offsetof(struct _SegArray,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr);
84   ierr = PetscMemzero(newseg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
85   newseg->unitbytes = s->unitbytes;
86   newseg->tailused = s->used + s->tailused;
87   newseg->tail = s;
88   newseg->alloc = alloc;
89   *seg = newseg;
90   PetscFunctionReturn(0);
91 }
92 
93 #undef __FUNCT__
94 #define __FUNCT__ "SegArrayGet"
95 static PetscErrorCode SegArrayGet(SegArray *seg,PetscInt count,void *array)
96 {
97   PetscErrorCode ierr;
98   SegArray s;
99 
100   PetscFunctionBegin;
101   s = *seg;
102   if (PetscUnlikely(s->used + count > s->alloc)) {ierr = SegArrayAlloc_Private(seg,count);CHKERRQ(ierr);}
103   s = *seg;
104   *(char**)array = &s->u.array[s->used*s->unitbytes];
105   s->used += count;
106   PetscFunctionReturn(0);
107 }
108 
109 #undef __FUNCT__
110 #define __FUNCT__ "SegArrayDestroy"
111 static PetscErrorCode SegArrayDestroy(SegArray *seg)
112 {
113   PetscErrorCode ierr;
114   SegArray s;
115 
116   PetscFunctionBegin;
117   for (s=*seg; s; ) {
118     SegArray tail = s->tail;
119     ierr = PetscFree(s);CHKERRQ(ierr);
120     s = tail;
121   }
122   *seg = PETSC_NULL;
123   PetscFunctionReturn(0);
124 }
125 
126 #undef __FUNCT__
127 #define __FUNCT__ "SegArrayExtract"
128 /* Extracts contiguous data and resets segarray */
129 static PetscErrorCode SegArrayExtract(SegArray *seg,void *contiguous)
130 {
131   PetscErrorCode ierr;
132   PetscInt unitbytes;
133   SegArray s,t;
134   char *contig,*ptr;
135 
136   PetscFunctionBegin;
137   s = *seg;
138   unitbytes = s->unitbytes;
139   ierr = PetscMalloc((s->used+s->tailused)*unitbytes,&contig);CHKERRQ(ierr);
140   ptr = contig + s->tailused*unitbytes;
141   ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr);
142   for (t=s->tail; t; ) {
143     SegArray tail = t->tail;
144     ptr -= t->used*unitbytes;
145     ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr);
146     ierr = PetscFree(t);CHKERRQ(ierr);
147     t = tail;
148   }
149   if (ptr != contig) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Tail count does not match");
150   s->tailused = 0;
151   s->tail = PETSC_NULL;
152   *(char**)contiguous = contig;
153   PetscFunctionReturn(0);
154 }
155 
#undef __FUNCT__
#define __FUNCT__ "BuildTwoSided_Ibarrier"
/* Dynamic sparse data exchange via non-blocking consensus (Hoefler, Siebert &
   Lumsdaine 2010): post a synchronous send per destination, service incoming
   messages found by MPI_Iprobe, and once all local sends have completed enter
   MPI_Ibarrier; when the barrier completes, every message in the exchange has
   been delivered.  Receives accumulate in segmented arrays because the number
   of senders is unknown in advance. */
static PetscErrorCode BuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
{
  PetscErrorCode ierr;
  PetscMPIInt    nrecvs,tag,unitbytes,done;
  PetscInt       i;
  char           *tdata;
  MPI_Request    *sendreqs,barrier;
  SegArray       segrank,segdata;

  PetscFunctionBegin;
  /* Dedicated tag so the probes below cannot match unrelated traffic on comm */
  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
  ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
  tdata = (char*)todata;
  ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
  /* Synchronous sends: completion implies the matching receive has started */
  for (i=0; i<nto; i++) {
    ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
  }
  ierr = SegArrayCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
  ierr = SegArrayCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);

  nrecvs = 0;
  barrier = MPI_REQUEST_NULL;   /* MPI_REQUEST_NULL doubles as "barrier not yet started" */
  for (done=0; !done; ) {
    PetscMPIInt flag;
    MPI_Status status;
    ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
    if (flag) {                 /* incoming message */
      PetscMPIInt *recvrank;
      void *buf;
      ierr = SegArrayGet(&segrank,1,&recvrank);CHKERRQ(ierr);
      ierr = SegArrayGet(&segdata,count,&buf);CHKERRQ(ierr);
      *recvrank = status.MPI_SOURCE;
      /* Receive exactly the probed message (matched by source and tag) */
      ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
      nrecvs++;
    }
    if (barrier == MPI_REQUEST_NULL) {
      /* All of our sends acknowledged?  Then join the consensus barrier. */
      /* NOTE(review): PetscMPIIntCast is used here as a value-returning cast;
         confirm this matches this PETSc version's signature (later versions
         take an output argument and return an error code). */
      PetscMPIInt sent,nsends = PetscMPIIntCast(nto);
      ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
      if (sent) {
        ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
        ierr = PetscFree(sendreqs);CHKERRQ(ierr);
      }
    } else {
      /* Keep probing until the barrier completes: messages may still be in flight */
      ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
    }
  }
  *nfrom = nrecvs;
  /* Flatten the segmented arrays into caller-owned contiguous buffers */
  ierr = SegArrayExtract(&segrank,fromranks);CHKERRQ(ierr);
  ierr = SegArrayDestroy(&segrank);CHKERRQ(ierr);
  ierr = SegArrayExtract(&segdata,fromdata);CHKERRQ(ierr);
  ierr = SegArrayDestroy(&segdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
211 #endif
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "BuildTwoSided_Allreduce"
215 static PetscErrorCode BuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
216 {
217   PetscErrorCode ierr;
218   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
219   PetscInt       i;
220   char           *tdata,*fdata;
221   MPI_Request    *reqs,*sendreqs;
222   MPI_Status     *statuses;
223 
224   PetscFunctionBegin;
225   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
226   ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr);
227   ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
228   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
229   ierr = PetscGatherNumberOfMessages(comm,iflags,PETSC_NULL,&nrecvs);CHKERRQ(ierr);
230   ierr = PetscFree(iflags);CHKERRQ(ierr);
231 
232   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
233   ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
234   ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
235   tdata = (char*)todata;
236   ierr = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr);
237   sendreqs = reqs + nrecvs;
238   for (i=0; i<nrecvs; i++) {
239     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
240   }
241   for (i=0; i<nto; i++) {
242     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
243   }
244   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
245   ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr);
246   for (i=0; i<nrecvs; i++) {
247     franks[i] = statuses[i].MPI_SOURCE;
248   }
249   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
250 
251   *nfrom = nrecvs;
252   *fromranks = franks;
253   *(void**)fromdata = fdata;
254   PetscFunctionReturn(0);
255 }
256 
257 #undef __FUNCT__
258 #define __FUNCT__ "PetscCommBuildTwoSided"
259 /*@C
260    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
261 
262    Collective on MPI_Comm
263 
264    Input Arguments:
265 +  comm - communicator
266 .  count - number of entries to send/receive (must match on all ranks)
267 .  dtype - datatype to send/receive from each rank (must match on all ranks)
268 .  nto - number of ranks to send data to
269 .  toranks - ranks to send to (array of length nto)
270 -  todata - data to send to each rank (packed)
271 
272    Output Arguments:
273 +  nfrom - number of ranks receiving messages from
274 .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
275 -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
276 
277    Level: developer
278 
279    Notes:
280    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
281    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
282 
283    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
284 
285    References:
286    The MPI_Ibarrier implementation uses the algorithm in
287    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
288 
289 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
290 @*/
291 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
292 {
293   PetscErrorCode ierr;
294   BuildTwoSidedType buildtype;
295 
296   PetscFunctionBegin;
297   ierr = PetscBuildTwoSidedGetType(&buildtype);CHKERRQ(ierr);
298   switch (buildtype) {
299 #if defined(PETSC_HAVE_MPI_IBARRIER)
300   case BUILDTWOSIDED_IBARRIER:
301     ierr = BuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
302     break;
303 #endif
304   case BUILDTWOSIDED_ALLREDUCE:
305     ierr = BuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
306     break;
307   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
308   }
309   PetscFunctionReturn(0);
310 }
311