xref: /petsc/src/sys/utils/mpits.c (revision be33224567306cb44901cfc55a41c9126afd0e6e)
1 #include <petscsys.h>        /*I  "petscsys.h"  I*/
2 #include <stddef.h>
3 
4 typedef enum {BUILDTWOSIDED_NOTSET = -1,
5 #if defined(PETSC_HAVE_MPI_IBARRIER)
6               BUILDTWOSIDED_IBARRIER,
7 #endif
8               BUILDTWOSIDED_ALLREDUCE
9              } BuildTwoSidedType;
10 
11 static const char *const BuildTwoSidedTypes[] = {
12 #if defined(PETSC_HAVE_MPI_IBARRIER)
13   "IBARRIER",
14 #endif
15   "ALLREDUCE",
16   "BuildTwoSidedType",
17   "BUILDTWOSIDED_",
18   0
19 };
20 
21 static BuildTwoSidedType _twosided_type = BUILDTWOSIDED_NOTSET;
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "PetscBuildTwoSidedGetType"
25 static PetscErrorCode PetscBuildTwoSidedGetType(BuildTwoSidedType *twosided)
26 {
27   PetscErrorCode ierr;
28 
29   PetscFunctionBegin;
30   *twosided = BUILDTWOSIDED_NOTSET;
31   if (_twosided_type == BUILDTWOSIDED_NOTSET) {
32 #if defined(PETSC_HAVE_MPI_IBARRIER)
33     _twosided_type = BUILDTWOSIDED_IBARRIER;
34 #else
35     _twosided_type = BUILDTWOSIDED_ALLREDUCE;
36 #endif
37     ierr = PetscOptionsGetEnum(PETSC_NULL,"-build_twosided",BuildTwoSidedTypes,(PetscEnum*)&_twosided_type,PETSC_NULL);CHKERRQ(ierr);
38   }
39   *twosided = _twosided_type;
40   PetscFunctionReturn(0);
41 }
42 
43 #if defined(PETSC_HAVE_MPI_IBARRIER)
44 /* Segmented (extendable) array implementation */
45 typedef struct _SegArray *SegArray;
46 struct _SegArray {
47   PetscInt unitbytes;
48   PetscInt alloc;
49   PetscInt used;
50   PetscInt tailused;
51   SegArray tail;
52   union {                       /* Dummy types to ensure alignment */
53     PetscReal dummy_real;
54     PetscInt dummy_int;
55     char array[1];
56   } u;
57 };
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "SegArrayCreate"
61 static PetscErrorCode SegArrayCreate(PetscInt unitbytes,PetscInt expected,SegArray *seg)
62 {
63   PetscErrorCode ierr;
64 
65   PetscFunctionBegin;
66   ierr = PetscMalloc(offsetof(struct _SegArray,u)+expected*unitbytes,seg);CHKERRQ(ierr);
67   ierr = PetscMemzero(*seg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
68   (*seg)->unitbytes = unitbytes;
69   (*seg)->alloc = expected;
70   PetscFunctionReturn(0);
71 }
72 
73 #undef __FUNCT__
74 #define __FUNCT__ "SegArrayAlloc_Private"
75 static PetscErrorCode SegArrayAlloc_Private(SegArray *seg,PetscInt count)
76 {
77   PetscErrorCode ierr;
78   SegArray newseg,s;
79   PetscInt alloc;
80 
81   PetscFunctionBegin;
82   s = *seg;
83   /* Grow at least fast enough to hold next item, like Fibonacci otherwise (up to 1MB chunks) */
84   alloc = PetscMax(s->used+count,PetscMin(1000000/s->unitbytes+1,s->alloc+s->tailused));
85   ierr = PetscMalloc(offsetof(struct _SegArray,u)+alloc*s->unitbytes,&newseg);CHKERRQ(ierr);
86   ierr = PetscMemzero(newseg,offsetof(struct _SegArray,u));CHKERRQ(ierr);
87   newseg->unitbytes = s->unitbytes;
88   newseg->tailused = s->used + s->tailused;
89   newseg->tail = s;
90   newseg->alloc = alloc;
91   *seg = newseg;
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "SegArrayGet"
97 static PetscErrorCode SegArrayGet(SegArray *seg,PetscInt count,void *array)
98 {
99   PetscErrorCode ierr;
100   SegArray s;
101 
102   PetscFunctionBegin;
103   s = *seg;
104   if (PetscUnlikely(s->used + count > s->alloc)) {ierr = SegArrayAlloc_Private(seg,count);CHKERRQ(ierr);}
105   s = *seg;
106   *(char**)array = &s->u.array[s->used*s->unitbytes];
107   s->used += count;
108   PetscFunctionReturn(0);
109 }
110 
111 #undef __FUNCT__
112 #define __FUNCT__ "SegArrayDestroy"
113 static PetscErrorCode SegArrayDestroy(SegArray *seg)
114 {
115   PetscErrorCode ierr;
116   SegArray s;
117 
118   PetscFunctionBegin;
119   for (s=*seg; s;) {
120     SegArray tail = s->tail;
121     ierr = PetscFree(s);CHKERRQ(ierr);
122     s = tail;
123   }
124   *seg = PETSC_NULL;
125   PetscFunctionReturn(0);
126 }
127 
128 #undef __FUNCT__
129 #define __FUNCT__ "SegArrayExtract"
130 /* Extracts contiguous data and resets segarray */
131 static PetscErrorCode SegArrayExtract(SegArray *seg,void *contiguous)
132 {
133   PetscErrorCode ierr;
134   PetscInt unitbytes;
135   SegArray s,t;
136   char *contig,*ptr;
137 
138   PetscFunctionBegin;
139   s = *seg;
140   unitbytes = s->unitbytes;
141   ierr = PetscMalloc((s->used+s->tailused)*unitbytes,&contig);CHKERRQ(ierr);
142   ptr = contig + s->tailused*unitbytes;
143   ierr = PetscMemcpy(ptr,s->u.array,s->used*unitbytes);CHKERRQ(ierr);
144   for (t=s->tail; t;) {
145     SegArray tail = t->tail;
146     ptr -= t->used*unitbytes;
147     ierr = PetscMemcpy(ptr,t->u.array,t->used*unitbytes);CHKERRQ(ierr);
148     ierr = PetscFree(t);CHKERRQ(ierr);
149     t = tail;
150   }
151   if (ptr != contig) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Tail count does not match");
152   s->tailused = 0;
153   s->tail = PETSC_NULL;
154   *(char**)contiguous = contig;
155   PetscFunctionReturn(0);
156 }
157 
158 #undef __FUNCT__
159 #define __FUNCT__ "BuildTwoSided_Ibarrier"
160 static PetscErrorCode BuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
161 {
162   PetscErrorCode ierr;
163   PetscMPIInt    nrecvs,tag,unitbytes,done;
164   PetscInt       i;
165   char           *tdata;
166   MPI_Request    *sendreqs,barrier;
167   SegArray       segrank,segdata;
168 
169   PetscFunctionBegin;
170   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
171   ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
172   tdata = (char*)todata;
173   ierr = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
174   for (i=0; i<nto; i++) {
175     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
176   }
177   ierr = SegArrayCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
178   ierr = SegArrayCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
179 
180   nrecvs = 0;
181   barrier = MPI_REQUEST_NULL;
182   for (done=0; !done;) {
183     PetscMPIInt flag;
184     MPI_Status status;
185     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
186     if (flag) {                 /* incoming message */
187       PetscMPIInt *recvrank;
188       void *buf;
189       ierr = SegArrayGet(&segrank,1,&recvrank);CHKERRQ(ierr);
190       ierr = SegArrayGet(&segdata,count,&buf);CHKERRQ(ierr);
191       *recvrank = status.MPI_SOURCE;
192       ierr = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
193       nrecvs++;
194     }
195     if (barrier == MPI_REQUEST_NULL) {
196       PetscMPIInt sent,nsends = PetscMPIIntCast(nto);
197       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
198       if (sent) {
199         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
200         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
201       }
202     } else {
203       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
204     }
205   }
206   *nfrom = nrecvs;
207   ierr = SegArrayExtract(&segrank,fromranks);CHKERRQ(ierr);
208   ierr = SegArrayDestroy(&segrank);CHKERRQ(ierr);
209   ierr = SegArrayExtract(&segdata,fromdata);CHKERRQ(ierr);
210   ierr = SegArrayDestroy(&segdata);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 #endif
214 
215 #undef __FUNCT__
216 #define __FUNCT__ "BuildTwoSided_Allreduce"
217 static PetscErrorCode BuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
218 {
219   PetscErrorCode ierr;
220   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
221   PetscInt       i;
222   char           *tdata,*fdata;
223   MPI_Request    *reqs,*sendreqs;
224   MPI_Status     *statuses;
225 
226   PetscFunctionBegin;
227   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
228   ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr);
229   ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
230   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
231   ierr = PetscGatherNumberOfMessages(comm,iflags,PETSC_NULL,&nrecvs);CHKERRQ(ierr);
232   ierr = PetscFree(iflags);CHKERRQ(ierr);
233 
234   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
235   ierr = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
236   ierr = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
237   tdata = (char*)todata;
238   ierr = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr);
239   sendreqs = reqs + nrecvs;
240   for (i=0; i<nrecvs; i++) {
241     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
242   }
243   for (i=0; i<nto; i++) {
244     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
245   }
246   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
247   ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr);
248   for (i=0; i<nrecvs; i++) {
249     franks[i] = statuses[i].MPI_SOURCE;
250   }
251   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
252 
253   *nfrom = nrecvs;
254   *fromranks = franks;
255   *(void**)fromdata = fdata;
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "PetscCommBuildTwoSided"
261 /*@C
262    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
263 
264    Collective on MPI_Comm
265 
266    Input Arguments:
267 +  comm - communicator
268 .  count - number of entries to send/receive (must match on all ranks)
269 .  dtype - datatype to send/receive from each rank (must match on all ranks)
270 .  nto - number of ranks to send data to
271 .  toranks - ranks to send to (array of length nto)
272 -  todata - data to send to each rank (packed)
273 
274    Output Arguments:
275 +  nfrom - number of ranks receiving messages from
276 .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
277 -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
278 
279    Level: developer
280 
281    Notes:
282    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
283    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
284 
285    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
286 
287    References:
288    The MPI_Ibarrier implementation uses the algorithm in
289    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
290 
291 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
292 @*/
293 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
294 {
295   PetscErrorCode ierr;
296   BuildTwoSidedType buildtype = BUILDTWOSIDED_NOTSET;
297 
298   PetscFunctionBegin;
299   ierr = PetscBuildTwoSidedGetType(&buildtype);CHKERRQ(ierr);
300   switch (buildtype) {
301 #if defined(PETSC_HAVE_MPI_IBARRIER)
302   case BUILDTWOSIDED_IBARRIER:
303     ierr = BuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
304     break;
305 #endif
306   case BUILDTWOSIDED_ALLREDUCE:
307     ierr = BuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
308     break;
309   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
310   }
311   PetscFunctionReturn(0);
312 }
313