xref: /petsc/src/sys/utils/mpits.c (revision cf4b5b4f730fdcaf80c8091250f26f4f74cb46c1)
1 #include <petscsys.h>        /*I  "petscsys.h"  I*/
2 
3 PetscLogEvent PETSC_BuildTwoSided;
4 
5 const char *const PetscBuildTwoSidedTypes[] = {
6   "ALLREDUCE",
7   "IBARRIER",
8   "REDSCATTER",
9   "PetscBuildTwoSidedType",
10   "PETSC_BUILDTWOSIDED_",
11   0
12 };
13 
14 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "PetscCommBuildTwoSidedSetType"
18 /*@
19    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
20 
21    Logically Collective
22 
23    Input Arguments:
24 +  comm - PETSC_COMM_WORLD
25 -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
26 
27    Level: developer
28 
29    Note:
30    This option is currently global, but could be made per-communicator.
31 
32 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
33 @*/
34 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
35 {
36   PetscFunctionBegin;
37 #if defined(PETSC_USE_DEBUG)
38   {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
39     PetscMPIInt ierr;
40     PetscMPIInt b1[2],b2[2];
41     b1[0] = -(PetscMPIInt)twosided;
42     b1[1] = (PetscMPIInt)twosided;
43     ierr  = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
44     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
45   }
46 #endif
47   _twosided_type = twosided;
48   PetscFunctionReturn(0);
49 }
50 
51 #undef __FUNCT__
52 #define __FUNCT__ "PetscCommBuildTwoSidedGetType"
53 /*@
54    PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication
55 
56    Logically Collective
57 
58    Output Arguments:
59 +  comm - communicator on which to query algorithm
60 -  twosided - algorithm to use for PetscCommBuildTwoSided()
61 
62    Level: developer
63 
64 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
65 @*/
66 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
67 {
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
72   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
73 #if defined(PETSC_HAVE_MPI_IBARRIER)
74 #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
75     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
76     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
77 #  else
78     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
79 #  endif
80 #else
81     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
82 #endif
83     ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
84   }
85   *twosided = _twosided_type;
86   PetscFunctionReturn(0);
87 }
88 
89 #if defined(PETSC_HAVE_MPI_IBARRIER)
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
93 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
94 {
95   PetscErrorCode ierr;
96   PetscMPIInt    nrecvs,tag,unitbytes,done,i;
97   char           *tdata;
98   MPI_Request    *sendreqs,barrier;
99   PetscSegBuffer segrank,segdata;
100 
101   PetscFunctionBegin;
102   ierr  = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
103   ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
104   tdata = (char*)todata;
105   ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
106   for (i=0; i<nto; i++) {
107     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
108   }
109   ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
110   ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
111 
112   nrecvs  = 0;
113   barrier = MPI_REQUEST_NULL;
114   for (done=0; !done; ) {
115     PetscMPIInt flag;
116     MPI_Status  status;
117     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
118     if (flag) {                 /* incoming message */
119       PetscMPIInt *recvrank;
120       void        *buf;
121       ierr      = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
122       ierr      = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
123       *recvrank = status.MPI_SOURCE;
124       ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
125       nrecvs++;
126     }
127     if (barrier == MPI_REQUEST_NULL) {
128       PetscMPIInt sent,nsends;
129       ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
130       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
131       if (sent) {
132         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
133         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
134       }
135     } else {
136       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
137     }
138   }
139   *nfrom = nrecvs;
140   ierr   = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
141   ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
142   ierr   = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
143   ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
144   ierr   = PetscCommDestroy(&comm);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 #endif
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
151 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
152 {
153   PetscErrorCode ierr;
154   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks,i;
155   char           *tdata,*fdata;
156   MPI_Request    *reqs,*sendreqs;
157   MPI_Status     *statuses;
158 
159   PetscFunctionBegin;
160   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
161   ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr);
162   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
163   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
164   ierr = PetscFree(iflags);CHKERRQ(ierr);
165 
166   ierr     = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
167   ierr     = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
168   ierr     = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
169   tdata    = (char*)todata;
170   ierr     = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
171   sendreqs = reqs + nrecvs;
172   for (i=0; i<nrecvs; i++) {
173     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
174   }
175   for (i=0; i<nto; i++) {
176     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
177   }
178   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
179   ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
180   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
181   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
182   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
183 
184   *nfrom            = nrecvs;
185   *fromranks        = franks;
186   *(void**)fromdata = fdata;
187   PetscFunctionReturn(0);
188 }
189 
190 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
191 #undef __FUNCT__
192 #define __FUNCT__ "PetscCommBuildTwoSided_RedScatter"
193 static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
194 {
195   PetscErrorCode ierr;
196   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks,i;
197   char           *tdata,*fdata;
198   MPI_Request    *reqs,*sendreqs;
199   MPI_Status     *statuses;
200 
201   PetscFunctionBegin;
202   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
203   ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr);
204   ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
205   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
206   ierr = MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
207   ierr = PetscFree(iflags);CHKERRQ(ierr);
208 
209   ierr     = PetscCommDuplicate(comm,&comm,&tag);CHKERRQ(ierr);
210   ierr     = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
211   ierr     = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
212   tdata    = (char*)todata;
213   ierr     = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
214   sendreqs = reqs + nrecvs;
215   for (i=0; i<nrecvs; i++) {
216     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
217   }
218   for (i=0; i<nto; i++) {
219     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
220   }
221   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
222   ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
223   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
224   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
225   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
226 
227   *nfrom            = nrecvs;
228   *fromranks        = franks;
229   *(void**)fromdata = fdata;
230   PetscFunctionReturn(0);
231 }
232 #endif
233 
234 #undef __FUNCT__
235 #define __FUNCT__ "PetscCommBuildTwoSided"
236 /*@C
237    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
238 
239    Collective on MPI_Comm
240 
241    Input Arguments:
242 +  comm - communicator
243 .  count - number of entries to send/receive (must match on all ranks)
244 .  dtype - datatype to send/receive from each rank (must match on all ranks)
245 .  nto - number of ranks to send data to
246 .  toranks - ranks to send to (array of length nto)
247 -  todata - data to send to each rank (packed)
248 
249    Output Arguments:
250 +  nfrom - number of ranks receiving messages from
251 .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
252 -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
253 
254    Level: developer
255 
256    Options Database Keys:
257 .  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication
258 
259    Notes:
260    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
261    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
262 
263    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
264 
265    References:
266    The MPI_Ibarrier implementation uses the algorithm in
267    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
268 
269 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
270 @*/
271 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
272 {
273   PetscErrorCode         ierr;
274   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
275 
276   PetscFunctionBegin;
277   ierr = PetscSysInitializePackage();CHKERRQ(ierr);
278   ierr = PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
279   ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
280   switch (buildtype) {
281   case PETSC_BUILDTWOSIDED_IBARRIER:
282 #if defined(PETSC_HAVE_MPI_IBARRIER)
283     ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
284 #else
285     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
286 #endif
287     break;
288   case PETSC_BUILDTWOSIDED_ALLREDUCE:
289     ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
290     break;
291   case PETSC_BUILDTWOSIDED_REDSCATTER:
292 #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
293     ierr = PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
294 #else
295     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
296 #endif
297     break;
298   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
299   }
300   ierr = PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303