xref: /petsc/src/sys/utils/mpits.c (revision fce0c873789145caee477924bfa4ad26b4cd6ea4)
1 #include <petscsys.h>        /*I  "petscsys.h"  I*/
2 
3 const char *const PetscBuildTwoSidedTypes[] = {
4   "ALLREDUCE",
5   "IBARRIER",
6   "PetscBuildTwoSidedType",
7   "PETSC_BUILDTWOSIDED_",
8   0
9 };
10 
11 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "PetscCommBuildTwoSidedSetType"
15 /*@
16    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
17 
18    Logically Collective
19 
20    Input Arguments:
21 +  comm - PETSC_COMM_WORLD
22 -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
23 
24    Level: developer
25 
26    Note:
27    This option is currently global, but could be made per-communicator.
28 
29 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
30 @*/
31 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
32 {
33   PetscFunctionBegin;
34 #if defined(PETSC_USE_DEBUG)
35   {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
36     PetscMPIInt ierr;
37     PetscMPIInt b1[2],b2[2];
38     b1[0] = -(PetscMPIInt)twosided;
39     b1[1] = (PetscMPIInt)twosided;
40     ierr  = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
41     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
42   }
43 #endif
44   _twosided_type = twosided;
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "PetscCommBuildTwoSidedGetType"
50 /*@
51    PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication
52 
53    Logically Collective
54 
55    Output Arguments:
56 +  comm - communicator on which to query algorithm
57 -  twosided - algorithm to use for PetscCommBuildTwoSided()
58 
59    Level: developer
60 
61 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
62 @*/
63 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
69   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
70 #if defined(PETSC_HAVE_MPI_IBARRIER)
71 #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
72     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
73     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
74 #  else
75     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
76 #  endif
77 #else
78     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
79 #endif
80     ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
81   }
82   *twosided = _twosided_type;
83   PetscFunctionReturn(0);
84 }
85 
86 #if defined(PETSC_HAVE_MPI_IBARRIER)
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
90 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
91 {
92   PetscErrorCode ierr;
93   PetscMPIInt    nrecvs,tag,unitbytes,done;
94   PetscInt       i;
95   char           *tdata;
96   MPI_Request    *sendreqs,barrier;
97   PetscSegBuffer segrank,segdata;
98 
99   PetscFunctionBegin;
100   ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
101   ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
102   tdata = (char*)todata;
103   ierr  = PetscMalloc1(nto,&sendreqs);CHKERRQ(ierr);
104   for (i=0; i<nto; i++) {
105     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
106   }
107   ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
108   ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
109 
110   nrecvs  = 0;
111   barrier = MPI_REQUEST_NULL;
112   for (done=0; !done; ) {
113     PetscMPIInt flag;
114     MPI_Status  status;
115     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
116     if (flag) {                 /* incoming message */
117       PetscMPIInt *recvrank;
118       void        *buf;
119       ierr      = PetscSegBufferGet(segrank,1,&recvrank);CHKERRQ(ierr);
120       ierr      = PetscSegBufferGet(segdata,count,&buf);CHKERRQ(ierr);
121       *recvrank = status.MPI_SOURCE;
122       ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
123       nrecvs++;
124     }
125     if (barrier == MPI_REQUEST_NULL) {
126       PetscMPIInt sent,nsends;
127       ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
128       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
129       if (sent) {
130         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
131         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
132       }
133     } else {
134       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
135     }
136   }
137   *nfrom = nrecvs;
138   ierr   = PetscSegBufferExtractAlloc(segrank,fromranks);CHKERRQ(ierr);
139   ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
140   ierr   = PetscSegBufferExtractAlloc(segdata,fromdata);CHKERRQ(ierr);
141   ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 #endif
145 
146 #undef __FUNCT__
147 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
148 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
149 {
150   PetscErrorCode ierr;
151   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
152   PetscInt       i;
153   char           *tdata,*fdata;
154   MPI_Request    *reqs,*sendreqs;
155   MPI_Status     *statuses;
156 
157   PetscFunctionBegin;
158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
159   ierr = PetscCalloc1(size,&iflags);CHKERRQ(ierr);
160   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
161   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
162   ierr = PetscFree(iflags);CHKERRQ(ierr);
163 
164   ierr     = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
165   ierr     = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
166   ierr     = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
167   tdata    = (char*)todata;
168   ierr     = PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);CHKERRQ(ierr);
169   sendreqs = reqs + nrecvs;
170   for (i=0; i<nrecvs; i++) {
171     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
172   }
173   for (i=0; i<nto; i++) {
174     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
175   }
176   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
177   ierr = PetscMalloc1(nrecvs,&franks);CHKERRQ(ierr);
178   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
179   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
180 
181   *nfrom            = nrecvs;
182   *fromranks        = franks;
183   *(void**)fromdata = fdata;
184   PetscFunctionReturn(0);
185 }
186 
187 #undef __FUNCT__
188 #define __FUNCT__ "PetscCommBuildTwoSided"
189 /*@C
190    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
191 
192    Collective on MPI_Comm
193 
194    Input Arguments:
195 +  comm - communicator
196 .  count - number of entries to send/receive (must match on all ranks)
197 .  dtype - datatype to send/receive from each rank (must match on all ranks)
198 .  nto - number of ranks to send data to
199 .  toranks - ranks to send to (array of length nto)
200 -  todata - data to send to each rank (packed)
201 
202    Output Arguments:
203 +  nfrom - number of ranks receiving messages from
204 .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
205 -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
206 
207    Level: developer
208 
209    Notes:
210    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
211    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
212 
213    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
214 
215    References:
216    The MPI_Ibarrier implementation uses the algorithm in
217    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
218 
219 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
220 @*/
221 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
222 {
223   PetscErrorCode         ierr;
224   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
225 
226   PetscFunctionBegin;
227   ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
228   switch (buildtype) {
229   case PETSC_BUILDTWOSIDED_IBARRIER:
230 #if defined(PETSC_HAVE_MPI_IBARRIER)
231     ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
232 #else
233     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
234 #endif
235     break;
236   case PETSC_BUILDTWOSIDED_ALLREDUCE:
237     ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
238     break;
239   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
240   }
241   PetscFunctionReturn(0);
242 }
243