xref: /petsc/src/sys/utils/mpits.c (revision bdf89e91df64bd49f4414ba801fb07e6338da027)
1 #include <petscsys.h>        /*I  "petscsys.h"  I*/
2 
3 const char *const PetscBuildTwoSidedTypes[] = {
4   "ALLREDUCE",
5   "IBARRIER",
6   "PetscBuildTwoSidedType",
7   "PETSC_BUILDTWOSIDED_",
8   0
9 };
10 
11 static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "PetscCommBuildTwoSidedSetType"
15 /*@
16    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication
17 
18    Logically Collective
19 
20    Input Arguments:
21 +  comm - PETSC_COMM_WORLD
22 -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()
23 
24    Level: developer
25 
26    Note:
27    This option is currently global, but could be made per-communicator.
28 
29 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
30 @*/
31 PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
32 {
33   PetscFunctionBegin;
34 #if defined(PETSC_USE_DEBUG)
35   {                             /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
36     PetscMPIInt ierr;
37     PetscMPIInt b1[2],b2[2];
38     b1[0] = -(PetscMPIInt)twosided;
39     b1[1] = (PetscMPIInt)twosided;
40     ierr  = MPI_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
41     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
42   }
43 #endif
44   _twosided_type = twosided;
45   PetscFunctionReturn(0);
46 }
47 
48 #undef __FUNCT__
49 #define __FUNCT__ "PetscCommBuildTwoSidedGetType"
50 /*@
51    PetscCommBuildTwoSidedGetType - set algorithm to use when building two-sided communication
52 
53    Logically Collective
54 
55    Output Arguments:
56 +  comm - communicator on which to query algorithm
57 -  twosided - algorithm to use for PetscCommBuildTwoSided()
58 
59    Level: developer
60 
61 .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
62 @*/
63 PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
64 {
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
69   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
70 #if defined(PETSC_HAVE_MPI_IBARRIER)
71 #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
72     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
73     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
74 #  else
75     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
76 #  endif
77 #else
78     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
79 #endif
80     ierr = PetscOptionsGetEnum(NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);CHKERRQ(ierr);
81   }
82   *twosided = _twosided_type;
83   PetscFunctionReturn(0);
84 }
85 
86 #if defined(PETSC_HAVE_MPI_IBARRIER)
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "PetscCommBuildTwoSided_Ibarrier"
90 static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
91 {
92   PetscErrorCode ierr;
93   PetscMPIInt    nrecvs,tag,unitbytes,done;
94   PetscInt       i;
95   char           *tdata;
96   MPI_Request    *sendreqs,barrier;
97   PetscSegBuffer segrank,segdata;
98 
99   PetscFunctionBegin;
100   ierr  = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
101   ierr  = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
102   tdata = (char*)todata;
103   ierr  = PetscMalloc(nto*sizeof(MPI_Request),&sendreqs);CHKERRQ(ierr);
104   for (i=0; i<nto; i++) {
105     ierr = MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
106   }
107   ierr = PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);CHKERRQ(ierr);
108   ierr = PetscSegBufferCreate(unitbytes,4*count,&segdata);CHKERRQ(ierr);
109 
110   nrecvs  = 0;
111   barrier = MPI_REQUEST_NULL;
112   for (done=0; !done; ) {
113     PetscMPIInt flag;
114     MPI_Status  status;
115     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);CHKERRQ(ierr);
116     if (flag) {                 /* incoming message */
117       PetscMPIInt *recvrank;
118       void        *buf;
119       ierr      = PetscSegBufferGet(&segrank,1,&recvrank);CHKERRQ(ierr);
120       ierr      = PetscSegBufferGet(&segdata,count,&buf);CHKERRQ(ierr);
121       *recvrank = status.MPI_SOURCE;
122       ierr      = MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
123       nrecvs++;
124     }
125     if (barrier == MPI_REQUEST_NULL) {
126       PetscMPIInt sent,nsends;
127       ierr = PetscMPIIntCast(nto,&nsends);CHKERRQ(ierr);
128       ierr = MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
129       if (sent) {
130         ierr = MPI_Ibarrier(comm,&barrier);CHKERRQ(ierr);
131         ierr = PetscFree(sendreqs);CHKERRQ(ierr);
132       }
133     } else {
134       ierr = MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);CHKERRQ(ierr);
135     }
136   }
137   *nfrom = nrecvs;
138   ierr   = PetscSegBufferExtractAlloc(&segrank,fromranks);CHKERRQ(ierr);
139   ierr   = PetscSegBufferDestroy(&segrank);CHKERRQ(ierr);
140   ierr   = PetscSegBufferExtractAlloc(&segdata,fromdata);CHKERRQ(ierr);
141   ierr   = PetscSegBufferDestroy(&segdata);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 #endif
145 
146 #undef __FUNCT__
147 #define __FUNCT__ "PetscCommBuildTwoSided_Allreduce"
148 static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
149 {
150   PetscErrorCode ierr;
151   PetscMPIInt    size,*iflags,nrecvs,tag,unitbytes,*franks;
152   PetscInt       i;
153   char           *tdata,*fdata;
154   MPI_Request    *reqs,*sendreqs;
155   MPI_Status     *statuses;
156 
157   PetscFunctionBegin;
158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
159   ierr = PetscMalloc(size*sizeof(*iflags),&iflags);CHKERRQ(ierr);
160   ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr);
161   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
162   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);CHKERRQ(ierr);
163   ierr = PetscFree(iflags);CHKERRQ(ierr);
164 
165   ierr     = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
166   ierr     = MPI_Type_size(dtype,&unitbytes);CHKERRQ(ierr);
167   ierr     = PetscMalloc(nrecvs*count*unitbytes,&fdata);CHKERRQ(ierr);
168   tdata    = (char*)todata;
169   ierr     = PetscMalloc2(nto+nrecvs,MPI_Request,&reqs,nto+nrecvs,MPI_Status,&statuses);CHKERRQ(ierr);
170   sendreqs = reqs + nrecvs;
171   for (i=0; i<nrecvs; i++) {
172     ierr = MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);CHKERRQ(ierr);
173   }
174   for (i=0; i<nto; i++) {
175     ierr = MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);CHKERRQ(ierr);
176   }
177   ierr = MPI_Waitall(nto+nrecvs,reqs,statuses);CHKERRQ(ierr);
178   ierr = PetscMalloc(nrecvs*sizeof(PetscMPIInt),&franks);CHKERRQ(ierr);
179   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
180   ierr = PetscFree2(reqs,statuses);CHKERRQ(ierr);
181 
182   *nfrom            = nrecvs;
183   *fromranks        = franks;
184   *(void**)fromdata = fdata;
185   PetscFunctionReturn(0);
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "PetscCommBuildTwoSided"
190 /*@C
191    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)
192 
193    Collective on MPI_Comm
194 
195    Input Arguments:
196 +  comm - communicator
197 .  count - number of entries to send/receive (must match on all ranks)
198 .  dtype - datatype to send/receive from each rank (must match on all ranks)
199 .  nto - number of ranks to send data to
200 .  toranks - ranks to send to (array of length nto)
201 -  todata - data to send to each rank (packed)
202 
203    Output Arguments:
204 +  nfrom - number of ranks receiving messages from
205 .  fromranks - ranks receiving messages from (length nfrom; caller should PetscFree())
206 -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
207 
208    Level: developer
209 
210    Notes:
211    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
212    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.
213 
214    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
215 
216    References:
217    The MPI_Ibarrier implementation uses the algorithm in
218    Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
219 
220 .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
221 @*/
222 PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscInt nto,const PetscMPIInt *toranks,const void *todata,PetscInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
223 {
224   PetscErrorCode         ierr;
225   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
226 
227   PetscFunctionBegin;
228   ierr = PetscCommBuildTwoSidedGetType(comm,&buildtype);CHKERRQ(ierr);
229   switch (buildtype) {
230   case PETSC_BUILDTWOSIDED_IBARRIER:
231 #if defined(PETSC_HAVE_MPI_IBARRIER)
232     ierr = PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
233 #else
234     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
235 #endif
236     break;
237   case PETSC_BUILDTWOSIDED_ALLREDUCE:
238     ierr = PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);CHKERRQ(ierr);
239     break;
240   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
241   }
242   PetscFunctionReturn(0);
243 }
244